Scipy Curve Fit Optimize not working for log scale values - python

So I am trying to fit a set of data points to this equation:
abs(I) = Io(exp((qV)/(nKT)) - 1) --- Shockley diode equation
to a bunch of data points I was given. Knowing the V and the I values, I need to optimize the Io and the n values to get me data closely matching the data set I was given.
However, scipy optimize curve fit is not giving me the values I want, which are n = ~1.15 and Io = ~1.8E-13, and is instead giving me n = 2.12 and Io = 2.11E-11. I suspect this is due to the data set values being very small numbers, messing with the optimization, but even when I set the initial guess to be n = 1.15 and Io = 1.8E-13, the optimized values do not change.
Does anyone have any tips on how to fix this?
import numpy as np
import math
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
Voltage = np.array([-0.5 , -0.49, -0.48, -0.47, -0.46, -0.45, -0.44, -0.43, -0.42,
-0.41, -0.4 , -0.39, -0.38, -0.37, -0.36, -0.35, -0.34, -0.33,
-0.32, -0.31, -0.3 , -0.29, -0.28, -0.27, -0.26, -0.25, -0.24,
-0.23, -0.22, -0.21, -0.2 , -0.19, -0.18, -0.17, -0.16, -0.15,
-0.14, -0.13, -0.12, -0.11, -0.1 , -0.09, -0.08, -0.07, -0.06,
-0.05, -0.04, -0.03, -0.02, -0.01, 0. , 0.01, 0.02, 0.03,
0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11, 0.12,
0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 ,
0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 ])
Current = np.array([ 6.99000000e-13, 6.83000000e-13, 6.57000000e-13,
6.46000000e-13, 6.19000000e-13, 6.07000000e-13,
5.86000000e-13, 5.73000000e-13, 5.55000000e-13,
5.37000000e-13, 5.27000000e-13, 5.08000000e-13,
4.92000000e-13, 4.75000000e-13, 4.61000000e-13,
4.43000000e-13, 4.32000000e-13, 4.18000000e-13,
3.99000000e-13, 3.91000000e-13, 3.79000000e-13,
3.66000000e-13, 3.54000000e-13, 3.43000000e-13,
3.34000000e-13, 3.18000000e-13, 3.06000000e-13,
2.96000000e-13, 2.86000000e-13, 2.77000000e-13,
2.66000000e-13, 2.59000000e-13, 2.54000000e-13,
2.43000000e-13, 2.33000000e-13, 2.22000000e-13,
2.16000000e-13, 2.07000000e-13, 2.00000000e-13,
1.94000000e-13, 1.85000000e-13, 1.77000000e-13,
1.68000000e-13, 1.58000000e-13, 1.48000000e-13,
1.35000000e-13, 1.21000000e-13, 1.03000000e-13,
7.53000000e-14, 4.32000000e-14, 2.33000000e-15,
6.46000000e-14, 1.57000000e-13, 2.82000000e-13,
4.58000000e-13, 7.07000000e-13, 1.06000000e-12,
1.57000000e-12, 2.28000000e-12, 3.29000000e-12,
4.75000000e-12, 6.80000000e-12, 9.76000000e-12,
1.39000000e-11, 1.82000000e-11, 2.57000000e-11,
3.67000000e-11, 5.21000000e-11, 7.39000000e-11,
1.04000000e-10, 1.62000000e-10, 2.27000000e-10,
3.21000000e-10, 4.48000000e-10, 6.21000000e-10,
8.70000000e-10, 1.20000000e-09, 1.66000000e-09,
2.27000000e-09, 3.08000000e-09, 4.13000000e-09,
5.46000000e-09, 7.05000000e-09, 8.85000000e-09,
1.11000000e-08, 1.39000000e-08, 1.74000000e-08,
2.05000000e-08, 2.28000000e-08, 2.52000000e-08,
2.91000000e-08])
def diode_function(V, n, Io):
    """Return the magnitude of the Shockley diode current at T = 300 K.

    Evaluates ``abs(Io * (exp(q*V / (n*k*T)) - 1))``.

    Parameters
    ----------
    V : float or numpy.ndarray
        Diode voltage(s).
    n : float
        Ideality factor.
    Io : float
        Saturation current.

    Returns
    -------
    float or numpy.ndarray
        Absolute value of the current, same shape as `V`.
    """
    kt = 300 * 1.38 * math.pow(10, -23)  # k * T, with T fixed at 300
    q = 1.60 * math.pow(10, -19)  # elementary charge
    I_final = Io * (np.exp((q * V) / (n * kt)) - 1)
    # For V < 0 the bracket is negative; the data being fitted is |I|.
    return abs(I_final)
p0 = [1.15, 1.8e-13]
popt, pcov = curve_fit(diode_function, Voltage, Current, p0 = p0)
print(popt)
fig = plt.figure()
ax = fig.add_subplot(121)
ax.set_title('I_d vs V_d')
ax.set_xlabel('V_d')
ax.set_ylabel('I_d')
ax.set_yscale('log')
plt.plot(Voltage, Current, 'ko', label="Original Data")
plt.plot(Voltage, diode_function(Voltage, *popt), 'r-', label="Fitted Curve")
plt.legend(loc='best')
ax = fig.add_subplot(122)
ax.set_title('I_d vs V_d')
ax.set_xlabel('V_d')
ax.set_ylabel('I_d')
ax.set_yscale('log')
popt = [1.15,1.8e-13]
plt.plot(Voltage, Current, 'ko', label="Original Data")
plt.plot(Voltage, diode_function(Voltage, *popt), 'r-', label="Fitted Curve")
plt.legend(loc='best')
plt.show()
Picture of the graph:
The left graph is with scipy optimization and the right graph is the one I want

I guess you are on the right track, using the logarithm to scale the data such that the differences are much lower. In order to prevent problems with logarithms, one usual option is to add a constant. Instead of log(x), one would use log(x+constant). This constant needs to be 1 or higher.
Using different constants still gives different results though, again because larger values are weighted higher in the least-squares method.
# imports and data as in question
def diode_function(V, n, Io):
    """Return the magnitude of the Shockley diode current at T = 300 K.

    Evaluates ``|Io * (exp(q*V / (n*k*T)) - 1)|``.

    Parameters
    ----------
    V : float or numpy.ndarray
        Diode voltage(s).
    n : float
        Ideality factor.
    Io : float
        Saturation current.

    Returns
    -------
    float or numpy.ndarray
        Absolute value of the current, same shape as `V`.
    """
    kt = 300 * 1.38e-23  # k * T, with T fixed at 300
    q = 1.60e-19  # elementary charge
    I_final = Io * (np.exp((q * V) / (n * kt)) - 1)
    # The measured data is |I|, so the sign of the reverse current is dropped.
    return np.abs(I_final)
p0 = [1.15, 1.8e-13]
popt, pcov = curve_fit(diode_function, Voltage, Current, p0 = p0)
fig, ax = plt.subplots()
ax.set_title('I_d vs V_d')
ax.set_xlabel('V_d')
ax.set_ylabel('I_d')
ax.set_yscale('log')
ax.plot(Voltage, Current, 'ko', label="Original Data")
offsets = [1,15]
colors = ["limegreen", "crimson"]
for offset, color in zip(offsets,colors):
logdf = lambda V,n,Io: np.log10(diode_function(V, n, Io)+offset)
poptn, pcovn = curve_fit(logdf, Voltage, np.log10(Current+offset), p0 = p0)
ax.plot(Voltage, 10**(logdf(Voltage, *poptn))-offset,
color=color, label="fit (offset: {})".format(offset))
ax.legend(loc='best')
plt.show()

Related

In a radar chart in matplotlib, is there any way to stagger the positions of the yticks (or rticks) so that they do not appear in the same line?

I have the following code from the matplotlib demo:
import matplotlib.pyplot as plt
from matplotlib.path import Path
from matplotlib.spines import Spine
from matplotlib.projections.polar import PolarAxes
from matplotlib.projections import register_projection
import glob
import pygal
import pandas as pd
import numpy as np
def radar_factory(num_vars, frame='circle'):
    """Create a radar chart with `num_vars` axes.

    This function creates a RadarAxes projection and registers it.

    Parameters
    ----------
    num_vars : int
        Number of variables for radar chart.
    frame : {'circle' | 'polygon'}
        Shape of frame surrounding axes.

    Returns
    -------
    numpy.ndarray
        The `num_vars` evenly spaced axis angles in radians, offset by pi/2.

    Raises
    ------
    ValueError
        If `frame` is not 'circle' or 'polygon'.
    """
    # calculate evenly-spaced axis angles
    theta = np.linspace(0, 2*np.pi, num_vars, endpoint=False)
    # rotate theta such that the first axis is at the top
    theta += np.pi/2

    def draw_poly_patch(self):
        verts = unit_poly_verts(theta)
        return plt.Polygon(verts, closed=True, edgecolor='k')

    def draw_circle_patch(self):
        # unit circle centered on (0.5, 0.5)
        return plt.Circle((0.5, 0.5), 0.5)

    patch_dict = {'polygon': draw_poly_patch, 'circle': draw_circle_patch}
    if frame not in patch_dict:
        raise ValueError('unknown value for `frame`: %s' % frame)

    class RadarAxes(PolarAxes):
        """Polar axes registered under the projection name 'radar'."""

        name = 'radar'
        # use 1 line segment to connect specified points
        RESOLUTION = 1
        # define draw_frame method
        draw_patch = patch_dict[frame]

        def fill(self, *args, **kwargs):
            """Override fill so that line is closed by default"""
            closed = kwargs.pop('closed', True)
            return super(RadarAxes, self).fill(closed=closed, *args, **kwargs)

        def plot(self, *args, **kwargs):
            """Override plot so that line is closed by default"""
            lines = super(RadarAxes, self).plot(*args, **kwargs)
            for line in lines:
                self._close_line(line)

        def _close_line(self, line):
            """Append the first point to the end of `line` if it is open."""
            x, y = line.get_data()
            # FIXME: markers at x[0], y[0] get doubled-up
            if x[0] != x[-1]:
                x = np.concatenate((x, [x[0]]))
                y = np.concatenate((y, [y[0]]))
                line.set_data(x, y)

        def set_varlabels(self, labels):
            # NOTE: `labels` is currently ignored; only the grid angles are set.
            self.set_thetagrids(np.degrees(theta)) #labels

        def _gen_axes_patch(self):
            return self.draw_patch()

        def _gen_axes_spines(self):
            if frame == 'circle':
                return PolarAxes._gen_axes_spines(self)
            # The following is a hack to get the spines (i.e. the axes frame)
            # to draw correctly for a polygon frame.
            # spine_type must be 'left', 'right', 'top', 'bottom', or `circle`.
            spine_type = 'circle'
            verts = unit_poly_verts(theta)
            # close off polygon by repeating first vertex
            verts.append(verts[0])
            path = Path(verts)
            spine = Spine(self, spine_type, path)
            spine.set_transform(self.transAxes)
            return {'polar': spine}

    register_projection(RadarAxes)
    return theta
def unit_poly_verts(theta):
    """Return vertices of polygon for subplot axes.

    This polygon is circumscribed by a unit circle centered at (0.5, 0.5)

    Parameters
    ----------
    theta : iterable of float
        Vertex angles in radians.

    Returns
    -------
    list of tuple
        One ``(x, y)`` pair per angle, at radius 0.5 around (0.5, 0.5).
    """
    x0, y0, r = [0.5] * 3
    verts = [(r*np.cos(t) + x0, r*np.sin(t) + y0) for t in theta]
    return verts
def example_data():
# The following data is from the Denver Aerosol Sources and Health study.
# See doi:10.1016/j.atmosenv.2008.12.017
#
# The data are pollution source profile estimates for five modeled
# pollution sources (e.g., cars, wood-burning, etc) that emit 7-9 chemical
# species. The radar charts are experimented with here to see if we can
# nicely visualize how the modeled source profiles change across four
# scenarios:
# 1) No gas-phase species present, just seven particulate counts on
# Sulfate
# Nitrate
# Elemental Carbon (EC)
# Organic Carbon fraction 1 (OC)
# Organic Carbon fraction 2 (OC2)
# Organic Carbon fraction 3 (OC3)
# Pyrolized Organic Carbon (OP)
# 2)Inclusion of gas-phase specie carbon monoxide (CO)
# 3)Inclusion of gas-phase specie ozone (O3).
# 4)Inclusion of both gas-phase species is present...
data = [
['Sulfate', 'Nitrate', 'EC', 'OC1', 'OC2', 'OC3', 'OP', 'CO', 'O3'],
('Basecase', [
[0.88, 0.01, 0.03, 0.03, 0.00, 0.06, 0.01, 0.00, 0.00],
[0.07, 0.95, 0.04, 0.05, 0.00, 0.02, 0.01, 0.00, 0.00],
[0.01, 0.02, 0.85, 0.19, 0.05, 0.10, 0.00, 0.00, 0.00],
[0.02, 0.01, 0.07, 0.01, 0.21, 0.12, 0.98, 0.00, 0.00],
[0.01, 0.01, 0.02, 0.71, 0.74, 0.70, 0.00, 0.00, 0.00]]),
('With CO', [
[0.88, 0.02, 0.02, 0.02, 0.00, 0.05, 0.00, 0.05, 0.00],
[0.08, 0.94, 0.04, 0.02, 0.00, 0.01, 0.12, 0.04, 0.00],
[0.01, 0.01, 0.79, 0.10, 0.00, 0.05, 0.00, 0.31, 0.00],
[0.00, 0.02, 0.03, 0.38, 0.31, 0.31, 0.00, 0.59, 0.00],
[0.02, 0.02, 0.11, 0.47, 0.69, 0.58, 0.88, 0.00, 0.00]]),
('With O3', [
[0.89, 0.01, 0.07, 0.00, 0.00, 0.05, 0.00, 0.00, 0.03],
[0.07, 0.95, 0.05, 0.04, 0.00, 0.02, 0.12, 0.00, 0.00],
[0.01, 0.02, 0.86, 0.27, 0.16, 0.19, 0.00, 0.00, 0.00],
[0.01, 0.03, 0.00, 0.32, 0.29, 0.27, 0.00, 0.00, 0.95],
[0.02, 0.00, 0.03, 0.37, 0.56, 0.47, 0.87, 0.00, 0.00]]),
('CO & O3', [
[0.87, 0.01, 0.08, 0.00, 0.00, 0.04, 0.00, 0.00, 0.01],
[0.09, 0.95, 0.02, 0.03, 0.00, 0.01, 0.13, 0.06, 0.00],
[0.01, 0.02, 0.71, 0.24, 0.13, 0.16, 0.00, 0.50, 0.00],
[0.01, 0.03, 0.00, 0.28, 0.24, 0.23, 0.00, 0.44, 0.88],
[0.02, 0.00, 0.18, 0.45, 0.64, 0.55, 0.86, 0.00, 0.16]])
]
return data
if __name__ == '__main__':
N = 24
theta = radar_factory(N, frame='circle')
folder = glob.glob('/*.csv',recursive=True)
data = folder[0]
data = pd.read_csv(data)
unique_hours = data['Hour'].unique().reshape(1, -1)
unique_hours = unique_hours.tolist()
hourly_alarm = np.empty((24, 1))
for i in range(0, 24):
hourly_alarm[i] = data.loc[data['Hour'] == i, 'Number of Alarms'].sum()
hourly_alarm = hourly_alarm.reshape(1, -1)
hourly_alarm = hourly_alarm.tolist()
unique_hours[0].sort()
# data = example_data()
data = []
data.extend(unique_hours)
data.extend(hourly_alarm)
spoke_labels = data.pop(0)
fig, axes = plt.subplots(figsize=(9, 9), nrows=1, ncols=1,
subplot_kw=dict(projection='radar'))
#fig.subplots_adjust(wspace=0.25, hspace=0.20, top=0.85, bottom=0.05) #This line adjusts the amount of space between suplots. Not applicable here
colors = ['b', 'r', 'g', 'm', 'y']
# Plot the four cases from the example data on separate axes
axes.set_title('Blah', weight='bold', size='medium', position=(0.5, 1.1),
horizontalalignment='center', verticalalignment='center')
axes.scatter(theta, data[0])
This gives me a plot like this:
Here, I want to stagger the positions of the ylabels on the theta axis so that they do not all appear in one line, which does not look good.
Can I be pointed in the direction as to how to do this?
I tried to change the attributes of the PolarAxes class but it is not working.

Python Curve_Fit Exponential / Power / Log Curve - Improve Results

I am trying to fit this data which is asymptotically approaching zero (but never reaching it).
I believe the best curve is an Inverse Logistic Function, but open to suggestions. The Key is the decaying "S-curve" shape which is expected.
Here is the code I have so far, and the plot image below, which is a pretty ugly fit.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# DATA
x = pd.Series([1,1,264,882,913,1095,1156,1217,1234,1261,1278,1460,1490,1490,1521,1578,1612,1612,1668,1702,1704,1735,1793,2024,2039,2313,2313,2558,2558,2617,2617,2708,2739,2770,2770,2831,2861,2892,2892,2892,2892,2892,2923,2923,2951,2951,2982,2982,3012,3012,3012,3012,3012,3012,3012,3073,3073,3073,3104,3104,3104,3104,3135,3135,3135,3135,3165,3165,3165,3165,3165,3196,3196,3196,3226,3226,3257,3316,3347,3347,3347,3347,3377,3377,3438,3469,3469]).values
y = pd.Series([1000,600,558.659217877095,400,300,100,7.75,6,8.54,6.66666666666667,7.14,1.1001100110011,1.12,0.89,1,2,0.666666666666667,0.77,1.12612612612613,0.7,0.664010624169987,0.65,0.51,0.445037828215398,0.27,0.1,0.26,0.1,0.1,0.13,0.16,0.1,0.13,0.1,0.12,0.1,0.13,0.14,0.14,0.17,0.11,0.15,0.09,0.1,0.26,0.16,0.09,0.09,0.05,0.09,0.09,0.1,0.1,0.11,0.11,0.09,0.09,0.11,0.08,0.09,0.09,0.1,0.06,0.07,0.07,0.09,0.05,0.05,0.06,0.07,0.08,0.08,0.07,0.1,0.08,0.08,0.05,0.06,0.04,0.04,0.05,0.05,0.04,0.06,0.05,0.05,0.06]).values
# Inverse Logistic Function
# https://en.wikipedia.org/wiki/Logistic_function
def func(x, L ,x0, k, b):
    """Inverse logistic model: ``1 / (L / (1 + exp(-k*(x - x0))) + b)``.

    Parameters
    ----------
    x : float or numpy.ndarray
        Independent variable.
    L : float
        Height of the logistic term in the denominator.
    x0 : float
        Midpoint of the logistic term.
    k : float
        Steepness of the logistic term.
    b : float
        Constant added to the denominator.

    Returns
    -------
    float or numpy.ndarray
        Model value(s), same shape as `x`.
    """
    y = 1/(L / (1 + np.exp(-k*(x-x0)))+b)
    return y
# FIT DATA
p0 = [max(y), np.median(x),1,min(y)] # this is an mandatory initial guess
popt, pcov = curve_fit(func, x, y,p0, method='dogbox',maxfev=10000)
# PERFORMANCE
modelPredictions = func(x, *popt)
absError = modelPredictions - y
SE = np.square(absError) # squared errors
MSE = np.mean(SE) # mean squared errors
RMSE = np.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (np.var(absError) / np.var(y))
print('Parameters:', popt)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
#PLOT
plt.figure()
plt.plot(x, y, 'ko', label="Original Noised Data")
plt.plot(x, func(x, *popt), 'r-', label="Fitted Curve")
plt.legend()
plt.yscale('log')
#plt.xscale('log')
plt.show()
Here is the result when this code is run... and what I would Like to achieve!
How can I better optimize the curve_fit, so that instead of the code generated RED line, I get something closer to the BLUE drawn line?
Thank you!!
From your plot of data and expected fit, I would guess that you do not really want to model your data y as a logistic-like step function but log(y) as a logistic-like step function.
So, I think you would probably want to use a logistic step function, perhaps adding a linear component, to model the log of this data. I would do this with lmfit, as it comes with these models built in, gives better reporting of results, and allows you to greatly simplify your fitting code, as shown below (disclaimer: I am a lead author):
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from lmfit.models import StepModel, LinearModel
# DATA
x = pd.Series([1, 1, 264, 882, 913, 1095, 1156, 1217, 1234, 1261, 1278,
1460, 1490, 1490, 1521, 1578, 1612, 1612, 1668, 1702, 1704,
1735, 1793, 2024, 2039, 2313, 2313, 2558, 2558, 2617, 2617,
2708, 2739, 2770, 2770, 2831, 2861, 2892, 2892, 2892, 2892,
2892, 2923, 2923, 2951, 2951, 2982, 2982, 3012, 3012, 3012,
3012, 3012, 3012, 3012, 3073, 3073, 3073, 3104, 3104, 3104,
3104, 3135, 3135, 3135, 3135, 3165, 3165, 3165, 3165, 3165,
3196, 3196, 3196, 3226, 3226, 3257, 3316, 3347, 3347, 3347,
3347, 3377, 3377, 3438, 3469, 3469]).values
y = pd.Series([1000, 600, 558.659217877095, 400, 300, 100, 7.75, 6, 8.54,
6.66666666666667, 7.14, 1.1001100110011, 1.12, 0.89, 1, 2,
0.666666666666667, 0.77, 1.12612612612613, 0.7,
0.664010624169987, 0.65, 0.51, 0.445037828215398, 0.27, 0.1,
0.26, 0.1, 0.1, 0.13, 0.16, 0.1, 0.13, 0.1, 0.12, 0.1, 0.13,
0.14, 0.14, 0.17, 0.11, 0.15, 0.09, 0.1, 0.26, 0.16, 0.09,
0.09, 0.05, 0.09, 0.09, 0.1, 0.1, 0.11, 0.11, 0.09, 0.09,
0.11, 0.08, 0.09, 0.09, 0.1, 0.06, 0.07, 0.07, 0.09, 0.05,
0.05, 0.06, 0.07, 0.08, 0.08, 0.07, 0.1, 0.08, 0.08, 0.05,
0.06, 0.04, 0.04, 0.05, 0.05, 0.04, 0.06, 0.05, 0.05, 0.06]).values
model = StepModel(form='logistic') + LinearModel()
params = model.make_params(amplitude=-5, center=1000, sigma=100, intercept=0, slope=0)
result = model.fit(np.log(y), params, x=x)
print(result.fit_report())
plt.plot(x, y, 'ko', label="Original Noised Data")
plt.plot(x, np.exp(result.best_fit), 'r-', label="Fitted Curve")
plt.legend()
plt.yscale('log')
plt.show()
That will print out a report with fit statistics and best-fit values of:
[[Model]]
(Model(step, form='logistic') + Model(linear))
[[Fit Statistics]]
# fitting method = leastsq
# function evals = 73
# data points = 87
# variables = 5
chi-square = 9.38961801
reduced chi-square = 0.11450754
Akaike info crit = -183.688405
Bayesian info crit = -171.358865
[[Variables]]
amplitude: -4.89008796 +/- 0.29600969 (6.05%) (init = -5)
center: 1180.65823 +/- 15.2836422 (1.29%) (init = 1000)
sigma: 94.0317580 +/- 18.5328976 (19.71%) (init = 100)
slope: -0.00147861 +/- 8.1151e-05 (5.49%) (init = 0)
intercept: 6.95177838 +/- 0.17170849 (2.47%) (init = 0)
[[Correlations]] (unreported correlations are < 0.100)
C(amplitude, slope) = -0.798
C(amplitude, sigma) = -0.649
C(amplitude, intercept) = -0.605
C(center, intercept) = -0.574
C(sigma, slope) = 0.542
C(sigma, intercept) = 0.348
C(center, sigma) = -0.335
C(amplitude, center) = 0.282
and produce a plot like this
You could certainly reproduce all that with scipy.optimize.curve_fit if you desired, but I would leave that as an exercise.
In your case I'd fit a hyperbolic tangent1 to the base-10 logarithm of your data.
Let's use
                                       log10 (y) = y₀ - a tanh (λ(x-x₀))
as your function
Approximately, your x runs from 0 to 3500 and your log10(y) from 3 to -1; taking tanh(2) = -tanh(-2) ≈ 1, we have
            y₀+a = 3, y₀-a = -1 ⇒ y₀ = 1, a = 2;
            λ = (2-(-2)) / (3500-0); x₀ = (3500-0)/2.
(this rough estimate is necessary to provide curve_fit with an initial guess, otherwise the procedure gets lost).
Omitting the boilerplate I have eventually
X = np.linspace(0, 3500, 701)
plt.scatter(x, np.log10(y), label='data')
plt.plot(X, 1-2*np.tanh(4/3500*(X-1750)), label='hand fit')
# Fit log10(y) = y0 - a*tanh(l*(x - x0)), starting from the hand estimate.
# Only the optimal parameters are kept; the covariance is discarded via *_.
(y0, a, l, x0), *_ = curve_fit(
    lambda x, y0, a, l, x0: y0 - a*np.tanh(l*(x-x0)),
    x, np.log10(y),
    p0=[1, 2, 4/3500, 3500/2])
plt.plot(X, y0-a*np.tanh(l*(X-x0)), label='curve_fit fit')
plt.legend()
Note 1: the logistic function is the hyperbolic tangent in disguise
I see that your plot uses log scaling, and I found that several different sigmoidal equations gave what appear to be good fits to the natural log of the Y data. Here is a graphical Python fitter using the natural log of the Y data with a four-parameter Logistic equation:
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import warnings
xData = numpy.array([1,1,264,882,913,1095,1156,1217,1234,1261,1278,1460,1490,1490,1521,1578,1612,1612,1668,1702,1704,1735,1793,2024,2039,2313,2313,2558,2558,2617,2617,2708,2739,2770,2770,2831,2861,2892,2892,2892,2892,2892,2923,2923,2951,2951,2982,2982,3012,3012,3012,3012,3012,3012,3012,3073,3073,3073,3104,3104,3104,3104,3135,3135,3135,3135,3165,3165,3165,3165,3165,3196,3196,3196,3226,3226,3257,3316,3347,3347,3347,3347,3377,3377,3438,3469,3469], dtype=float)
yData = numpy.array([1000,600,558.659217877095,400,300,100,7.75,6,8.54,6.66666666666667,7.14,1.1001100110011,1.12,0.89,1,2,0.666666666666667,0.77,1.12612612612613,0.7,0.664010624169987,0.65,0.51,0.445037828215398,0.27,0.1,0.26,0.1,0.1,0.13,0.16,0.1,0.13,0.1,0.12,0.1,0.13,0.14,0.14,0.17,0.11,0.15,0.09,0.1,0.26,0.16,0.09,0.09,0.05,0.09,0.09,0.1,0.1,0.11,0.11,0.09,0.09,0.11,0.08,0.09,0.09,0.1,0.06,0.07,0.07,0.09,0.05,0.05,0.06,0.07,0.08,0.08,0.07,0.1,0.08,0.08,0.05,0.06,0.04,0.04,0.05,0.05,0.04,0.06,0.05,0.05,0.06], dtype=float)
# fit the natural log of the data
yData = numpy.log(yData)
# NOTE: with no category given, this filter silences every warning, not just
# the "invalid value" ones raised while fitting.
warnings.filterwarnings("ignore") # do not print "invalid value" warnings during fit
def func(x, a, b, c, d): # Four-Parameter Logistic from zunzun.com
    """Four-parameter logistic: ``d + (a - d) / (1 + (x / c)**b)``.

    Parameters
    ----------
    x : float or numpy.ndarray
        Independent variable.
    a : float
        Value of the curve at ``x = 0`` (for ``b > 0``).
    b : float
        Exponent applied to ``x / c``.
    c : float
        Scale of `x`; the curve is halfway between `a` and `d` at ``x = c``.
    d : float
        Value the curve approaches as ``(x / c)**b`` grows large.

    Returns
    -------
    float or numpy.ndarray
        Model value(s), same shape as `x`.
    """
    return d + (a - d) / (1.0 + numpy.power(x / c, b))
# these are the same as the scipy defaults
initialParameters = numpy.array([1.0, 1.0, 1.0, 1.0])
# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)
modelPredictions = func(xData, *fittedParameters)
print('Parameters:', fittedParameters)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Show the data as a scatter plot with the fitted curve drawn over it.

    Reads the module-level `xData`, `yData`, `func` and `fittedParameters`.

    Parameters
    ----------
    graphWidth : float
        Figure width; divided by 100 to get inches at dpi=100.
    graphHeight : float
        Figure height; divided by 100 to get inches at dpi=100.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)
    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')
    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)
    # now the model as a line plot
    axes.plot(xModel, yModel)
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Natural Log of Y Data') # Y axis data label
    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
p0 = [max(y), np.median(x),1,min(y)] # this is an mandatory initial guess
Just to clarify, since this might be your issue: you shouldn't use "1.0" as your initial guess for k. You should use 1.0 / (max(x) - min(x)).
If your x values range over, say, [1200, 8000], then a fit started from 1.0 will really struggle to converge. You want to use 1/6800 as k, so that you start off with a normalized range of roughly [-1, 1] as your initial x-range.
The main reason is that np.exp(4000) overflows (it evaluates to inf), which makes it very hard for the optimizer to fit the function.

How do I estimate the right parameters for a cumulative gaussian fit?

I'm trying to fit a cumulative Gaussian distribution to my data, however the fits are clearly wrong. Why am I getting wrong means and standard deviations? Below you find my code and output.
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
testrefratios=np.array([ 0.2, 0.4, 0.6, 0.8, 0.9, 1. , 1.1, 1.2, 1.4, 1.6, 1.8])
Pn_final=np.array([ 0. , 0. , 0.03 , 0.35 , 0.47, 0.57 , 0.68, 0.73, 0.76 , 0.85 , 0.91])
Pd_final=np.array([ 0. , 0.03, 0.36 , 0.85 , 0.97, 0.98 , 0.98 , 0.99 , 1., 1., 1. ])
# cumulative gaussian fit
fg = plt.figure(1); fg.clf()
ax = fg.add_subplot(1, 1, 1)
t = np.linspace(0,2, 1000)
ax.grid(True)
ax.set_ylabel("Cumulative Probability Density")
ax.set_title("Fit to Normal Distribution")
mu1,sigma1 = norm.fit(Pn_final) # classical fit
ax.plot(t, norm.cdf(t, mu1, sigma1), alpha=.5)
mu1,sigma1 = norm.fit(Pd_final) # classical fit
ax.plot(t, norm.cdf(t, mu1, sigma1), alpha=.5)
ax.plot(testrefratios, Pn_final, 'bo',label='numerosity comparison')
ax.plot(testrefratios, Pd_final, 'ro', label='density comparison')
plt.legend(loc='lower right')
fg.canvas.draw()
Output:
At the moment, nothing you're doing is telling the system that you're trying to fit a cumulative Gaussian. norm.fit(Pn_final) is doing its best under the assumption that Pn_final represents a Gaussian.
One way would be to use scipy.optimize.curve_fit, and adding
from scipy.optimize import curve_fit
mu1,sigma1 = curve_fit(norm.cdf, testrefratios, Pn_final, p0=[0,1])[0]
ax.plot(t, norm.cdf(t, mu1, sigma1), alpha=.5)
mu1,sigma1 = curve_fit(norm.cdf, testrefratios, Pd_final, p0=[0,1])[0]
ax.plot(t, norm.cdf(t, mu1, sigma1), alpha=.5)
gives me
which at least looks more believable.

Matplotlib radar chart

so I was experimenting with this matplotlib example. In the data part I tried to make the graphic from a tuple like this:
data = data[0:8]
f1_CO = [0.88, 0.02, 0.02, 0.02, 0.00, 0.05, 0.00, 0.05, 0.00]
f1_O3 = [0.89, 0.01, 0.07, 0.00, 0.00, 0.05, 0.00, 0.00, 0.03]
f1_both = [0.86, 0.01, 0.08, 0.00, 0.00, 0.04, 0.00, 0.00, 0.01]
But it go wrong because it says:
ValueError: x and y must have same first dimension
So, what do I have to change to make it possible to make a plot from a tuple?
The complete code is :
from matplotlib.projections.polar import PolarAxes
from matplotlib.projections import register_projection
from pylab import *
import wave
import struct
def radar_factory(num_vars, frame='polygon'):
"""Create a radar chart with `num_vars` axes.
"""
# calculate evenly-spaced axis angles
theta = 2*pi * linspace(0, 1-1/int(num_vars), num_vars)
#print theta
#print
# rotate theta such that the first axis is at the top
theta += pi/2
def draw_poly_frame(self, x0, y0, r):
# TODO: should use transforms to convert (x, y) to (r, theta)
verts = [(r*cos(t) + x0, r*sin(t) + y0) for t in theta]
return Polygon(verts, closed=True)
def draw_circle_frame(self, x0, y0, r):
return Circle((x0, y0), r)
frame_dict = {'polygon': draw_poly_frame, 'circle': draw_circle_frame}
if frame not in frame_dict:
raise ValueError, 'unknown value for `frame`: %s' % frame
class RadarAxes(PolarAxes):
"""Class for creating a radar chart (a.k.a. a spider or star chart)
http://en.wikipedia.org/wiki/Radar_chart
"""
name = 'radar'
# use 1 line segment to connect specified points
RESOLUTION = 1
# define draw_frame method
draw_frame = frame_dict[frame]
def fill(self, *args, **kwargs):
"""Override fill so that line is closed by default"""
closed = kwargs.pop('closed', True)
return super(RadarAxes, self).fill(closed=closed, *args,**kwargs)
def plot(self, *args, **kwargs):
"""Override plot so that line is closed by default"""
lines = super(RadarAxes, self).plot(*args, **kwargs)
for line in lines:
self._close_line(line)
def _close_line(self, line):
x, y = line.get_data()
# FIXME: markers at x[0], y[0] get doubled-up
if x[0] != x[-1]:
x = concatenate((x, [x[0]]))
y = concatenate((y, [y[0]]))
line.set_data(x, y)
def set_varlabels(self, labels, rvals, rlabels):
self.set_thetagrids(theta * 180/pi, labels)
self.set_rgrids(rvals, labels=rlabels, size='small')
def get_axes_patch(self):
x0, y0 = (0.5, 0.5)
r = 0.5
return self.draw_frame(x0, y0, r)
register_projection(RadarAxes)
return theta
if __name__ == '__main__':
w = wave.open('C:/Python27/demo1.wav','r')
nf = w.getnframes()
sw = w.getsampwidth()
assert(sw==2)
rf = w.readframes(nf)
w.close()
data = struct.unpack("%sh" %nf,rf)
for i in range(9):
print i,data[i]
N = 9
theta = radar_factory(N)
data = data[0:8]
f1_CO = [0.88, 0.02, 0.02, 0.02, 0.00, 0.05, 0.00, 0.05, 0.00]
f1_O3 = [0.89, 0.01, 0.07, 0.00, 0.00, 0.05, 0.00, 0.00, 0.03]
f1_both = [0.86, 0.01, 0.08, 0.00, 0.00, 0.04, 0.00, 0.00, 0.01]
f2_base = [0.07, 0.95, 0.04, 0.05, 0.00, 0.02, 0.01, 0.00, 0.00]
f2_CO = [0.08, 0.94, 0.04, 0.02, 0.00, 0.01, 0.12, 0.04, 0.00]
f2_O3 = [0.07, 0.95, 0.05, 0.04, 0.00, 0.02, 0.12, 0.00, 0.00]
f2_both = [0.09, 0.95, 0.02, 0.03, 0.00, 0.01, 0.13, 0.06, 0.00]
f3_base = [0.01, 0.02, 0.85, 0.19, 0.05, 0.10, 0.00, 0.00, 0.00]
f3_CO = [0.01, 0.01, 0.79, 0.10, 0.00, 0.05, 0.00, 0.31, 0.00]
f3_O3 = [0.01, 0.02, 0.86, 0.27, 0.16, 0.19, 0.00, 0.00, 0.00]
f3_both = [0.01, 0.02, 0.71, 0.24, 0.13, 0.16, 0.00, 0.50, 0.00]
f4_base = [0.01, 0.01, 0.02, 0.71, 0.74, 0.70, 0.00, 0.00, 0.00]
f4_CO = [0.00, 0.02, 0.03, 0.38, 0.31, 0.31, 0.00, 0.59, 0.00]
f4_O3 = [0.01, 0.03, 0.00, 0.32, 0.29, 0.27, 0.00, 0.00, 0.95]
f4_both = [0.01, 0.03, 0.00, 0.28, 0.24, 0.23, 0.00, 0.44, 0.88]
f5_base = [0.02, 0.01, 0.07, 0.01, 0.21, 0.12, 0.98, 0.00, 0.00]
f5_CO = [0.02, 0.02, 0.11, 0.47, 0.69, 0.58, 0.88, 0.00, 0.00]
f5_O3 = [0.02, 0.00, 0.03, 0.37, 0.56, 0.47, 0.87, 0.00, 0.00]
f5_both = [0.02, 0.00, 0.18, 0.45, 0.64, 0.55, 0.86, 0.00, 0.16]
fig = figure(figsize=(9,9))
fig.subplots_adjust(wspace=0.25, hspace=0.20)
axlist = []
axisNum = 0
bases = [data, f2_base, f3_base, f5_base, f4_base]
COs = [f1_CO, f2_CO, f3_CO, f4_CO, f5_CO]
O3s = [f1_O3, f2_O3, f3_O3, f4_O3, f5_O3]
boths = [f1_both, f2_both, f3_both, f4_both, f5_both]
everything = [bases, COs, O3s, boths]
titles = ['Muestreo 1', 'Muestreo 2', 'Muestreo 3', 'Muestreo 4']
colors = ['b', 'r', 'g', 'm', 'y']
for row in range(2):
for col in range(2):
axisNum += 1
if axisNum == 2:
#Unfortunately, it looks like the loc keyword to legend() is
#relative to a specific subplot, rather than the figure itself.
#So, the positioning seen looks good, but if you resize the
#figure to be larger the legend becomes obviously bound to a
#specific subplot. This is in contrast to how the position works
#in something like figtext(). Had trouble using figlegend(), but
#need to try some more...
legend(('Factor 1', 'Factor 2', 'Factor 3', 'Factor 4',
'Factor 5'), loc=(0.95, 0.895), borderpad=0.01,
shadow=False, prop=matplotlib.font_manager
.FontProperties(size='smaller'), markerscale=0.4)
data = everything[axisNum-1]
ax = fig.add_subplot(2, 2, axisNum, projection='radar')
ax.set_title(titles[axisNum-1], weight='bold', size='medium',
horizontalalignment='center',
verticalalignment='center',
position=(0.5, 1.1))
p1 = ax.plot(theta, data[0], color=colors[0])
p2 = ax.plot(theta, data[1], color=colors[1])
p3 = ax.plot(theta, data[2], color=colors[2])
p4 = ax.plot(theta, data[3], color=colors[3])
p5 = ax.plot(theta, data[4], color=colors[4])
ax.fill(theta, data[0], facecolor=colors[0])
ax.fill(theta, data[1], facecolor=colors[1])
ax.fill(theta, data[2], facecolor=colors[2])
ax.fill(theta, data[3], facecolor=colors[3])
ax.fill(theta, data[4], facecolor=colors[4])
#axlist.extend(ax) #This does not work because ax is a
#RadarAxesSubplot object, which is not iterable
axlist.append(ax) #append() works because it simply tacks on to
#the list, as opposed to merging items from two
#lists
for patch in ax.patches:
patch.set_alpha(0.25)
figtext(0.5, 0.965, '5-Factor Solution Profiles Across Four Scenarios', ha='center', color='black', weight='bold', size='large')
#Crudely plot the grid lines I want to see: normalized concentrations of
#chemicals range from 0 to 1...
radiiGrid = [0.2, 0.4, 0.6, 0.8]
theta_rgrid = radar_factory(100)
for ax in axlist:
for r in radiiGrid:
radii = repeat(r, 100)
ax.plot(theta_rgrid, radii, color='lightgrey')
# FIXME: legend doesn't work when fill is called
spokeLabels = ['Sulfate', 'Nitrate', 'EC', 'OC1', 'OC2', 'OC3', 'OP',
'CO',
'O3']
radiiLabels = [str(rg) for rg in radiiGrid]
for ax in axlist:
ax.set_varlabels(spokeLabels, radiiGrid, radiiLabels)
show()
The problem doesn't come from the tuple type.
It seems that data[0] (length = 8, because of data = data[0:8]) and theta (length = 9, because N = 9) don't have the same length (first dimension).
You have to put data = data[0:9]

Spline Interpolation with Python

I wrote the following code to perform a spline interpolation:
import numpy as np
import scipy as sp
x1 = [1., 0.88, 0.67, 0.50, 0.35, 0.27, 0.18, 0.11, 0.08, 0.04, 0.04, 0.02]
y1 = [0., 13.99, 27.99, 41.98, 55.98, 69.97, 83.97, 97.97, 111.96, 125.96, 139.95, 153.95]
x = np.array(x1)
y = np.array(y1)
new_length = 25
new_x = np.linspace(x.min(), x.max(), new_length)
new_y = sp.interpolate.interp1d(x, y, kind='cubic')(new_x)
but I am getting:
ValueError: A value in x_new is below the interpolation range.
in interpolate.py
Any help would be appreciated.
From the scipy documentation on scipy.interpolate.interp1d:
scipy.interpolate.interp1d(x, y, kind='linear', axis=-1, copy=True, bounds_error=True, fill_value=np.nan)
x : array_like. A 1-D array of monotonically increasing real values.
...
The problem is that the x values are not monotonically increasing. In fact they are monotonically decreasing. Let me know if this works and if it's still the computation you are looking for:
import numpy as np
import scipy as sp
from scipy.interpolate import interp1d
x1 = [1., 0.88, 0.67, 0.50, 0.35, 0.27, 0.18, 0.11, 0.08, 0.04, 0.04, 0.02]
y1 = [0., 13.99, 27.99, 41.98, 55.98, 69.97, 83.97, 97.97, 111.96, 125.96, 139.95, 153.95]
# np.unique returns the x values sorted ascending with duplicates removed,
# plus the index of each value's first occurrence, so every x keeps its own y.
# Sorting x1 on its own would pair each x with the wrong y, and the repeated
# 0.04 would make the cubic spline fail.
x, first_idx = np.unique(x1, return_index=True)
y = np.array(y1)[first_idx]
new_length = 25
new_x = np.linspace(x.min(), x.max(), new_length)
new_y = sp.interpolate.interp1d(x, y, kind='cubic')(new_x)
You can get this in the following way:
import numpy as np
import scipy as sp
from scipy.interpolate import interp1d
x1 = [1., 0.88, 0.67, 0.50, 0.35, 0.27, 0.18, 0.11, 0.08, 0.04, 0.04, 0.02]
y1 = [0., 13.99, 27.99, 41.98, 55.98, 69.97, 83.97, 97.97, 111.96, 125.96, 139.95, 153.95]
# Combine the two lists into (x, y) tuples
points = zip(x1, y1)
# Sort the tuples by x-value, so x becomes increasing and each y stays with its x
points = sorted(points, key=lambda point: point[0])
# Split the sorted tuples back into a tuple of x values and a tuple of y values
x1, y1 = zip(*points)
new_length = 25
new_x = np.linspace(min(x1), max(x1), new_length)
new_y = sp.interpolate.interp1d(x1, y1, kind='cubic')(new_x)
I just got the above error and fixed it by removing the duplicated value in the X and Y arrays.
# x contains 0.2 twice; this duplicate is what triggers the error.
x = np.sort(np.array([0, .2, .2, .4, .6, .9]))
# Sorting once is enough; a second np.sort on sorted data changes nothing.
y = np.sort(np.array([0, .1, .06, .11, .25, .55]))
⬇ Change 0.2 to 0.3 or any number.
# Every x value is now distinct, so the interpolation no longer fails.
x = np.sort(np.array([0, .2, .3, .4, .6, .9]))
# Sorting once is enough; a second np.sort on sorted data changes nothing.
y = np.sort(np.array([0, .1, .06, .11, .25, .55]))

Categories