I'm trying to write an algorithm in Python that predicts the output of a sine wave. For example, if the input is 90 (in degrees), the output is 1.
When I try Linear Regression, the output is pretty bad.
[in]
import pandas as pd
from sklearn.linear_model import LinearRegression
# Training inputs: angles in degrees covering one full period of the sine wave.
dic = [0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360]
# Training targets: the sine of each angle above (rounded to three decimals).
dc = [0, 0.5, 0.866, 1, .866, 0.5, 0, -0.5, -0.866, -1, -0.866, -0.5, 0]
# Angles (degrees) for which a prediction is wanted.
test = [1, 10, 100]
# Wrap each list in a single-column DataFrame so the estimator receives 2-D input.
df = pd.DataFrame(dic)
dfy = pd.DataFrame(dc)
test = pd.DataFrame(test)
# Ordinary least squares: fits a straight line through the (angle, sine) pairs.
clf = LinearRegression()
clf.fit(df, dfy)
# NOTE(review): the output quoted below presumably comes from clf.predict(test),
# which is not shown in this snippet - confirm.
[out]
[[0.7340967 ]
[0.69718681]
[0.32808791]]
Logistic regression doesn't fit at all either, because it is meant for classification. What approaches would be better suited to this problem?
Here is a graphical non-linear fitter using your data and a sine function. The numpy sine function uses radians, so the sine function used here rescales the input. I guessed the initial parameter estimates by looking at a scatterplot of the data, and from the RMSE of nearly 0.0 and the R-squared of almost 1.0 the data would seem to have no noise component.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# Sample points of one full sine period: angles in degrees and their sines.
dic = [0.0, 30.0, 60.0, 90.0, 120.0, 150.0, 180.0, 210.0, 240.0, 270.0, 300.0, 330.0, 360.0]
dc = [0.0, 0.5, 0.866, 1.0, 0.866, 0.5, 0.0, -0.5, -0.866, -1.0, -0.866, -0.5, 0.0]

# rename data to match previous example code; stored as arrays so that the
# arithmetic below never relies on list/scalar operator fallbacks
xData = numpy.array(dic)
yData = numpy.array(dc)


def func(x, amplitude, center, width):
    """Evaluate a sine wave whose half-period is ``width``.

    Args:
        x: Scalar or array-like of input values (degrees for the data above).
        amplitude: Multiplier applied to the sine.
        center: Horizontal shift, in the same units as ``x``.
        width: Half-period of the wave, in the same units as ``x``.

    Returns:
        ``amplitude * sin(pi * (x - center) / width)``, element-wise.
    """
    # asarray lets plain Python lists be passed, not only numpy arrays
    x = numpy.asarray(x, dtype=float)
    return amplitude * numpy.sin(numpy.pi * (x - center) / width)


# these are estimated from a scatterplot of the data
initialParameters = numpy.array([-1.0, 180.0, 180.0])

# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData  # signed residuals, despite the name

SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared errors
RMSE = numpy.sqrt(MSE)  # Root Mean Squared Error, RMSE
# R-squared = 1 - SS_res / SS_tot.  Summing the raw squared errors (instead of
# taking the variance of the errors) keeps any systematic bias of the model
# in the score rather than subtracting it out.
Rsquared = 1.0 - (numpy.sum(SE) / numpy.sum(numpy.square(yData - numpy.mean(yData))))

print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Show the raw data as a scatter plot with the fitted model drawn over it.

    Reads the module-level ``xData``, ``yData``, ``func`` and
    ``fittedParameters`` set up by the fitting code earlier in the script.

    Args:
        graphWidth: Figure width in pixels (the figure is created at 100 dpi).
        graphHeight: Figure height in pixels.
    """
    f = plt.figure(figsize=(graphWidth / 100.0, graphHeight / 100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot (linspace defaults to 50 points)
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data')  # X axis data label
    axes.set_ylabel('Y Data')  # Y axis data label

    plt.show()
    # clean up after using pyplot: close only the figure created here so that
    # any other figure the caller has open is left alone
    plt.close(f)


graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
Related
I'm having trouble fitting a curve with only one parameter using scipy.optimize.curve_fit:
import scipy.optimize as opt
import numpy as np
# Model the asker wants to fit; T is the independent variable.
# NOTE(review): L is neither a parameter of func nor defined in this snippet.
# curve_fit expects the model as f(x, *params), so with only T in the signature
# there is no parameter left for it to optimise - this is the reported problem.
def func(T):
return 76.881324*np.exp((-L)/(8.314*T))
# x and y are the data arrays described in the text below; p0 is passed as an
# empty list, i.e. no initial guess for any parameter.
best_params, cov_matrix = opt.curve_fit(func, xdata = x, ydata = y, p0=[])
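I have arrays of values, x (T in the equation below) and y (P), that I'm trying to fit to the equation
$$P = 76.881324 \, \exp\left(-\frac{L}{8.314\,T}\right)$$
where L is the only parameter to fit, but it seems curve_fit wants func() to have more than one argument. How do I fix this?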
Here is a graphical Python fitter using your equation with some test data. Replace the example data with your own and you should be done.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# Example data; replace with your own T (x) and P (y) measurements.
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7])
yData = numpy.array([1.1, 20.2, 30.3, 60.4, 50.0, 60.6, 70.7])


def func(T, L):
    """Evaluate the model ``76.881324 * exp(-L / (8.314 * T))``.

    The independent variable comes first and the fitted parameter second,
    which is the ``f(x, *params)`` layout curve_fit requires.

    Args:
        T: Scalar or array-like of independent-variable values.
        L: The single parameter being fitted.

    Returns:
        The model value for each entry of ``T``.
    """
    # asarray lets plain Python lists be passed, not only numpy arrays
    T = numpy.asarray(T, dtype=float)
    return 76.881324 * numpy.exp((-L) / (8.314 * T))


# one starting value per fitted parameter; 1.0 is also what scipy would
# use by default when no initial guess is given
initialParameters = numpy.array([1.0])

# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData  # signed residuals, despite the name

SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared errors
RMSE = numpy.sqrt(MSE)  # Root Mean Squared Error, RMSE
# R-squared = 1 - SS_res / SS_tot.  This model has no constant term, so its
# residuals need not average to zero; summing the raw squared errors keeps
# that bias in the score, whereas var(absError) would subtract it out.
Rsquared = 1.0 - (numpy.sum(SE) / numpy.sum(numpy.square(yData - numpy.mean(yData))))

print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Show the raw data as a scatter plot with the fitted model drawn over it.

    Reads the module-level ``xData``, ``yData``, ``func`` and
    ``fittedParameters`` set up by the fitting code earlier in the script.

    Args:
        graphWidth: Figure width in pixels (the figure is created at 100 dpi).
        graphHeight: Figure height in pixels.
    """
    f = plt.figure(figsize=(graphWidth / 100.0, graphHeight / 100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot (linspace defaults to 50 points)
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data')  # X axis data label
    axes.set_ylabel('Y Data')  # Y axis data label

    plt.show()
    # clean up after using pyplot: close only the figure created here so that
    # any other figure the caller has open is left alone
    plt.close(f)


graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
Say I want to fit a polynomial model of degree d via least squares regression. There are two methods I've learned in python. One uses numpy and the other sklearn. After I fit the model and get the coefficients, to predict values for test data, in sklearn, I can do:
from sklearn.linear_model import LinearRegression
# x_train / y_train are the asker's training arrays; they are not defined in this snippet.
model = LinearRegression()
model.fit(x_train, y_train) # Fitting on Training Data
# NOTE(review): scikit-learn estimators expect 2-D input of shape
# (n_samples, n_features); a bare scalar such as 20 is presumably rejected by
# current releases and would need to be written as [[20]] - confirm against
# the installed version.
model.predict(20) #One value in test data is 20
What is the numpy equivalent for model.predict() after I fit the model using:
import numpy.polynomial.polynomial as poly
# Least-squares polynomial fit of degree d. numpy.polynomial.polynomial.polyfit
# returns the coefficients ordered from the lowest degree to the highest.
np_model = poly.polyfit(x_train, y_train, d)
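I use numpy.polyval; the docs are at https://docs.scipy.org/doc/numpy/reference/generated/numpy.polyval.html. Note that numpy.polyval expects the coefficients ordered from the highest degree to the lowest, which is the order numpy.polyfit returns. The coefficients returned by numpy.polynomial.polynomial.polyfit (as in your question) are ordered lowest degree first, so evaluate those with numpy.polynomial.polynomial.polyval(x, coefficients) instead. Here is a graphical polynomial fitter as an example that uses numpy.polyfit together with numpy.polyval.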
import numpy, matplotlib
import matplotlib.pyplot as plt
# Example observations: x positions and the value measured at each of them.
xData = numpy.asarray([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
yData = numpy.asarray([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])

# degree of the polynomial to fit (2 gives a quadratic)
polynomialOrder = 2

# least-squares polynomial fit; coefficients come back highest power first,
# which is the order numpy.polyval expects
fittedParameters = numpy.polyfit(xData, yData, deg=polynomialOrder)
print('Fitted Parameters:', fittedParameters)

# evaluate the fitted polynomial at the sample points
modelPredictions = numpy.polyval(fittedParameters, xData)

# goodness-of-fit statistics derived from the residuals
absError = modelPredictions - yData
SE = absError * absError  # squared errors
MSE = SE.mean()  # mean squared errors
RMSE = numpy.sqrt(MSE)  # Root Mean Squared Error, RMSE
Rsquared = 1.0 - absError.var() / yData.var()

print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Show the raw data as a scatter plot with the fitted polynomial over it.

    Reads the module-level ``xData``, ``yData`` and ``fittedParameters`` set
    up by the fitting code earlier in the script.

    Args:
        graphWidth: Figure width in pixels (the figure is created at 100 dpi).
        graphHeight: Figure height in pixels.
    """
    f = plt.figure(figsize=(graphWidth / 100.0, graphHeight / 100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot (linspace defaults to 50 points)
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = numpy.polyval(fittedParameters, xModel)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data')  # X axis data label
    axes.set_ylabel('Y Data')  # Y axis data label

    plt.show()
    # clean up after using pyplot: close only the figure created here so that
    # any other figure the caller has open is left alone
    plt.close(f)


graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
I am studying nonlinear curve fitting with Python.
I wrote the example below.
But the optimized curve is not drawn well by
plt.plot(basketCont, fittedData)
I guess the optimized parameters are not good either.
Could you give me some recommendations? Thank you.
import matplotlib
matplotlib.use('Qt4Agg')
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import numpy as np
from scipy.optimize import curve_fit
# Model proposed by the asker: y = a - b * exp(c * x).
def func(x, a, b, c):
return a - b* np.exp(c * x)
# Observed data: five (baskets, scaling_factor) pairs, not sorted by x.
baskets = np.array([475, 108, 2, 38, 320])
scaling_factor = np.array([95.5, 57.7, 1.4, 21.9, 88.8])
# No initial guess is passed, so curve_fit starts every parameter at 1.0.
# NOTE(review): with c = 1 and x as large as 475, exp(c * x) is astronomically
# large at the starting point, which presumably prevents a sensible fit -
# confirm by supplying p0 with a small negative c.
popt,pcov = curve_fit(func, baskets, scaling_factor)
print (popt)
print (pcov)
# 50 evenly spaced x values across the data range, used to draw the fitted curve
basketCont=np.linspace(min(baskets),max(baskets),50)
fittedData=[func(x, *popt) for x in basketCont]
# Scatter of the raw data with the fitted curve drawn over it
fig1 = plt.figure(1)
plt.scatter(baskets, scaling_factor, s=5)
plt.plot(basketCont, fittedData)
plt.grid()
plt.show()
I personally could not get a good fit to your data using the equation you posted, however the Hill sigmoidal equation gave a good fit. Here is the Python code for the graphical fitter I used.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import warnings
baskets = numpy.array([475.0, 108.0, 2.0, 38.0, 320.0])
scaling_factor = numpy.array([95.5, 57.7, 1.4, 21.9, 88.8])

# rename data for simpler code re-use later
xData = baskets
yData = scaling_factor


def func(x, a, b, c):  # Hill sigmoidal equation from zunzun.com
    """Evaluate the Hill sigmoidal equation ``a * x**b / (c**b + x**b)``.

    Args:
        x: Scalar or array-like of independent-variable values.
        a: Overall scale of the curve.
        b: Exponent applied to both ``x`` and ``c``.
        c: Value of ``x`` at which the result equals ``a / 2``.

    Returns:
        The equation evaluated element-wise over ``x``.
    """
    # asarray lets plain Python lists be passed, not only numpy arrays
    x = numpy.asarray(x, dtype=float)
    x_pow = numpy.power(x, b)  # computed once, used in numerator and denominator
    return a * x_pow / (numpy.power(c, b) + x_pow)


# these are the same as the scipy defaults
initialParameters = numpy.array([1.0, 1.0, 1.0])

# do not print unnecessary warnings during curve_fit(); the filter is scoped
# to the fit so that warnings raised anywhere else are still reported
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    # curve fit the test data
    fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData  # signed residuals, despite the name

SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared errors
RMSE = numpy.sqrt(MSE)  # Root Mean Squared Error, RMSE
# R-squared = 1 - SS_res / SS_tot.  This model has no constant term, so its
# residuals need not average to zero; summing the raw squared errors keeps
# that bias in the score, whereas var(absError) would subtract it out.
Rsquared = 1.0 - (numpy.sum(SE) / numpy.sum(numpy.square(yData - numpy.mean(yData))))

print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Show the raw data as a scatter plot with the fitted model drawn over it.

    Reads the module-level ``xData``, ``yData``, ``func`` and
    ``fittedParameters`` set up by the fitting code earlier in the script.

    Args:
        graphWidth: Figure width in pixels (the figure is created at 100 dpi).
        graphHeight: Figure height in pixels.
    """
    f = plt.figure(figsize=(graphWidth / 100.0, graphHeight / 100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot (linspace defaults to 50 points)
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data')  # X axis data label
    axes.set_ylabel('Y Data')  # Y axis data label

    plt.show()
    # clean up after using pyplot: close only the figure created here so that
    # any other figure the caller has open is left alone
    plt.close(f)


graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
I have a data set that I know fits to a curve of the form:
y = a x²
and I want to extract the value of a.
What's the best way to go about this in Python (with scipy etc.) ?
Here is a graphical fitter example using scipy's curve_fit():
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# Example data; replace with your own measurements.
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7])
yData = numpy.array([1.1, 20.2, 30.3, 60.4, 50.0, 60.6, 70.7])


def func(x, a):
    """Evaluate the model ``y = a * x**2``.

    Args:
        x: Scalar or array-like of independent-variable values.
        a: The single coefficient being fitted.

    Returns:
        ``a * x**2`` evaluated element-wise over ``x``.
    """
    # asarray lets plain Python lists be passed, not only numpy arrays
    x = numpy.asarray(x, dtype=float)
    return a * numpy.square(x)


# same as the scipy default
initialParameters = numpy.array([1.0])

# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData  # signed residuals, despite the name

SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared errors
RMSE = numpy.sqrt(MSE)  # Root Mean Squared Error, RMSE
# R-squared = 1 - SS_res / SS_tot.  This model has no constant term, so its
# residuals need not average to zero; summing the raw squared errors keeps
# that bias in the score, whereas var(absError) would subtract it out.
Rsquared = 1.0 - (numpy.sum(SE) / numpy.sum(numpy.square(yData - numpy.mean(yData))))

print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Show the raw data as a scatter plot with the fitted model drawn over it.

    Reads the module-level ``xData``, ``yData``, ``func`` and
    ``fittedParameters`` set up by the fitting code earlier in the script.

    Args:
        graphWidth: Figure width in pixels (the figure is created at 100 dpi).
        graphHeight: Figure height in pixels.
    """
    f = plt.figure(figsize=(graphWidth / 100.0, graphHeight / 100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot (linspace defaults to 50 points)
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data')  # X axis data label
    axes.set_ylabel('Y Data')  # Y axis data label

    plt.show()
    # clean up after using pyplot: close only the figure created here so that
    # any other figure the caller has open is left alone
    plt.close(f)


graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
I have a function polyfit that should take the data x and y and return a 2D line fitted to that data using linear regression. I get a good result, but it looks too good, and I don't know whether I am doing it correctly all the way to the end.
#creating the data and plotting them
# Fix the random seed so the noisy samples are reproducible.
np.random.seed(0)
N = 10 # number of data points
# N evenly spaced x values from 0 to 2*pi (one full sine period)
x = np.linspace(0,2*np.pi,N)
# Sine of x plus Gaussian noise with mean 0 and standard deviation 0.3
y = np.sin(x) + np.random.normal(0,.3,x.shape)
# Plot the raw samples as unconnected circles
plt.figure()
plt.plot(x,y,'o')
plt.xlabel('x')
plt.ylabel('y')
plt.title('2D data (#data = %d)' % N)
plt.show()
# Least-squares fit attempted by the asker, solved through the normal equations
# theta = inv(X^T X) X^T Y.
# NOTE(review): several things here look unintended and probably explain the
# "too good" result described in the question:
#   - X stacks y itself as its third column, so the target is one of the regressors;
#   - the degree and delta arguments are overwritten before they are ever read;
#   - pred is reduced with np.sum, so it is a single number rather than one
#     prediction per data point;
#   - x_theta, loss and c are computed but never returned, and c calls math.pow
#     although no import of math is visible in this snippet.
def polyfit(x,y,degree,delta):
#x,y
X = np.vstack([np.ones(x.shape), x, y]).T
Y = np.vstack([y]).T
XtX = np.dot(X.T, X)
XtY = np.dot(X.T, Y)
theta = np.dot(np.linalg.inv(XtX), XtY)
degree = theta.shape[0]
delta = theta.T * theta
x_theta = X.T * theta
pred = np.sum([theta* x])
loss = np.dot((Y.T - x_theta).T, (Y.T - x_theta))
c = theta[0] + theta[1] * x[1] + theta[2] * math.pow(x[2],2)
return pred
# NOTE(review): polyfit returns pred, which it builds with np.sum and which is
# therefore a single number; fin is then y shifted by a constant rather than a
# fitted line or a per-point residual - confirm the intended output.
result = polyfit(x,y,2,2)
fin = y - result
# Plot fin against x as a green dashed line with circle markers
plt.plot(x, fin, 'go--')
Data image:
Result of the fitted line:
Here is a graphing example polynomial fitter using numpy's polyfit() for the fit and numpy's polyval() to calculate model predictions, along with RMSE and R-squared values.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
# Example observations: x positions and the value measured at each of them.
xData = numpy.asarray([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
yData = numpy.asarray([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])

# degree of the polynomial to fit (2 gives a quadratic)
polynomialOrder = 2

# least-squares polynomial fit; coefficients come back highest power first,
# which is the order numpy.polyval expects
fittedParameters = numpy.polyfit(xData, yData, deg=polynomialOrder)
print('Fitted Parameters:', fittedParameters)

# evaluate the fitted polynomial at the sample points
modelPredictions = numpy.polyval(fittedParameters, xData)

# goodness-of-fit statistics derived from the residuals
absError = modelPredictions - yData
SE = absError * absError  # squared errors
MSE = SE.mean()  # mean squared errors
RMSE = numpy.sqrt(MSE)  # Root Mean Squared Error, RMSE
Rsquared = 1.0 - absError.var() / yData.var()

print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Show the raw data as a scatter plot with the fitted polynomial over it.

    Reads the module-level ``xData``, ``yData`` and ``fittedParameters`` set
    up by the fitting code earlier in the script.

    Args:
        graphWidth: Figure width in pixels (the figure is created at 100 dpi).
        graphHeight: Figure height in pixels.
    """
    f = plt.figure(figsize=(graphWidth / 100.0, graphHeight / 100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')

    # create data for the fitted equation plot (linspace defaults to 50 points)
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = numpy.polyval(fittedParameters, xModel)

    # now the model as a line plot
    axes.plot(xModel, yModel)

    axes.set_xlabel('X Data')  # X axis data label
    axes.set_ylabel('Y Data')  # Y axis data label

    plt.show()
    # clean up after using pyplot: close only the figure created here so that
    # any other figure the caller has open is left alone
    plt.close(f)


graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)