Python multivariable nonlinear regression calculation - python

i was trying to make a curve fit of data and want to find nonlinear regression equation.
thats what my plot looks like i got x,y data which will be my reference data,
then i got x0 and y0 which will be my second point,
dx and dy will be difference between them
when i show this as vector it showed form of
when i convert dx,dy to R and theta it showed x^2+y^2 form,
is it possible to find those equation with it?
here's my current code
import matplotlib.pyplot as plt
import numpy as np
import math
import seaborn as sns
from statsmodels.formula.api import ols
from mpl_toolkits.mplot3d import Axes3D
from scipy.interpolate import griddata
"""setting dpi for graph shown in editor"""
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 300
import pandas as pd
"""reading data from excel sheet 1"""
df = pd.read_excel(r'C:\Users\JRKIM\Desktop\data\2513data.xlsx')
"""variable selection"""
tx_0 = df.loc[:,'TRUE_x_0']
ty_0 = df.loc[:,'TRUE_y_0']
v_x_0 = df.loc[:,'vx']
v_y_0 = df.loc[:,'vy']
dx0_0 = tx_0-v_x_0
dy0_0 = ty_0-v_y_0
dr0_0 = df.loc[:,'dr']
fig1, ax0 = plt.subplots()
ax0.set_title("delta0 in vector")
qk = ax0.quiver(tx_0,ty_0,dx0_0,dy0_0)
ax0.scatter(tx_0, ty_0, color='r', s=1)
"""3d graph with vector and position """
fig4 = plt.figure()
ax4 = fig4.add_subplot(111, projection='3d')
ax4.scatter(tx_0, ty_0, dr0_0, marker='*',linewidth = 0.01, cmap="jet")
ax4.set_xlabel('X Label')
ax4.set_ylabel('Y Label')
ax4.set_zlabel('dr Label')

you can use scipy.optimize.curve_fit as follows.
from scipy.optimize import curve_fit
def quadratic_function(x, a, b):
return a * x[0]**2 + b * x[1]**2 # + c * x[0]*x[1] if you want to satisfy quadratic form
xdata = np.vstack([np.array(dx0_0).flatten(),np.array(dy0_0).flatten()])
ydata = np.array(dr0_0).flatten()
popt, pcov = curve_fit(quadratic_function,xdata,ydata)
print(popt) # values for a and b
this should get you the coefficients for a and b for the least squares fitting of a*x**2 + b*y**2 = r, you have to further calculate the error and see if the error is low enough for your liking.
Edit: corrected the dimensions of inputs

Related

curve fitting to estimate parameters in python

#for months, I have been trying to fit a model to x and y data in python, but could not #achieve that. The aim is to estimate P1 and P2. See one of codes that fails below.
import numpy as np
from numpy import*
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
#IMPORTING DATA
Xdata = np.array([2.0,4.0,6.0,8.0,10.0,12.0,14.0,16.0,18.0,20.0,22.0,24.0])
Ydata = np.array([0.19,1.25,2.53,4.27,7.02,9.03,11.05,14.85,15.23,17.32,18.71,19.91])
#Plotting data
plt.figure(figsize=(6,3))
plt.xlim(0.0,25.0)
plt.ylim(0.0,25.0)
plt.xlabel("Time (hour)")
plt.ylabel("Data")
plt.title("plotting Data")
plt.plot(Xdata,Ydata, 'or',ms='10', label = 'Data')
#definining fitting function
def Q1(X,P1,P2):
lambda1=sqrt(X/P2)
top = P1*CL*A
bottom = lambda1*sinh(lambda1)
return top/bottom/X
CL = 1.0 # umol/mL == umol/cm^3
A = 1.0 # cm^2
#Optiminization
popt,pcov = curve_fit(Q1,Xdata,Ydata)
print(popt)
#plot the fitted function
Xstart = 1
Xstop = 24
increment = 0.1
xfit = np.arange(Xstart,Xstop,increment)
yfit = Q1(xfit,*popt)
plt.plot(xfit,yfit,'b',label = "fit")
plt.legend()

catastrophic fit with python curve_fit

I need to fit data (x axes: sigma, y axes : Mbh) with an exponential model. This is my code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
#define my data
Mbh = np.array([1.8e6,2.5e6,4.5e7,3.7e7,4.4e7,1.5e7,1.4e7,4.1e7, 1.0e9,2.1e8,1.0e8,1.0e8,1.6e7,1.9e8,3.9e7,5.2e8,3.1e8,3.0e8,7.0e7,1.1e8,3.0e9,5.6e7,7.8e7,2.0e9,1.7e8,1.4e7,2.4e8,5.3e8,3.3e8,3.5e6,2.5e9])
sigma = np.array([103,75,160,209,205,151,175,140,230,205,145,206,143,182,130,315,242,225,186,190,375,162,152,385,177,90,234,290,266,67,340])
#define my model to fit
def Mbh02(alpha, sigma, beta):
return alpha * np.exp(beta*sigma);
#calculate the fit parameter:
#for second model
popt02, pcov02 = curve_fit(Mbh02, sigma, Mbh, p0=[1, 0.058])
print(f'Parameter of the second function : {popt02}')
sigma_plot = [103,75,160,209,205,151,175,140,230,205,145,206,143,182,130,315,242,225,186,190,375,162,152,385,177,90,234,290,266,67,340]
sigma_plot.sort()
sigma_plot = np.array(sigma_plot)
#plot model with data with
plt.figure(figsize=(6,6))
plt.scatter(sigma, Mbh * 1e-9, marker = '+', color ='black', label = 'Data')
plt.plot(sigma_plot , Mbh02(alpha = popt02[0], sigma = sigma_plot, beta = popt02[1]) * 1e-9, color='orange', ls ='-', label ='2. fit')
plt.ylabel(r'$M_{BH}$ in $M_\odot *10^9$ unit', fontsize=16)
plt.xlabel(r'$\sigma$', fontsize=16)
# plt.ylim(-1,10)
plt.title('Plot of the black hole mass $M_{BH}$ \nagainst the velocity dispersion $\sigma$ \nfor different elliptical galaxies', fontsize=18)
plt.grid(True)
plt.legend()
plt.show()
and I get the following parameter
:
print(popt01) = [16.13278858 0.91788691]
which looks :
If I try to find the parameter manually, and plotting them with:
plt.plot(sigma_plot , (1 * np.exp(0.058 * sigma_plot)) * 1e-9, ls ='--', label ='2. fit manual')
I get the following plot which is much better:
What is the problem ? Why is curve_fit not working and giving such parameter ?
In the curve_fit documentation, it says
Assumes ydata = f(xdata, *params) + eps
So if you change your function definition so that the x data is first in your function, it will work:
def Mbh02(sigma, alpha, beta):
return alpha * np.exp(beta*sigma);
# Rest of code
plt.plot(sigma_plot , Mbh02(sigma_plot, *popt02) * 1e-9, color='orange', ls ='-')
Have you tried fitting the log(Mbh) with a linear fit instead of fitting the exp. model directly? This usually gives a lot of stability.
import numpy as np
import matplotlib.pyplot as plt
Mbh = np.array([1.8e6,2.5e6,4.5e7,3.7e7,4.4e7,1.5e7,1.4e7,4.1e7, 1.0e9,2.1e8,1.0e8,1.0e8,1.6e7,1.9e8,3.9e7,5.2e8,3.1e8,3.0e8,7.0e7,1.1e8,3.0e9,5.6e7,7.8e7,2.0e9,1.7e8,1.4e7,2.4e8,5.3e8,3.3e8,3.5e6,2.5e9])
sigma = np.array([103,75,160,209,205,151,175,140,230,205,145,206,143,182,130,315,242,225,186,190,375,162,152,385,177,90,234,290,266,67,340])
plt.figure(2)
plt.plot(sigma,Mbh,'.')
lnMbh= np.log(Mbh)
p = np.polyfit(sigma,lnMbh,1)
plt.plot(sigma, np.exp(np.polyval(p,sigma)),'*')
alpha = np.log(p[0])
beta = p[1]

Fitting 3D Gaussian to data set

I have a data file with first column x, second coulmn y and third column z. I can call these values via
x=mat0[:,0]
That is not the problem. I can also create and plot a 3D Gaussian with these data or (as you see in my script below) via definition of the function "twoD_Gauss".
Now I want to fit this function "twoD_Gauss" to the dataset (x,y,z) and print out the values for amplitude sigma etc.
This is what I got:
from matplotlib import pyplot;
from pylab import genfromtxt;
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from numpy.random import randn
from scipy import array, newaxis
# Load file into mat0
mat0 = genfromtxt("0005.map");
fig = plt.figure(figsize=(20,10))
############ 3D ###############
ax = fig.add_subplot(1, 2, 2, projection='3d')
#Load data
mat0 = genfromtxt("0005.map");
# define Gaussian
def twoD_Gauss((x,y),amplitude,x0,y0,sigma_x,sigma_y,offset):
x0=float(x0)
y0=float(y0)
return offset + amplitude*np.exp(-(((x-x0)**(2)/(2*sigma_x**(2))) + ((y-y0)**(2)/(2*sigma_y**(2)))))
#define x and y and z (z not used, x and y shifted)
x = mat0[:,0]-150
y = mat0[:,1]-143
z = mat0[:,2]
#create data
data = twoD_Gauss((x, y), 15, 0, 0, 20, 20, 10)
# plot twoD_Gaussian data generated above
ax = plt.axes(projection='3d')
ax.plot_trisurf(x, y, data, cmap="jet", linewidth=0)
#FITTING HELP!
initial_guess = (24000,0,0,25,25,6000)
params, pcov = opt.curve_fit(twoD_Gauss, (x,y), data,initial_guess)
print(params)
plt.show()
I think I did it correct, but its actually not fitting.
The printed params are the parameters I gave in data.
Ok I found the solution myself:
My problem was that I fitted to the data but data was already defined. So what I did is, I changed data to the z-data given in my file.
data=mat0[:,2]
Now the curve_fit fits the twoD_Gauss via (x,y) to the given z-values.

Python - Matplotlib: normalize axis when plotting a Probability Density Function

I'm using Python and some of its extensions to get and plot the Probability Density Function. While I manage to plot it, in its form, at least, I don't manage to succeed on scalating the axis.
import decimal
import numpy as np
import scipy.stats as stats
import pylab as pl
import matplotlib.pyplot as plt
from decimal import *
from scipy.stats import norm
lines=[]
fig, ax = plt.subplots(1, 1)
mean, var, skew, kurt = norm.stats(moments='mvsk')
#Here I delete some lines aimed to fill the list with values
Long = len(lines)
Maxim = max(lines) #MaxValue
Minim = min(lines) #MinValue
av = np.mean(lines) #Average
StDev = np.std(lines) #Standard Dev.
x = np.linspace(Minim, Maxim, Long)
ax.plot(x, norm.pdf(x, av, StDev),'r-', lw=3, alpha=0.9, label='norm pdf')
weights = np.ones_like(lines)/len(lines)
ax.hist(lines, weights = weights, normed=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
The result is
While I would like to have it expressed
- In the x-axis centered in 0 and related to the standard deviation
- In the y-axis, related to the histogram and the %s (normalized to 1)
For the x-axis as the image below
And like this last image for the y-axis
I've managed to escalate the y-axis in a histogram by plotting it individually with the instruction weights = weights and setting it into the plot, but I can't do it here. I include it in the code but actually it does nothing in this case.
Any help would be appreciated
the y-axis is normed in a way, that the area under the curve is one.
And adding equal weights for every data point makes no sense if you normalize anyway with normed=True.
first you need to shift your data to 0:
lines -= mean(lines)
then plot it.
ythis should be a working minimal example:
import numpy as np
from numpy.random import normal
import matplotlib.pyplot as plt
from scipy.stats import norm
# gaussian distributed random numbers with mu =4 and sigma=2
x = normal(4, 2, 10000)
mean = np.mean(x)
sigma = np.std(x)
x -= mean
x_plot = np.linspace(min(x), max(x), 1000)
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.hist(x, bins=50, normed=True, label="data")
ax.plot(x_plot, norm.pdf(x_plot, mean, sigma), 'r-', label="pdf")
ax.legend(loc='best')
x_ticks = np.arange(-4*sigma, 4.1*sigma, sigma)
x_labels = [r"${} \sigma$".format(i) for i in range(-4,5)]
ax.set_xticks(x_ticks)
ax.set_xticklabels(x_labels)
plt.show()
output image is this:
and you have too much imports.
you import decimals twice, one time even with *
and then numpy, pyplot and scipy are included in pylab. Also why import the whole scipy.stats and then again import just norm from it?

How can I find the right gaussian curve given some data?

I have code that draws from a gaussian in 1D:
import numpy as np
from scipy.stats import norm
from scipy.optimize import curve_fit
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import gauss
# Beginning in one dimension:
mean = 0; Var = 1; N = 1000
scatter = np.random.normal(mean,np.sqrt(Var),N)
scatter = np.sort(scatter)
mu,sigma = norm.fit(scatter)
I obtain mu and sigma using norm.fit()
Now I'd like to obtain my parameters using
xdata = np.linspace(-5,5,N)
pop, pcov = curve_fit(gauss.gauss_1d,xdata,scatter)
The problem is I don't know how to map my scattered points (drawn from a 1D gaussian) to the x-line in order to use curve_fit.
Also, suppose I simply use and mu and sigma as earlier.
I plot using:
n, bins, patches = plt.hist(scatter,50,facecolor='green')
y = 2*max(n)*mlab.normpdf(bins,mu,sigma)
l = plt.plot(bins,y,'r--')
plt.xlabel('x-coord')
plt.ylabel('Occurrences')
plt.grid(True)
plt.show()
But I have to guess the amplitude as 2*max(n). It works but it's not robust. How can I find the amplitude without guessing?
To avoid guessing the amplitude, call hist() with normed=True, then the amplitude corresponds to normpdf().
For doing a curve fit, I suggest to use not the density but the cumulative distribution: Each sample has a height of 1/N, which successively sum up to 1. This has the advantage that you don't need to group samples in bins.
import numpy as np
from scipy.stats import norm
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
# Beginning in one dimension:
mean = 0; Var = 1; N = 100
scatter = np.random.normal(mean,np.sqrt(Var),N)
scatter = np.sort(scatter)
mu1,sigma1 = norm.fit(scatter) # classical fit
scat_sum = np.cumsum(np.ones(scatter.shape))/N # cumulative samples
[mu2,sigma2],Cx = curve_fit(norm.cdf, scatter, scat_sum, p0=[0,1]) # curve fit
print(u"norm.fit(): µ1= {:+.4f}, σ1={:.4f}".format(mu1, sigma1))
print(u"curve_fit(): µ2= {:+.4f}, σ2={:.4f}".format(mu2, sigma2))
fg = plt.figure(1); fg.clf()
ax = fg.add_subplot(1, 1, 1)
t = np.linspace(-4,4, 1000)
ax.plot(t, norm.cdf(t, mu1, sigma1), alpha=.5, label="norm.fit()")
ax.plot(t, norm.cdf(t, mu2, sigma2), alpha=.5, label="curve_fit()")
ax.step(scatter, scat_sum, 'x-', where='post', alpha=.5, label="Samples")
ax.legend(loc="best")
ax.grid(True)
ax.set_xlabel("$x$")
ax.set_ylabel("Cumulative Probability Density")
ax.set_title("Fit to Normal Distribution")
fg.canvas.draw()
plt.show()
prints
norm.fit(): µ1= +0.1534, σ1=1.0203
curve_fit(): µ2= +0.1135, σ2=1.0444
and plots

Categories