How can I fit an equation which is not a function - python

Given data points in the xy plane, I would like to use scipy.optimize.leastsq to find fit parameters for an ellipse, which cannot be written as y as a function of x. I tried setting the entire equation equal to zero and then fitting the resulting expression, but the fit fails, exiting with the message
"The relative error between two consecutive iterates is at most 0.000000."
The code is shown below, as well as the output. The fitter clearly does not find any reasonable parameters. My question is whether this is a problem with scipy.optimize.leastsq, or whether the "trick" of setting the equation equal to zero and fitting that instead is not valid.
from scipy.optimize import leastsq, curve_fit
import numpy as np
import matplotlib.pyplot as plt

def function(x, y, theta, smaj, smin):
    xp = np.cos(theta)*x - np.sin(theta)*y
    yp = np.sin(theta)*x + np.cos(theta)*y
    z = ((xp)**2)/smaj**2 + ((yp)**2)/smin**2
    return z

def g(x, y, smaj, smin):
    return x*x/smaj**2 + y*y/smin**2

def window(array, alt, arange):
    arr = [array[i] for i, a in enumerate(alt) if a > arange[0] and a < arange[1]]
    return np.asarray(arr)

def fitter(p0, x, y, func, errfunc, err):
    # the fitter function
    out = leastsq(errfunc, p0, args=(x, y, func, err), full_output=1)
    pfinal = out[0]
    covar = out[1]
    mydict = out[2]
    mesg = out[3]
    ier = out[4]
    resids = mydict['fvec']
    chisq = np.sum(resids**2)
    degs_frdm = len(x) - len(pfinal)
    reduced_chisq = chisq/degs_frdm
    ls = [pfinal, covar, mydict, mesg, ier, resids, chisq, degs_frdm, reduced_chisq]
    print('fitter status: ', ier, '-- aka -- ', mesg)
    i = 0
    if covar is not None:
        if (ier == 1 or ier == 2 or ier == 3 or ier == 4):
            for u in pfinal:
                print('Param', i+1, ': ', u, ' +/- ', np.sqrt(covar[i, i]))
                i = i + 1
            print('reduced chisq', reduced_chisq)
    else:
        print('fitter failed')
    return ls

def func(x, y, p):
    x = x - p[3]
    y = y - p[4]
    xp = np.cos(p[0])*(x) - np.sin(p[0])*(y)
    yp = np.sin(p[0])*(x) + np.cos(p[0])*(y)
    z = ((xp)**2)/p[1]**2 + ((yp)**2)/p[2]**2 - 1
    return z

def errfunc(p, x, y, func, err):
    return (y - func(x, y, p))/err

t = np.linspace(0, 2*np.pi, 100)
xx = 5*np.cos(t); yy = np.sin(t)
p0 = [0, 5, 1, 0, 0]
sigma = np.ones(len(xx))

fit = fitter(p0, xx, yy, func, errfunc, sigma)
params = fit[0]
covariance = fit[1]
residuals = fit[5]

t = np.linspace(0, 2*np.pi, 100)
xx = 5*np.cos(t); yy = np.sin(t)
plt.plot(xx, yy, 'bx', ms=4)

xx = np.linspace(-10, 10, 1000)
yy = np.linspace(-5, 5, 1000)
newx = []
newy = []
for x in xx:
    for y in yy:
        if 0.99 < func(x, y, params) < 1.01:
            # if g(x, y, 5, 1) == 1:
            newx.append(x)
            newy.append(y)
plt.plot(newx, newy, 'kx', ms=1)
plt.show()
The blue crosses are the actual data, and the black points are the curve implied by the fitter's guess at the parameters.
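For what it's worth, here is a minimal sketch (my own, not taken from the post) of the implicit-fit idea: since every data point satisfies F(x, y; p) = 0 for the ellipse, the residual passed to leastsq can simply be F itself, and leastsq will drive it toward zero.

import numpy as np
from scipy.optimize import leastsq

def ellipse_residual(p, x, y):
    # p = [theta, smaj, smin, x0, y0]; the residual is the implicit ellipse
    # equation itself, which should be zero for points on the curve
    theta, smaj, smin, x0, y0 = p
    xs, ys = x - x0, y - y0
    xp = np.cos(theta)*xs - np.sin(theta)*ys
    yp = np.sin(theta)*xs + np.cos(theta)*ys
    return xp**2/smaj**2 + yp**2/smin**2 - 1

t = np.linspace(0, 2*np.pi, 100)
xx, yy = 5*np.cos(t), np.sin(t)
p_fit, ier = leastsq(ellipse_residual, [0.1, 4.0, 2.0, 0.0, 0.0], args=(xx, yy))
print(p_fit)  # for this synthetic data, smaj should come out near 5 and smin near 1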

Related

Adding Additional Rows To Tensors

I want to simulate a data generating process via tensor methods. In the end, the data will be exported to a csv file such that each row corresponds to a time period and each column corresponds to a unit. The following code
import numpy as np
import pandas as pd
import random
import tensorly as tl
from itertools import product
import matplotlib.pyplot as plt
import tensorly.decomposition

np.random.seed(1812)

# Data Generation
L = 0.05
H = 0.05
dx = 0.0025
dy = 0.0025
tmax = 60
dt = 0.01
epsilon = 0.0001
alpha = 0.5e-5 + np.random.random()*1e-5
SimulateData = []
SimulateDataNoNoise = []
r_x = alpha*dt/dx**2
r_y = alpha*dt/dy**2
fo = r_x + r_y
if fo > 0.5:
    msg = f'Current Fo = {fo}, which is numerically unstable (>0.5)'
    raise ValueError(msg)

# x, y meshgrid based on dx, dy
nx = int(L/dx + 1)
ny = int(H/dy + 1)
X, Y = np.meshgrid(np.linspace(0, L, nx), np.linspace(0, H, ny))

# center point of the domain
ic = int((nx-1)/2)
jc = int((ny-1)/2)

# initial and boundary conditions
S = np.zeros((ny, nx))

def enforceBdy(S):
    '''Enforces the boundary conditions on S, the temperature values on the domain's grid points'''
    S[:, 0] = 1
    S[:, -1] = 1
    S[0, :] = 1
    S[-1, :] = 1
    return S

S = enforceBdy(S)

def Laplace(T):
    '''Computes the Laplacian operator, del-squared, on the data'''
    tmp_x, tmp_y = np.gradient(T, dx, dy)
    tmp_x, _ = np.gradient(tmp_x, dx)
    _, tmp_y = np.gradient(tmp_y, dy)
    return tmp_x + tmp_y

# iteration
nmax = int(tmax/dt)
for n in range(nmax):
    dSdt = alpha*Laplace(S)
    S = S + dSdt*dt
    S = enforceBdy(S)
    if n % 100 == 0:
        noise = np.random.normal(size=S.shape)*.1
        SimulateData.append(S.copy() + noise)
        SimulateDataNoNoise.append(S.copy())
    # check for convergence
    err = np.abs(dSdt*dt).max()
    if err <= epsilon:
        break

# Creates Tensor
X = np.stack(SimulateData, 2)
nx, ny, nt = X.shape

# CP Decomposition
err = []
for i in range(1, 11):
    CP_Heat = tl.decomposition.parafac(X, i)
    reconstructed = tl.kruskal_to_tensor(CP_Heat)
    err.append(((X - reconstructed)**2).sum())
AIC1 = [2*e + 2*(i+1) for i, e in enumerate(err)]
AIC2 = [2*e + (i+1)*nx + (i+1)*ny + (i+1)*nt for i, e in enumerate(err)]
AIC = AIC2
idxmin = np.argmin(AIC)
R = idxmin + 1
min_AIC = AIC[idxmin]

Y = np.zeros((21, 40))
beta = np.random.randint(low=-0, high=15, size=21).reshape(-1, 1)
for i in range(40):
    RHS = 15 + X[:,:,i] @ beta + np.random.normal(size=21).reshape(-1, 1)
    Y[:, i] = RHS.ravel()
Y
np.savetxt("Sim1.csv", Y, delimiter=",")
This returns a CSV file of 21 rows and 40 columns. Suppose, however, that I wanted 40 or 70 rows in the final file, still with 40 columns. How would I do this? When I try with the number 22, changing only the final section of the same code to
Y = np.zeros((22, 40))
beta = np.random.randint(low=-0, high=15, size=22).reshape(-1, 1)
for i in range(40):
    RHS = 15 + X[:,:,i] @ beta + np.random.normal(size=22).reshape(-1, 1)
    Y[:, i] = RHS.ravel()
Y
np.savetxt("Sim1.csv", Y, delimiter=",")
Python throws an exception saying "(size 22 is different from 21)", but I'm unclear on where the 21 comes from when I do not specify the number 21 anywhere in my code.
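(Not part of the original question, but for context: the 21 appears to come from the spatial grid rather than from anything typed explicitly. A quick check under that reading:)

nx = int(L/dx + 1)   # int(0.05/0.0025 + 1) = 21 grid points per direction
# each snapshot in SimulateData is (ny, nx) = (21, 21), so the slice X[:, :, i]
# is 21 x 21, and X[:, :, i] @ beta only works if beta has 21 rows
print(nx)  # 21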

Drawing Poincare Section using Python

I want to plot a Poincare section of the following DE, which notably has a periodic potential function V(x) = -cos(x).
After calculating the solution using RK4 with time step dt = 0.001, the section Python drew differs greatly from the one shown in the textbook (2nd ed., by J.M.T. Thompson and H.B. Stewart); both plots are omitted here.
Since my Poincare section does not appear as the authors draw it, I suspect there must be some error in my code. However, I have done the same for other forced-oscillation DEs, including Duffing's equation, and obtained sections identical to those in the textbook. So I am wondering whether there is a typo in the equation given by the textbook, or a mistake somewhere else. I have posted my code below; it may be quite messy to read, so I appreciate your patience with it.
import numpy as np
import matplotlib.pylab as plt
import matplotlib as mpl
import sys
import time

state = [1]

def print_percent_done(index, total, state, title='Please wait'):
    percent_done2 = (index+1)/total*100
    percent_done = round(percent_done2, 1)
    print(f'\t⏳{title}: {percent_done}% done', end='\r')
    if percent_done2 > 99.9 and state[0]:
        print('\t✅'); state = [0]

####
no = 1
####

def multiple(n, q):
    m = n; i = 0
    while m >= 0:
        m -= q
        i += 1
    return min(abs(n - (i - 1)*q), abs(i*q - n))

# system (2)
# Basic info.
filename = 'sinPotentialWell'
# a = 1
# alpha = 0.01
# w = 4
w0 = .5
n = 1000000
h = .01
t_0 = 0
x_0 = 0.1
y_0 = 0
A = [(t_0, x_0, y_0)]

def f(t, x, y):
    return y

def g(t, x, y):
    return -0.5*y - np.sin(x) + 1.1*np.sin(0.5*t)

for i in range(n):
    t0 = A[i][0]; x0 = A[i][1]; y0 = A[i][2]
    k1 = f(t0, x0, y0)
    u1 = g(t0, x0, y0)
    k2 = f(t0 + h/2, x0 + h*k1/2, y0 + h*u1/2)
    u2 = g(t0 + h/2, x0 + h*k1/2, y0 + h*u1/2)
    k3 = f(t0 + h/2, x0 + h*k2/2, y0 + h*u2/2)
    u3 = g(t0 + h/2, x0 + h*k2/2, y0 + h*u2/2)
    k4 = f(t0 + h, x0 + h*k3, y0 + h*u3)
    u4 = g(t0 + h, x0 + h*k3, y0 + h*u3)
    t = t0 + h
    x = x0 + (k1 + 2*k2 + 2*k3 + k4)*h/6
    y = y0 + (u1 + 2*u2 + 2*u3 + u4)*h/6
    A.append([t, x, y])
    if i % 1000 == 0: print_percent_done(i, n, state, 'Solving given DE')

# phase diagram
print('showing 3d_(x, y, phi) graph')
PHI = [[]]; X = [[]]; Y = [[]]
PHI_period1 = []; X_period1 = []; Y_period1 = []
for i in range(n):
    if w0*A[i][0] % (2*np.pi) < 1 and w0*A[i-1][0] % (2*np.pi) > 6:
        PHI.append([]); X.append([]); Y.append([])
        PHI_period1.append((w0*A[i][0]) % (2*np.pi)); X_period1.append(A[i][1]); Y_period1.append(A[i][2])
phi_period1 = np.array(PHI_period1); x_period1 = np.array(X_period1); y_period1 = np.array(Y_period1)

print('showing Poincare Section at phi=0')
plt.plot(x_period1, y_period1, 'gs', markersize=2)
plt.plot()
plt.title('phi=0 Poincare Section')
plt.xlabel('x'); plt.ylabel('y')
plt.show()
If you factor out some of the computation blocks, you can make the code more flexible and computations more direct. No need to reconstruct something if you can construct it in the first place. You want to catch the points where w0*t is a multiple of 2*pi, so just construct the time loops so you integrate in chunks of 2*pi/w0 and only remember the interesting points.
num_plot_points = 2000
h = .01
t, x, y = t_0, x_0, y_0
x_section, y_section = [], []
T = 2*np.pi/w0
for k in range(num_plot_points):
    t = 0
    while t < T - 1.2*h:
        x, y = RK4step(t, x, y, h)
        t += h
    x, y = RK4step(t, x, y, T - t)
    if k % 100 == 0: print_percent_done(k, num_plot_points, state, 'Solving given DE')
    x_section.append(x); y_section.append(y)
with RK4step just containing the code of the RK4 step.
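For completeness, a sketch of what that RK4step helper could look like, assembled from the RK4 stage code already shown in the question (the function name comes from the answer; the exact body is my assumption):

def RK4step(t0, x0, y0, h):
    # one classical RK4 step for x' = f(t, x, y), y' = g(t, x, y)
    k1 = f(t0, x0, y0)
    u1 = g(t0, x0, y0)
    k2 = f(t0 + h/2, x0 + h*k1/2, y0 + h*u1/2)
    u2 = g(t0 + h/2, x0 + h*k1/2, y0 + h*u1/2)
    k3 = f(t0 + h/2, x0 + h*k2/2, y0 + h*u2/2)
    u3 = g(t0 + h/2, x0 + h*k2/2, y0 + h*u2/2)
    k4 = f(t0 + h, x0 + h*k3, y0 + h*u3)
    u4 = g(t0 + h, x0 + h*k3, y0 + h*u3)
    x = x0 + (k1 + 2*k2 + 2*k3 + k4)*h/6
    y = y0 + (u1 + 2*u2 + 2*u3 + u4)*h/6
    return x, y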
This will not, by itself, solve the mystery. The veil is lifted once you consider that x is the angle theta (of a forced pendulum with friction) on a circle. Thus, to get points with the same spatial location, x needs to be reduced by multiples of 2*pi. Doing that,
plt.plot([x%(2*np.pi) for x in x_section], y_section, 'gs', markersize = 2)
results in the expected plot

How to animate this optimization model correctly

I have implemented a simple randomized, population-based optimization method, the Grey Wolf Optimizer (GWO). I am having some trouble properly capturing the Matplotlib plot at each iteration using the celluloid Camera package.
I am running GWO on the objective function f(x,y) = x^2 + y^2. I can see the candidate solutions converging to the minimum, but the contour plot doesn't show up.
Do you have any suggestions for how to display the contour plot in the background?
GWO Algorithm implementation
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
from celluloid import Camera
import ffmpeg
import PIL  # the pillow package is imported as PIL

# X : Position vector of the initial population
# n : Initial population size
def gwo(f, max_iterations, LB, UB):
    fig = plt.figure()
    camera = Camera(fig)

    def random_population_uniform(m, a, b):
        dims = len(a)
        x = [list(a + np.multiply(np.random.rand(dims), b - a)) for i in range(m)]
        return np.array(x)

    def search_agent_fitness(fitness):
        alpha = 0
        if fitness[1] < fitness[alpha]:
            alpha, beta = 1, alpha
        else:
            beta = 1
        if fitness[2] > fitness[alpha] and fitness[2] < fitness[beta]:
            beta, delta = 2, beta
        elif fitness[2] < fitness[alpha]:
            alpha, beta, delta = 2, alpha, beta
        else:
            delta = 2
        for i in range(3, len(fitness)):
            if fitness[i] <= fitness[alpha]:
                alpha, beta, delta = i, alpha, beta
            elif fitness[i] > fitness[alpha] and fitness[i] <= fitness[beta]:
                beta, delta = i, beta
            elif fitness[i] > fitness[beta] and fitness[i] <= fitness[delta]:
                delta = i
        return alpha, beta, delta

    def plot_search_agent_positions(f, X, alpha, beta, delta, a, b):
        # Plot the positions of search agents
        x = X[:, 0]
        y = X[:, 1]
        s = plt.scatter(x, y, c='gray', zorder=1)
        s = plt.scatter(x[alpha], y[alpha], c='red', zorder=1)
        s = plt.scatter(x[beta], y[beta], c='blue', zorder=1)
        s = plt.scatter(x[delta], y[delta], c='green', zorder=1)
        camera.snap()

    # Initialize the position of the search agents
    X = random_population_uniform(50, np.array(LB), np.array(UB))
    n = len(X)
    l = 1

    # Plot the first image on screen
    x = np.linspace(LB[0], LB[1], 1000)
    y = np.linspace(LB[0], UB[1], 1000)
    X1, X2 = np.meshgrid(x, y)
    Z = f(X1, X2)
    cont = plt.contour(X1, X2, Z, 20, linewidths=0.75)

    while (l < max_iterations):
        # Take the x,y coordinates of the initial population
        x = X[:, 0]
        y = X[:, 1]
        # Calculate the objective function for each search agent
        fitness = list(map(f, x, y))
        # Update alpha, beta and delta
        alpha, beta, delta = search_agent_fitness(fitness)
        # Plot search agent positions
        plot_search_agent_positions(f, X, alpha, beta, delta, LB, UB)
        # a decreases linearly from 2 to 0
        a = 2 - l*(2 / max_iterations)
        # Update the position of search agents including the Omegas
        for i in range(n):
            x_prey = X[alpha]
            r1 = np.random.rand(2)  # r1 is a random vector in [0,1] x [0,1]
            r2 = np.random.rand(2)  # r2 is a random vector in [0,1] x [0,1]
            A1 = 2*a*r1 - a
            C1 = 2*r2
            D_alpha = np.abs(C1*x_prey - X[i])
            X_1 = x_prey - A1*D_alpha

            x_prey = X[beta]
            r1 = np.random.rand(2)
            r2 = np.random.rand(2)
            A2 = 2*a*r1 - a
            C2 = 2*r2
            D_beta = np.abs(C2*x_prey - X[i])
            X_2 = x_prey - A2*D_beta

            x_prey = X[delta]
            r1 = np.random.rand(2)
            r2 = np.random.rand(2)
            A3 = 2*a*r1 - a
            C3 = 2*r2
            D_delta = np.abs(C3*x_prey - X[i])
            X_3 = x_prey - A3*D_delta

            X[i] = (X_1 + X_2 + X_3)/3
        l = l + 1
    return X[alpha], camera
Function call
# define the objective function
def f(x, y):
    return x**2 + y**2

minimizer, camera = gwo(f, 7, [-10, -10], [10, 10])
animation = camera.animate(interval=1000, repeat=True, repeat_delay=500)
Is it possible that the line x = np.linspace(LB[0],LB[1],1000) should be x = np.linspace(LB[0],UB[1],1000) instead? With your current definition, x is an array filled only with the value -10, which means that you are unlikely to find a contour.
Another thing that you might want to do is to move the cont = plt.contour(X1,X2,Z,20,linewidths=0.75) line inside of your plot_search_agent_positions function to ensure that the contour is plotted at each iteration of the animation.
Once you make those changes, the code looks like this:
import matplotlib.pyplot as plt
import numpy as np
from celluloid import Camera
import ffmpeg
import PIL
from matplotlib import animation, rc
from IPython.display import HTML, Image  # For GIF
from scipy.interpolate import griddata

rc('animation', html='html5')

# X : Position vector of the initial population
# n : Initial population size
def gwo(f, max_iterations, LB, UB):
    fig = plt.figure()
    fig.gca(aspect='equal')
    camera = Camera(fig)

    def random_population_uniform(m, a, b):
        dims = len(a)
        x = [list(a + np.multiply(np.random.rand(dims), b - a)) for i in range(m)]
        return np.array(x)

    def search_agent_fitness(fitness):
        alpha = 0
        if fitness[1] < fitness[alpha]:
            alpha, beta = 1, alpha
        else:
            beta = 1
        if fitness[2] > fitness[alpha] and fitness[2] < fitness[beta]:
            beta, delta = 2, beta
        elif fitness[2] < fitness[alpha]:
            alpha, beta, delta = 2, alpha, beta
        else:
            delta = 2
        for i in range(3, len(fitness)):
            if fitness[i] <= fitness[alpha]:
                alpha, beta, delta = i, alpha, beta
            elif fitness[i] > fitness[alpha] and fitness[i] <= fitness[beta]:
                beta, delta = i, beta
            elif fitness[i] > fitness[beta] and fitness[i] <= fitness[delta]:
                delta = i
        return alpha, beta, delta

    def plot_search_agent_positions(f, X, alpha, beta, delta, a, b, X1, X2, Z):
        # Plot the positions of search agents
        x = X[:, 0]
        y = X[:, 1]
        s = plt.scatter(x, y, c='gray', zorder=1)
        s = plt.scatter(x[alpha], y[alpha], c='red', zorder=1)
        s = plt.scatter(x[beta], y[beta], c='blue', zorder=1)
        s = plt.scatter(x[delta], y[delta], c='green', zorder=1)
        Z = f(X1, X2)
        cont = plt.contour(X1, X2, Z, levels=20, colors='k', norm=True)
        plt.clabel(cont, cont.levels, inline=True, fontsize=10)
        camera.snap()

    # Initialize the position of the search agents
    X = random_population_uniform(50, np.array(LB), np.array(UB))
    n = len(X)
    l = 1

    # Plot the first image on screen
    x = np.linspace(LB[0], UB[1], 1000)
    y = np.linspace(LB[0], UB[1], 1000)
    X1, X2 = np.meshgrid(x, y)
    Z = f(X1, X2)

    while (l < max_iterations):
        # Take the x,y coordinates of the initial population
        x = X[:, 0]
        y = X[:, 1]
        # Calculate the objective function for each search agent
        fitness = list(map(f, x, y))
        # Update alpha, beta and delta
        alpha, beta, delta = search_agent_fitness(fitness)
        # Plot search agent positions
        plot_search_agent_positions(f, X, alpha, beta, delta, LB, UB, X1, X2, Z)
        # a decreases linearly from 2 to 0
        a = 2 - l*(2 / max_iterations)
        # Update the position of search agents including the Omegas
        for i in range(n):
            x_prey = X[alpha]
            r1 = np.random.rand(2)  # r1 is a random vector in [0,1] x [0,1]
            r2 = np.random.rand(2)  # r2 is a random vector in [0,1] x [0,1]
            A1 = 2*a*r1 - a
            C1 = 2*r2
            D_alpha = np.abs(C1*x_prey - X[i])
            X_1 = x_prey - A1*D_alpha

            x_prey = X[beta]
            r1 = np.random.rand(2)
            r2 = np.random.rand(2)
            A2 = 2*a*r1 - a
            C2 = 2*r2
            D_beta = np.abs(C2*x_prey - X[i])
            X_2 = x_prey - A2*D_beta

            x_prey = X[delta]
            r1 = np.random.rand(2)
            r2 = np.random.rand(2)
            A3 = 2*a*r1 - a
            C3 = 2*r2
            D_delta = np.abs(C3*x_prey - X[i])
            X_3 = x_prey - A3*D_delta

            X[i] = (X_1 + X_2 + X_3)/3
        l = l + 1
    return X[alpha], camera

# define the objective function
def f(x, y):
    return x**2 + y**2

minimizer, camera = gwo(f, 7, [-10, -10], [10, 10])
animation = camera.animate(interval=1000, repeat=True, repeat_delay=500)
And the output now gives the expected animation, with the contour plot visible in the background.
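If you also want to write the animation to disk rather than only display it, the object returned by camera.animate is a regular Matplotlib ArtistAnimation, so something along these lines should work (the file name here is just an example):

animation = camera.animate(interval=1000, repeat=True, repeat_delay=500)
animation.save('gwo.gif', writer='pillow')  # or writer='ffmpeg' to produce a video file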

Damping harmonic oscillation code with python

I don't know how to make my damped harmonic oscillation code produce all three graphs:
[X - t(time)], [V(velocity) - t(time)], [a(acceleration) - t(time)].
I can make the [X - t(time)] graph, but I don't know how to make the other two.
import numpy as np
from matplotlib import pyplot as plt

# mx'' = - bx' - kx
x_0 = 3
v_0 = 0
y_0 = np.array([x_0, v_0])  # first array

def Euler_Method(f, a, b, y0, step):
    t = np.linspace(a, b, step)
    h = t[1] - t[0]
    Y = [y0]
    N = len(t)
    n = 0
    y = y0
    for n in range(0, N-1):
        y = y + h*f(y, t[n])
        Y.append(y)
        n = n + 1
    Y = np.array(Y)
    return Y, t

def harmonic(y, t):
    k = 50
    m = 200
    b = 20  # drag coefficient
    a = (-1*k/m)*y[0] - (b/m)*y[1]  # x'' = a, y[0] : first position
    v = y[1]  # v = first velocity : y[1]
    f = np.array([v, a])
    return f

a = Euler_Method(harmonic, 0, 100, y_0, 100000)
X = a[0][:, 0]
t = a[1]
plt.plot(t, X)
plt.show()
Why can't you just take the derivative of X to get V and A?
dt = t[1] - t[0]      # time step used by the Euler integration
V = np.diff(X) / dt   # finite-difference approximation of the velocity
A = np.diff(V) / dt   # finite-difference approximation of the acceleration
fig, (ax1, ax2, ax3) = plt.subplots(3)
fig.suptitle('Vertically stacked subplots')
ax1.plot(t, X)
ax2.plot(t[1:], V)
ax3.plot(t[2:], A)
plt.show()
This gives the three vertically stacked plots.
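As a side note (not part of the original answer), the Euler solver above already carries the velocity as the second column of its state array, so V and A can also be read off without differencing; a sketch based on the question's own functions:

X = a[0][:, 0]   # position
V = a[0][:, 1]   # velocity is already stored as the second state component
A = np.array([harmonic(y, ti)[1] for y, ti in zip(a[0], a[1])])  # acceleration from the ODE right-hand side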

Regularized Logistic Regression in Python (Andrew Ng Course)

I'm starting my ML journey and I'm having trouble with this coding exercise.
Here is my code:
import numpy as np
import pandas as pd
import scipy.optimize as op

# Read the data and give it labels
data = pd.read_csv('ex2data2.txt', header=None, names=['Test1', 'Test2', 'Accepted'])

# Separate the features to make it fit into the mapFeature function
X1 = data['Test1'].values.T
X2 = data['Test2'].values.T

# This function makes more features (degree)
def mapFeature(x1, x2):
    degree = 6
    out = np.ones((x1.shape[0], sum(range(degree + 2))))
    curr_column = 1
    for i in range(1, degree + 1):
        for j in range(i + 1):
            out[:, curr_column] = np.power(x1, i-j) * np.power(x2, j)
            curr_column += 1
    return out

# Separate the data into training and target, also initialize theta
X = mapFeature(X1, X2)
y = np.matrix(data['Accepted'].values).T
m, n = X.shape
cols = X.shape[1]
theta = np.matrix(np.zeros(cols))

# Initialize the learningRate (sigma)
learningRate = 1

# Define the Sigmoid Function (Output between 0 and 1)
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def cost(theta, X, y, learningRate):
    # This is required to make the optimize function work
    theta = theta.reshape(-1, 1)
    error = sigmoid(X @ theta)
    first = np.multiply(-y, np.log(error))
    second = np.multiply(1 - y, np.log(1 - error))
    j = np.sum((first - second)) / m + (learningRate * np.sum(np.power(theta, 2)) / 2 * m)
    return j

# Define the gradient of the cost function
def gradient(theta, X, y, learningRate):
    # This is required to make the optimize function work
    theta = theta.reshape(-1, 1)
    error = sigmoid(X @ theta)
    grad = (X.T @ (error - y)) / m + ((learningRate * theta) / m)
    grad_no = (X.T @ (error - y)) / m
    grad[0] = grad_no[0]
    return grad

Result = op.minimize(fun=cost, x0=theta, args=(X, y, learningRate), method='TNC', jac=gradient)
opt_theta = np.matrix(Result.x)

def predict(theta, X):
    sigValue = sigmoid(X @ theta.T)
    p = sigValue >= 0.5
    return p

p = predict(opt_theta, X)
print('Train Accuracy: {:f}'.format(np.mean(p == y) * 100))
So, when learningRate = 1 the accuracy should be around 83.05% but I'm getting 80.5%, and when learningRate = 0 the accuracy should be 91.52% but I'm getting 87.28%.
So the question is: what am I doing wrong? Why is my accuracy below the expected answer?
Hope someone can guide me in the right direction. Thanks!
P.S.: Here is the dataset, maybe it can help:
https://raw.githubusercontent.com/TheGirlWhiteWithBandages/Machine-Learning-Algorithms/master/Logistic%20Regression/ex2data2.txt
Hey guys I found a way to make it even better!
Here is the code
import numpy as np
import pandas as pd
import scipy.optimize as op
from sklearn.preprocessing import PolynomialFeatures

# Read the data and give it labels
data = pd.read_csv('ex2data2.txt', header=None, names=['Test1', 'Test2', 'Accepted'])

# Separate the data into training and target
X = (data.iloc[:, 0:2]).values
y = (data.iloc[:, 2:3]).values

# Modify the features to a certain degree (Polynomial)
poly = PolynomialFeatures(6)
m = y.size
XX = poly.fit_transform(data.iloc[:, 0:2].values)

# Initialize Theta
theta = np.zeros(XX.shape[1])

# Define the Sigmoid Function (Output between 0 and 1)
def sigmoid(z):
    return (1 / (1 + np.exp(-z)))

# Define the Regularized cost function
def costFunctionReg(theta, reg, *args):
    # This is required to make the optimize function work
    h = sigmoid(XX @ theta)
    first = np.log(h).T @ -y
    second = np.log(1 - h).T @ (1 - y)
    J = (1 / m) * (first - second) + (reg / (2 * m)) * np.sum(np.square(theta[1:]))
    return J

# Define the Regularized gradient function
def gradientReg(theta, reg, *args):
    theta = theta.reshape(-1, 1)
    h = sigmoid(XX @ theta)
    grad = (1 / m) * (XX.T @ (h - y)) + (reg / m) * np.r_[[[0]], theta[1:]]
    return grad.flatten()

# Define the predict Function
def predict(theta, X):
    sigValue = sigmoid(X @ theta.T)
    p = sigValue >= 0.5
    return p

# A loop to test between different values for sigma (reg parameter)
for i, Sigma in enumerate([0, 1, 100]):
    # Optimize costFunctionReg
    res2 = op.minimize(costFunctionReg, theta, args=(Sigma, XX, y), method=None, jac=gradientReg)
    # Get the accuracy of the model
    accuracy = 100 * sum(predict(res2.x, XX) == y.ravel()) / y.size
    # Get the Error between different weights
    error1 = costFunctionReg(res2.x, Sigma, XX, y)
    # Print the accuracy and error
    print('Train accuracy {}% with Lambda = {}'.format(np.round(accuracy, decimals=4), Sigma))
    print(error1)
Thanks for all your help!
try out this:
# import libraries
import pandas as pd
import numpy as np

dataset = pd.read_csv('ex2data2.csv', names=['Test #1', 'Test #2', 'Accepted'])

# splitting into x and y variables for features and target variable
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
print('x[0] ={}, y[0] ={}'.format(x[0], y[0]))
m, n = x.shape
print('#{} Number of training samples, #{} features per sample'.format(m, n))

# import library for feature mapping
from sklearn.preprocessing import PolynomialFeatures
# We also add one column of ones to interpret theta 0 (x with power of 0 = 1) by setting include_bias to True
pf = PolynomialFeatures(degree=6, include_bias=True)
x_poly = pf.fit_transform(x)
pd.DataFrame(x_poly).head(5)

m, n = x_poly.shape
# define theta as zero
theta = np.zeros(n)
# define hyperparameter λ
lambda_ = 1
# reshape (-1,1) because we just have one feature in the y column
y = y.reshape(-1, 1)

def sigmoid(z):
    return 1/(1 + np.exp(-z))

def lr_hypothesis(x, theta):
    return np.dot(x, theta)

def compute_cost(theta, x, y, lambda_):
    theta = theta.reshape(n, 1)
    infunc1 = -y*(np.log(sigmoid(lr_hypothesis(x, theta)))) - ((1 - y)*(np.log(1 - sigmoid(lr_hypothesis(x, theta)))))
    infunc2 = (lambda_*np.sum(theta[1:]**2))/(2*m)
    j = np.sum(infunc1)/m + infunc2
    return j

# gradient[0] corresponds to the gradient for theta(0)
# gradient[1:] corresponds to the gradient for theta(j), j > 0
def compute_gradient(theta, x, y, lambda_):
    gradient = np.zeros(n).reshape(n,)
    theta = theta.reshape(n, 1)
    infunc1 = sigmoid(lr_hypothesis(x, theta)) - y
    gradient_in = np.dot(x.transpose(), infunc1)/m
    gradient[0] = gradient_in[0, 0]  # theta(0)
    gradient[1:] = gradient_in[1:, 0] + (lambda_*theta[1:, ]/m).reshape(n-1,)  # theta(j) ; j > 0
    gradient = gradient.flatten()
    return gradient
You can now test your cost and gradient without optimization. The code below will optimize the model:
# hyperparameters
m, n = x_poly.shape
# define theta as zero
theta = np.zeros(n)
# define hyperparameter λ
lambda_array = [0, 1, 10, 100]

import scipy.optimize as opt

for i in range(0, len(lambda_array)):
    # Train
    print('======================================== Iteration {} ===================================='.format(i))
    optimized = opt.minimize(fun=compute_cost, x0=theta, args=(x_poly, y, lambda_array[i]),
                             method='TNC', jac=compute_gradient)
    new_theta = optimized.x
    # Prediction
    y_pred_train = predictor(x_poly, new_theta)
    cm_train = confusion_matrix(y, y_pred_train)
    t_train, f_train, acc_train = acc(cm_train)
    print('With lambda = {}, {} correct, {} wrong ==========> accuracy = {}%'
          .format(lambda_array[i], t_train, f_train, acc_train*100))
Now you should see output like this:
=== Iteration 0 ===
With lambda = 0, 104 correct, 14 wrong ==========> accuracy = 88.13559322033898%
=== Iteration 1 ===
With lambda = 1, 98 correct, 20 wrong ==========> accuracy = 83.05084745762711%
=== Iteration 2 ===
With lambda = 10, 88 correct, 30 wrong ==========> accuracy = 74.57627118644068%
=== Iteration 3 ===
With lambda = 100, 72 correct, 46 wrong ==========> accuracy = 61.016949152542374%
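Note that the loop above uses predictor, confusion_matrix, and acc without defining them. confusion_matrix is presumably sklearn.metrics.confusion_matrix; the other two are not shown in the answer, so the following is only a sketch of what they might look like (my assumption, not the original author's code):

from sklearn.metrics import confusion_matrix

def predictor(x, theta):
    # label 1 whenever the estimated probability is at least 0.5
    return (sigmoid(lr_hypothesis(x, theta.reshape(-1, 1))) >= 0.5).astype(int).ravel()

def acc(cm):
    # correct predictions sit on the diagonal of the confusion matrix
    correct = cm.trace()
    wrong = cm.sum() - correct
    return correct, wrong, correct / cm.sum()

# depending on the sklearn version, you may want to pass y.ravel() to confusion_matrix as well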
