In an ML course I'm taking, I have 100 entries of data, and I'm using them in the perceptron algorithm.
What I want is to show a plot like this one.
As you can see above, the data are represented by points in red and blue, together with the successive calculated lines that reduce the error. This is the output I want. Here are my data and my code.
data.csv
0.78051,-0.063669,1
0.28774,0.29139,1
0.40714,0.17878,1
0.2923,0.4217,1
0.50922,0.35256,1
0.27785,0.10802,1
0.27527,0.33223,1
0.43999,0.31245,1
0.33557,0.42984,1
0.23448,0.24986,1
0.0084492,0.13658,1
0.12419,0.33595,1
0.25644,0.42624,1
0.4591,0.40426,1
0.44547,0.45117,1
0.42218,0.20118,1
0.49563,0.21445,1
0.30848,0.24306,1
0.39707,0.44438,1
0.32945,0.39217,1
0.40739,0.40271,1
0.3106,0.50702,1
0.49638,0.45384,1
0.10073,0.32053,1
0.69907,0.37307,1
0.29767,0.69648,1
0.15099,0.57341,1
0.16427,0.27759,1
0.33259,0.055964,1
0.53741,0.28637,1
0.19503,0.36879,1
0.40278,0.035148,1
0.21296,0.55169,1
0.48447,0.56991,1
0.25476,0.34596,1
0.21726,0.28641,1
0.67078,0.46538,1
0.3815,0.4622,1
0.53838,0.32774,1
0.4849,0.26071,1
0.37095,0.38809,1
0.54527,0.63911,1
0.32149,0.12007,1
0.42216,0.61666,1
0.10194,0.060408,1
0.15254,0.2168,1
0.45558,0.43769,1
0.28488,0.52142,1
0.27633,0.21264,1
0.39748,0.31902,1
0.5533,1,0
0.44274,0.59205,0
0.85176,0.6612,0
0.60436,0.86605,0
0.68243,0.48301,0
1,0.76815,0
0.72989,0.8107,0
0.67377,0.77975,0
0.78761,0.58177,0
0.71442,0.7668,0
0.49379,0.54226,0
0.78974,0.74233,0
0.67905,0.60921,0
0.6642,0.72519,0
0.79396,0.56789,0
0.70758,0.76022,0
0.59421,0.61857,0
0.49364,0.56224,0
0.77707,0.35025,0
0.79785,0.76921,0
0.70876,0.96764,0
0.69176,0.60865,0
0.66408,0.92075,0
0.65973,0.66666,0
0.64574,0.56845,0
0.89639,0.7085,0
0.85476,0.63167,0
0.62091,0.80424,0
0.79057,0.56108,0
0.58935,0.71582,0
0.56846,0.7406,0
0.65912,0.71548,0
0.70938,0.74041,0
0.59154,0.62927,0
0.45829,0.4641,0
0.79982,0.74847,0
0.60974,0.54757,0
0.68127,0.86985,0
0.76694,0.64736,0
0.69048,0.83058,0
0.68122,0.96541,0
0.73229,0.64245,0
0.76145,0.60138,0
0.58985,0.86955,0
0.73145,0.74516,0
0.77029,0.7014,0
0.73156,0.71782,0
0.44556,0.57991,0
0.85275,0.85987,0
0.51912,0.62359,0
And now here is my code. The first part:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Setting the random seed; feel free to change it and see different solutions.
np.random.seed(42)

def stepFunction(t):
    return 1 if t >= 0 else 0

def prediction(X, W, b):
    return stepFunction((np.matmul(X, W) + b)[0])

# TODO: Fill in the code below to implement the perceptron trick.
# INPUTS: the data X, the labels y, the weights W (as an array), and the bias b.
# The function updates the weights and bias W, b according to the perceptron
# algorithm and returns W and b.
def perceptronStep(X, y, W, b, learn_rate=0.01):
    for i in range(len(X)):
        y_hat = prediction(X[i], W, b)
        if y[i] - y_hat == 1:
            W[0] += X[i][0] * learn_rate
            W[1] += X[i][1] * learn_rate
            b += learn_rate
        elif y[i] - y_hat == -1:
            W[0] -= X[i][0] * learn_rate
            W[1] -= X[i][1] * learn_rate
            b -= learn_rate
    return W, b

# This function runs the perceptron algorithm repeatedly on the dataset,
# and returns a few of the boundary lines obtained in the iterations,
# for plotting purposes.
# Feel free to play with the learning rate and the num_epochs,
# and see your results plotted below.
def trainPerceptronAlgorithm(X, y, learn_rate=0.01, num_epochs=25):
    x_min, x_max = min(X.T[0]), max(X.T[0])
    y_min, y_max = min(X.T[1]), max(X.T[1])
    W = np.array(np.random.rand(2, 1))
    b = np.random.rand(1)[0] + x_max
    # These are the solution lines that get plotted below.
    boundary_lines = []
    for i in range(num_epochs):
        # In each epoch, we apply the perceptron step.
        W, b = perceptronStep(X, y, W, b, learn_rate)
        # Here I have a doubt: the score is y = W0*x1 + W1*x2 + b,
        # so solving for x2 gives x2 = y/W1 - (W0*x1)/W1 - b/W1.
        # If we drop the y/W1 term we are left with just a slope and an intercept,
        # but why are we not using that y/W1 term?
        boundary_lines.append((-W[0] / W[1], -b / W[1]))
    return boundary_lines

# Get data and plot the points
data = pd.read_csv('data.csv', header=None)
X = data.iloc[:, :2].values
y = data.iloc[:, -1].values
x1 = X[:, 0]
x2 = X[:, 1]
color = ['red' if value == 1 else 'blue' for value in y]
plt.scatter(x1, x2, marker='o', color=color)
plt.xlabel('X1 input feature')
plt.ylabel('X2 input feature')
plt.title('Perceptron regression for X1, X2')
plt.show()
When you run this code you correctly get the scatter plot.
Now I want to add, on the same plot, the lines that represent the boundary found at each iteration. For that, I commented out the last line above (plt.show()) and did:
# So now let's plot the lines that represent the best function for each iteration
boundary_lines = trainPerceptronAlgorithm(X, y)
x_lin = np.linspace(0, 1, 100)
for line in boundary_lines:
    Θo, Θ1 = line
    Θ1 = Θ1[0]
    Θo = Θo[0]
    # TODO: The equation of the score is
    # y = W0*x1 + W1*x2 + b
    # so x2 = y/W1 - (W0*x1)/W1 - b/W1.
    # If we drop y/W1 we just get an intercept and a slope:
    # boundary_lines.append((-W[0] / W[1], -b / W[1]))
    # plt.axes([-0.5, -0.5, 1.5, 1.5])
    plt.plot(x_lin, (Θ1 * x_lin / Θo))
plt.draw()
plt.pause(5)
input("Press enter to continue")
plt.close()
But that does not get me the expected result.
Why doesn't this get the expected result?
The mistake is in plt.plot(x_lin, (Θ1 * x_lin / Θo)): each entry of boundary_lines is a (slope, intercept) pair, so instead of Θ1 * x_lin / Θo you should plot Θo * x_lin + Θ1.
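To address the doubt in the code comments about the y/W1 term: the boundary line is the set of points where the perceptron score is zero, so that term drops out. Setting the score to zero,

W0*x1 + W1*x2 + b = 0  =>  x2 = -(W0/W1)*x1 - b/W1,

which is a line with slope -W0/W1 and intercept -b/W1, exactly the pair appended to boundary_lines. That is why the plotted line is slope * x + intercept, i.e. Θo * x_lin + Θ1.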
fig, ax = plt.subplots(1, 1, figsize=(8, 5))
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.scatter(x1, x2, marker='o', color=color)
for i, line in enumerate(boundary_lines):
    Θo, Θ1 = line
    if i == len(boundary_lines) - 1:
        c, ls, lw = 'k', '-', 2
    else:
        c, ls, lw = 'g', '--', 1.5
    ax.plot(x_lin, Θo * x_lin + Θ1, c=c, ls=ls, lw=lw)
plt.show()
Result:
I have nonuniformly sampled data that I am trying to apply a Gaussian filter to. I am using Python's numpy library to solve this. The data is of XY type; here is what it looks like:
[[ -0.96 390.63523024]
[ -1.085 390.68523024]
[ -1.21 390.44023023]
...
[-76.695 390.86023024]
[-77.105 392.51023024]
[-77.155 392.10023024]]
And here is a link to the whole *.npz file.
Here is my approach:
I start by defining a Gaussian function.
Then I scan the data with a while loop along the X axis.
Within each step of the loop:
- I select the portion of the data that is within two cutoff lengths;
- shift the X axis of the selected portion to make it symmetrical around 0;
- calculate my Gaussian function at every point, multiply it with the corresponding Y values, sum, and divide by the number of elements;
- move to the next point.
Here is what the code looks like:
import numpy as np
import matplotlib.pyplot as plt

xy = np.load('1D_data.npz')['arr_0']

def g_func(xx, w=1.0):
    a = 0.47 * w
    return (1 / a) * np.exp((xx / a) ** 2 * (-np.pi))

x, y, x_, y_ = xy[:, 0], xy[:, 1], [], []
counter, xi, ww = 0, x[0], 1.0
while xi > np.amin(x):
    curr_x = x[(x < xi) & (x >= xi - 2 * ww)]
    g, ysel = [], []
    for i, els in enumerate(curr_x):
        xil = els - curr_x[0] + abs(curr_x[0] - curr_x[-1]) / 2
        g.append(g_func(xil, ww))
        ysel.append(y[counter + i])
    y_.append(np.sum(np.multiply(g, ysel)) / len(g))
    x_.append(xi)
    counter += 1
    xi = x[counter]

plt.plot(x, y, '-k')
plt.plot(x_, y_, '-r')
plt.show()
The output doesn't look right, though (see the figure below). Even discarding the edges, the convolution is very noisy and the values do not seem to correspond to the data. What am I possibly doing wrong?
You made one mistake in your code:
Before multiplying g by y_sel, y_sel is not centered.
The reason y_sel should be centered is that we want to add the Gaussian-weighted relative differences to the entry at the center. If you multiply g by y_sel directly, then not only the neighboring entries within the window but also the center entry itself gets weighted by the Gaussian, which changes the function values dramatically.
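As a quick illustration (a toy window with made-up numbers, not the data from the question), compare weighting the raw window against weighting the centered window:

import numpy as np

# Toy window of y-values and bell-shaped weights (made-up numbers).
y_win = np.array([10.0, 10.5, 11.0, 10.5, 10.0])
g_win = np.array([0.1, 0.2, 0.4, 0.2, 0.1])

# Uncentered: the Gaussian scales the full y-values, including the center one,
# so the "smoothed" value lands far below the data.
uncentered = np.sum(g_win * y_win) / len(g_win)

# Centered: weight only the deviations from the window mean and add the mean
# back, so the result stays near the data.
mean = y_win.mean()
centered = np.sum(g_win * (y_win - mean)) / len(g_win) + mean

print(uncentered, centered)  # 2.12 vs 10.44

This is the same idea the numpy solution below implements with y_mean and y_centered.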
Below is my solution using numpy
def g_func(xx, w=1.0):
    mean = np.mean(xx)
    a = 0.47 * w
    return (1 / a) * np.exp(((xx - mean) / a) ** 2 * (-np.pi))

def get_convolution(array, half_window_size):
    array = np.concatenate((np.repeat(array[0], half_window_size),
                            array,
                            np.repeat(array[-1], half_window_size)))
    window_inds = [list(range(ind - half_window_size, ind + half_window_size + 1))
                   for ind in range(half_window_size, len(array) - half_window_size)]
    return np.take(array, window_inds)

xy = np.load('1D_data.npz')['arr_0']
x, y = xy[:, 0], xy[:, 1]
half_window_size = 4

x_conv = np.apply_along_axis(g_func, axis=1, arr=get_convolution(x, half_window_size=half_window_size))
y_conv = get_convolution(y, half_window_size=half_window_size)
y_mean = np.mean(y_conv, axis=1)
y_centered = y_conv - y_mean[:, None]
smoothed = np.sum(x_conv * y_centered, axis=1) / (half_window_size * 2) + y_mean

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x, y, '-k')
ax.plot(x, smoothed, '-r')
Running the code, the output is:
UPDATE
In order to unify w with half_window_size, here is one possibility: the idea is to let the standard deviation of the Gaussian be 2*half_window_size.
def g_func(xx):
    std = len(xx)
    mean = np.mean(xx)
    return 1 / (std * np.sqrt(2 * np.pi)) * np.exp(-1 / 2 * ((xx - mean) / std) ** 2)

def get_convolution(array, half_window_size):
    array = np.concatenate((np.repeat(array[0], half_window_size),
                            array,
                            np.repeat(array[-1], half_window_size)))
    window_inds = [list(range(ind - half_window_size, ind + half_window_size + 1))
                   for ind in range(half_window_size, len(array) - half_window_size)]
    return np.take(array, window_inds)

xy = np.load('1D_data.npz')['arr_0']
x, y = xy[:, 0], xy[:, 1]
half_window_size = 4

x_conv = np.apply_along_axis(g_func, axis=1, arr=get_convolution(x, half_window_size=half_window_size))
y_conv = get_convolution(y, half_window_size=half_window_size)
y_mean = np.mean(y_conv, axis=1)
y_centered = y_conv - y_mean[:, None]
smoothed = np.sum(x_conv * y_centered, axis=1) / (half_window_size * 2) + y_mean

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x, y, '-k')
ax.plot(x, smoothed, '-r')
I'm having an issue using emcee. It's a simple enough 3-parameter fit, but occasionally (it has only occurred in two scenarios so far despite much use) my walkers burn in just fine but then do not move (see figure below). The acceptance fraction reported is 0.
Has anyone else encountered this issue before? I have tried varying my initial conditions, number of walkers, iterations, etc. This piece of code has been running well on very similar data sets. It's not a challenging parameter space, and it seems unlikely that the walkers would be getting "stuck".
Any ideas? I'm stumped... my walkers are lazy, it seems...
Sample code below (and sample data file). This code + data file fail when I run them.
import numpy as n
import math
import pylab as py
import matplotlib.pyplot as plt
import scipy
from scipy.optimize import curve_fit
from scipy import ndimage
import pyfits
from scipy import stats
import emcee
import corner
import scipy.optimize as op
import matplotlib.pyplot as pl
from matplotlib.ticker import MaxNLocator

def sersic(x, B, r_s, m):
    return B * n.exp(-1.0 * (1.9992*m - 0.3271) * ((x/r_s)**(1.0/m) - 1.))

def lnprior(theta):
    B, r_s, m, lnf = theta
    if 0.0 < B < 500.0 and 0.5 < m < 10. and r_s > 0. and -10.0 < lnf < 1.0:
        return 0.0
    return -n.inf

def lnlike(theta, x, y, yerr):  # "least squares"
    B, r_s, m, lnf = theta
    model = sersic(x, B, r_s, m)
    inv_sigma2 = 1.0/(yerr**2 + model**2*n.exp(2*lnf))
    return -0.5*(n.sum((y-model)**2*inv_sigma2 - n.log(inv_sigma2)))

def lnprob(theta, x, y, yerr):  # kills based on priors
    lp = lnprior(theta)
    if not n.isfinite(lp):
        return -n.inf
    return lp + lnlike(theta, x, y, yerr)

profile = open("testprofile.dat", 'r')  # read in the data file
profilelines = profile.readlines()
x = n.empty(len(profilelines))
y = n.empty(len(profilelines))
yerr = n.empty(len(profilelines))
for i, line in enumerate(profilelines):
    col = line.split()
    x[i] = col[0]
    y[i] = col[1]
    yerr[i] = col[2]

# Find the maximum likelihood value.
chi2 = lambda *args: -2 * lnlike(*args)
result = op.minimize(chi2, [50, 2.0, 0.5, 0.5], args=(x, y, yerr))
B_ml, rs_ml, m_ml, lnf_ml = result["x"]
print("""Maximum likelihood result:
B = {0}
r_s = {1}
m = {2}
""".format(B_ml, rs_ml, m_ml))

# Set up the sampler.
ndim, nwalkers = 4, 4000
pos = [result["x"] + 1e-4*n.random.randn(ndim) for i in range(nwalkers)]
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(x, y, yerr))

# Clear and run the production chain.
print("Running MCMC...")
Niter = 2000  # 2000
sampler.run_mcmc(pos, Niter, rstate0=n.random.get_state())
print("Done.")

# Print out the mean acceptance fraction.
af = sampler.acceptance_fraction
print("Mean acceptance fraction:", n.mean(af))

# Plot sampler chain
pl.clf()
fig, axes = pl.subplots(3, 1, sharex=True, figsize=(8, 9))
axes[0].plot(sampler.chain[:, :, 0].T, color="k", alpha=0.4)
axes[0].yaxis.set_major_locator(MaxNLocator(5))
axes[0].set_ylabel("$B$")
axes[1].plot(sampler.chain[:, :, 1].T, color="k", alpha=0.4)
axes[1].yaxis.set_major_locator(MaxNLocator(5))
axes[1].set_ylabel("$r_s$")
axes[2].plot(n.exp(sampler.chain[:, :, 2]).T, color="k", alpha=0.4)
axes[2].yaxis.set_major_locator(MaxNLocator(5))
axes[2].set_xlabel("step number")
fig.tight_layout(h_pad=0.0)
fig.savefig("line-time_test.png")

# plot MCMC fit
burnin = 100
samples = sampler.chain[:, burnin:, :3].reshape((-1, ndim-1))
B_mcmc, r_s_mcmc, m_mcmc = map(lambda v: (v[0]),
                               zip(*n.percentile(samples, [50], axis=0)))
print("""MCMC result:
B = {0}
r_s = {1}
m = {2}
""".format(B_mcmc, r_s_mcmc, m_mcmc))
pl.close()

# Make the triangle plot.
burnin = 50
samples = sampler.chain[:, burnin:, :3].reshape((-1, ndim-1))
fig = corner.corner(samples, labels=["$B$", "$r_s$", "$m$"])
fig.savefig("line-triangle_test.png")
Here's a better result. I made the random initial samples not so close to the maximum likelihood value and ran the chain for a lot more steps with fewer walkers/chains. Notice that I'm plotting the m parameter itself, not its exponential as you did.
The mean acceptance fraction is ~0.48, and it took about 1 min to run on my laptop. You can of course add more steps and get an even better result.
import numpy as n
import emcee
import corner
import scipy.optimize as op
import matplotlib.pyplot as pl
from matplotlib.ticker import MaxNLocator

def sersic(x, B, r_s, m):
    return B * n.exp(
        -1.0 * (1.9992 * m - 0.3271) * ((x / r_s)**(1.0 / m) - 1.))

def lnprior(theta):
    B, r_s, m, lnf = theta
    if 0.0 < B < 500.0 and 0.5 < m < 10. and r_s > 0. and -10.0 < lnf < 1.0:
        return 0.0
    return -n.inf

def lnlike(theta, x, y, yerr):  # "least squares"
    B, r_s, m, lnf = theta
    model = sersic(x, B, r_s, m)
    inv_sigma2 = 1.0 / (yerr**2 + model**2 * n.exp(2 * lnf))
    return -0.5 * (n.sum((y - model)**2 * inv_sigma2 - n.log(inv_sigma2)))

def lnprob(theta, x, y, yerr):  # kills based on priors
    lp = lnprior(theta)
    if not n.isfinite(lp):
        return -n.inf
    return lp + lnlike(theta, x, y, yerr)

profile = open("testprofile.dat", 'r')  # read in the data file
profilelines = profile.readlines()
x = n.empty(len(profilelines))
y = n.empty(len(profilelines))
yerr = n.empty(len(profilelines))
for i, line in enumerate(profilelines):
    col = line.split()
    x[i] = col[0]
    y[i] = col[1]
    yerr[i] = col[2]

# Find the maximum likelihood value.
chi2 = lambda *args: -2 * lnlike(*args)
result = op.minimize(chi2, [50, 2.0, 0.5, 0.5], args=(x, y, yerr))
B_ml, rs_ml, m_ml, lnf_ml = result["x"]
print("""Maximum likelihood result:
B = {0}
r_s = {1}
m = {2}
lnf = {3}
""".format(B_ml, rs_ml, m_ml, lnf_ml))

# Set up the sampler.
ndim, nwalkers = 4, 10
pos = [result["x"] + 1e-1 * n.random.randn(ndim) for i in range(nwalkers)]
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(x, y, yerr))

# Clear and run the production chain.
print("Running MCMC...")
Niter = 50000
sampler.run_mcmc(pos, Niter, rstate0=n.random.get_state())
print("Done.")

# Print out the mean acceptance fraction.
af = sampler.acceptance_fraction
print("Mean acceptance fraction:", n.mean(af))

# Plot sampler chain
pl.clf()
fig, axes = pl.subplots(3, 1, sharex=True, figsize=(8, 9))
axes[0].plot(sampler.chain[:, :, 0].T, color="k", alpha=0.4)
axes[0].yaxis.set_major_locator(MaxNLocator(5))
axes[0].set_ylabel("$B$")
axes[1].plot(sampler.chain[:, :, 1].T, color="k", alpha=0.4)
axes[1].yaxis.set_major_locator(MaxNLocator(5))
axes[1].set_ylabel("$r_s$")
# axes[2].plot(n.exp(sampler.chain[:, :, 2]).T, color="k", alpha=0.4)
axes[2].plot(sampler.chain[:, :, 2].T, color="k", alpha=0.4)
axes[2].yaxis.set_major_locator(MaxNLocator(5))
axes[2].set_ylabel("$m$")
axes[2].set_xlabel("step number")
fig.tight_layout(h_pad=0.0)
fig.savefig("line-time_test.png")

# plot MCMC fit
burnin = 10000
samples = sampler.chain[:, burnin:, :3].reshape((-1, ndim - 1))
B_mcmc, r_s_mcmc, m_mcmc = map(
    lambda v: (v[0]), zip(*n.percentile(samples, [50], axis=0)))
print("""MCMC result:
B = {0}
r_s = {1}
m = {2}
""".format(B_mcmc, r_s_mcmc, m_mcmc))
pl.close()

# Make the triangle plot.
burnin = 50
samples = sampler.chain[:, burnin:, :3].reshape((-1, ndim - 1))
fig = corner.corner(samples, labels=["$B$", "$r_s$", "$m$"])
fig.savefig("line-triangle_test.png")
I'm trying to implement in Python the first exercise of Andrew Ng's Coursera Machine Learning course. In the course the exercise is done with Matlab/Octave, but I wanted to implement it in Python as well.
The problem is that the line that updates the theta values does not seem to be working right; it returns [[0.72088159] [0.72088159]] but should return [[-3.630291] [1.166362]].
I'm using a learning rate of 0.01 and the gradient loop is set to 1500 iterations (the same values as in the original exercise in Octave).
And obviously, with these wrong values for theta, the predictions are not correct, as shown in the last chart.
In the lines where I test the cost function with theta values of [0; 0] and [-1; 2], the results are correct (the same as in the Octave exercise), so the error can only be in the gradient function, but I do not know what went wrong.
I'd like someone to help me figure out what I'm doing wrong. Thanks in advance.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

def load_data():
    X = np.genfromtxt('data.txt', usecols=(0), delimiter=',', dtype=None)
    y = np.genfromtxt('data.txt', usecols=(1), delimiter=',', dtype=None)
    X = X.reshape(1, X.shape[0])
    y = y.reshape(1, y.shape[0])
    ones = np.ones(X.shape)
    X = np.append(ones, X, axis=0)
    theta = np.zeros((2, 1))
    return (X, y, theta)

alpha = 0.01
iter_num = 1500
debug_at_loop = 10

def plot(x, y, y_hat=None):
    x = x.reshape(x.shape[0], 1)
    plt.xlabel('x')
    plt.ylabel('hΘ(x)')
    plt.ylim(ymax=25, ymin=-5)
    plt.xlim(xmax=25, xmin=5)
    plt.scatter(x, y)
    if type(y_hat) is np.ndarray:
        plt.plot(x, y_hat, '-')
    plt.show()

plot(X[1], y)

def hip(X, theta):
    return np.dot(theta.T, X)

def cost(X, y, theta):
    m = y.shape[1]
    return np.sum(np.square(hip(X, theta) - y)) / (2 * m)

print('With theta = [0 ; 0]')
print('Cost computed =', cost(X, y, np.array([0, 0])))
print()
print('With theta = [-1 ; 2]')
print('Cost computed =', cost(X, y, np.array([-1, 2])))

def grad(X, y, alpha, theta, iter_num=1500, debug_cost_at_each=10):
    J = []
    m = y.shape[1]
    for i in range(iter_num):
        # This is the line that updates theta (the one in question).
        theta -= ((alpha * 1) / m) * np.sum(np.dot(hip(X, theta) - y, X.T))
        if i % debug_cost_at_each == 0:
            J.append(round(cost(X, y, theta), 6))
    return J, theta

X, y, theta = load_data()
J, fit_theta = grad(X, y, alpha, theta)
print('Theta found by Gradient Descent:', fit_theta)

# Predict values for population sizes of 35,000 and 70,000
predict1 = np.dot(np.array([[1], [3.5]]).T, fit_theta)
print('For population = 35,000, we predict a profit of \n', predict1 * 10000)
predict2 = np.dot(np.array([[1], [7]]).T, fit_theta)
print('For population = 70,000, we predict a profit of \n', predict2 * 10000)

pred_y = hip(X, fit_theta)
plot(X[1], y, pred_y.T)
The data I'm using is the following txt:
6.1101,17.592
5.5277,9.1302
8.5186,13.662
7.0032,11.854
5.8598,6.8233
8.3829,11.886
7.4764,4.3483
8.5781,12
6.4862,6.5987
5.0546,3.8166
5.7107,3.2522
14.164,15.505
5.734,3.1551
8.4084,7.2258
5.6407,0.71618
5.3794,3.5129
6.3654,5.3048
5.1301,0.56077
6.4296,3.6518
7.0708,5.3893
6.1891,3.1386
20.27,21.767
5.4901,4.263
6.3261,5.1875
5.5649,3.0825
18.945,22.638
12.828,13.501
10.957,7.0467
13.176,14.692
22.203,24.147
5.2524,-1.22
6.5894,5.9966
9.2482,12.134
5.8918,1.8495
8.2111,6.5426
7.9334,4.5623
8.0959,4.1164
5.6063,3.3928
12.836,10.117
6.3534,5.4974
5.4069,0.55657
6.8825,3.9115
11.708,5.3854
5.7737,2.4406
7.8247,6.7318
7.0931,1.0463
5.0702,5.1337
5.8014,1.844
11.7,8.0043
5.5416,1.0179
7.5402,6.7504
5.3077,1.8396
7.4239,4.2885
7.6031,4.9981
6.3328,1.4233
6.3589,-1.4211
6.2742,2.4756
5.6397,4.6042
9.3102,3.9624
9.4536,5.4141
8.8254,5.1694
5.1793,-0.74279
21.279,17.929
14.908,12.054
18.959,17.054
7.2182,4.8852
8.2951,5.7442
10.236,7.7754
5.4994,1.0173
20.341,20.992
10.136,6.6799
7.3345,4.0259
6.0062,1.2784
7.2259,3.3411
5.0269,-2.6807
6.5479,0.29678
7.5386,3.8845
5.0365,5.7014
10.274,6.7526
5.1077,2.0576
5.7292,0.47953
5.1884,0.20421
6.3557,0.67861
9.7687,7.5435
6.5159,5.3436
8.5172,4.2415
9.1802,6.7981
6.002,0.92695
5.5204,0.152
5.0594,2.8214
5.7077,1.8451
7.6366,4.2959
5.8707,7.2029
5.3054,1.9869
8.2934,0.14454
13.394,9.0551
5.4369,0.61705
Well, I got it after losing several strands of hair (programming will still leave me bald).
It was in the gradient line, and the solution was this:
theta -= ((alpha * 1) / m) * np.dot(X, (hip(X, theta) - y).T)
I changed the position of X and transposed the error vector.
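For anyone wondering why this fixes it: hip(X, theta) - y has shape (1, m) and X has shape (2, m), so np.dot(X, (hip(X, theta) - y).T) has shape (2, 1) and matches theta, whereas the original np.sum(...) collapsed the gradient to a single scalar that was subtracted from both components. A minimal standalone sketch of the corrected update, with made-up numbers independent of the data file above:

import numpy as np

# Tiny made-up dataset: X is (2, m) with a row of ones for the bias,
# y is (1, m), theta is (2, 1), matching the shapes used in the question.
X = np.array([[1.0, 1.0, 1.0, 1.0],
              [1.0, 2.0, 3.0, 4.0]])
y = np.array([[2.0, 4.0, 6.0, 8.0]])      # y = 2*x, so theta should approach [0, 2]
theta = np.zeros((2, 1))
alpha, m = 0.1, y.shape[1]

for _ in range(1000):
    error = np.dot(theta.T, X) - y        # shape (1, m)
    grad = np.dot(X, error.T) / m         # shape (2, 1), one entry per parameter
    theta -= alpha * grad

print(theta)  # close to [[0.], [2.]]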
I am trying to follow the book by Daume,
http://ciml.info/dl/v0_99/ciml-v0_99-ch04.pdf (page 43),
to fit a vanilla perceptron model in Python using numpy, without using the scikit-learn library.
The algorithm is given in the book.
How can we implement this model in practice?
So far I have learned how to read the data and labels:
def read_data(infile):
    data = np.loadtxt(infile)
    X = data[:,:-1]
    Y = data[:,-1]
    return X, Y
Any help will be appreciated!!
One way I figured out is this:
(Better ideas are always welcome!!)
#!python
# -*- coding: utf-8 -*-#
"""
Perceptron Algorithm.

#author: Bhishan Poudel
#date: Oct 31, 2017
"""
# Imports
import numpy as np
import matplotlib.pyplot as plt
from numpy.linalg import norm
import os, shutil

np.random.seed(100)

def read_data(infile):
    data = np.loadtxt(infile)
    X = data[:,:-1]
    Y = data[:,-1]
    return X, Y
def plot_boundary(X, Y, w, epoch):
    try:
        plt.style.use('seaborn-darkgrid')
        # plt.style.use('ggplot')
        # plt.style.available
    except:
        pass

    # Get data for two classes
    idxN = np.where(np.array(Y)==-1)
    idxP = np.where(np.array(Y)==1)
    XN = X[idxN]
    XP = X[idxP]

    # plot two classes
    plt.scatter(XN[:,0], XN[:,1], c='b', marker='_', label="Negative class")
    plt.scatter(XP[:,0], XP[:,1], c='r', marker='+', label="Positive class")
    # plt.plot(XN[:,0],XN[:,1],'b_', markersize=8, label="Negative class")
    # plt.plot(XP[:,0],XP[:,1],'r+', markersize=8, label="Positive class")
    plt.title("Perceptron Algorithm iteration: {}".format(epoch))

    # plot decision boundary orthogonal to w
    # w is w2, w1, w0; last term is bias.
    if len(w) == 3:
        a = -w[0] / w[1]
        b = -w[0] / w[2]
        xx = [0, a]
        yy = [b, 0]
        plt.plot(xx, yy, '--g', label='Decision Boundary')

    if len(w) == 2:
        x2 = [w[0], w[1], -w[1], w[0]]
        x3 = [w[0], w[1], w[1], -w[0]]
        x2x3 = np.array([x2, x3])
        XX, YY, U, V = list(zip(*x2x3))
        ax = plt.gca()
        ax.quiver(XX, YY, U, V, scale=1, color='g')

    # Add labels
    plt.xlabel('X')
    plt.ylabel('Y')

    # limits
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)

    # lines from origin
    plt.axhline(y=0, color='k', linestyle='--', alpha=0.2)
    plt.axvline(x=0, color='k', linestyle='--', alpha=0.2)
    plt.grid(True)
    plt.legend(loc=1)
    plt.show()

    # Always close the plot
    plt.close()
def predict(X, w):
    return np.sign(np.dot(X, w))

def plot_contour(X, Y, w, mesh_stepsize):
    try:
        plt.style.use('seaborn-darkgrid')
        # plt.style.use('ggplot')
        # plt.style.available
    except:
        pass

    # Get data for two classes
    idxN = np.where(np.array(Y)==-1)
    idxP = np.where(np.array(Y)==1)
    XN = X[idxN]
    XP = X[idxP]

    # plot two classes with + and - sign
    fig, ax = plt.subplots()
    ax.set_title('Perceptron Algorithm')
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.plot(XN[:,0], XN[:,1], 'b_', markersize=8, label="Negative class")
    plt.plot(XP[:,0], XP[:,1], 'y+', markersize=8, label="Positive class")
    plt.legend()

    # create a mesh for the contour plot
    # We first make a meshgrid (rectangle full of pts) from xmin to xmax and ymin to ymax.
    # We then predict the label for each grid point and color it.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    # Get 2D arrays for the grid axes xx and yy (shape = 700, 1000)
    # xx has 700 rows.
    # xx[0] has 1000 values.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, mesh_stepsize),
                         np.arange(y_min, y_max, mesh_stepsize))

    # Get 1d arrays for the x and y axes
    xxr = xx.ravel()  # shape (700000,)
    yyr = yy.ravel()  # shape (700000,)

    # ones vector
    # ones = np.ones(xxr.shape[0]) # shape (700000,)
    ones = np.ones(len(xxr))  # shape (700000,)

    # Predict the score
    Xvals = np.c_[ones, xxr, yyr]
    scores = predict(Xvals, w)

    # Plot contour plot
    scores = scores.reshape(xx.shape)
    ax.contourf(xx, yy, scores, cmap=plt.cm.Paired)
    # print("xx.shape = {}".format(xx.shape))                 # (700, 1000)
    # print("scores.shape = {}".format(scores.shape))         # (700, 1000)
    # print("scores[0].shape = {}".format(scores[0].shape))   # (1000,)

    # show the plot
    plt.savefig("Perceptron.png")
    plt.show()
    plt.close()
def perceptron_sgd(X, Y, epochs):
    """
    X: data matrix without bias.
    Y: target
    """
    # add bias to X's first column
    ones = np.ones(X.shape[0]).reshape(X.shape[0], 1)
    X1 = np.append(ones, X, axis=1)
    w = np.zeros(X1.shape[1])
    final_iter = epochs

    for epoch in range(epochs):
        print("\n")
        print("epoch: {} {}".format(epoch, '-'*30))
        misclassified = 0
        for i, x in enumerate(X1):
            y = Y[i]
            h = np.dot(x, w)*y
            if h <= 0:
                w = w + x*y
                misclassified += 1
                print('misclassified? yes  w: {} '.format(w, i))
            else:
                print('misclassified? no  w: {}'.format(w))
                pass
        if misclassified == 0:
            final_iter = epoch
            break

    return w, final_iter
def gen_lin_separable_data(data, data_tr, data_ts, data_size):
    mean1 = np.array([0, 2])
    mean2 = np.array([2, 0])
    cov = np.array([[0.8, 0.6], [0.6, 0.8]])
    X1 = np.random.multivariate_normal(mean1, cov, size=int(data_size/2))
    y1 = np.ones(len(X1))
    X2 = np.random.multivariate_normal(mean2, cov, size=int(data_size/2))
    y2 = np.ones(len(X2)) * -1

    with open(data, 'w') as fo, \
         open(data_tr, 'w') as fo1, \
         open(data_ts, 'w') as fo2:

        for i in range(len(X1)):
            line = '{:5.2f} {:5.2f} {:5.0f} \n'.format(X1[i][0], X1[i][1], y1[i])
            line2 = '{:5.2f} {:5.2f} {:5.0f} \n'.format(X2[i][0], X2[i][1], y2[i])
            fo.write(line)
            fo.write(line2)

        for i in range(len(X1) - 20):
            line = '{:5.2f} {:5.2f} {:5.0f} \n'.format(X1[i][0], X1[i][1], y1[i])
            line2 = '{:5.2f} {:5.2f} {:5.0f} \n'.format(X2[i][0], X2[i][1], y2[i])
            fo1.write(line)
            fo1.write(line2)

        for i in range((len(X1) - 20), len(X1)):
            line = '{:5.2f} {:5.2f} {:5.0f} \n'.format(X1[i][0], X1[i][1], y1[i])
            line2 = '{:5.2f} {:5.2f} {:5.0f} \n'.format(X2[i][0], X2[i][1], y2[i])
            fo2.write(line)
            fo2.write(line2)
def main():
    """Run main function."""
    # generate linearly separable data
    data = 'data.txt'
    data_tr = 'data_train.txt'
    data_ts = 'data_test.txt'
    data_size = 200
    gen_lin_separable_data(data, data_tr, data_ts, data_size)

    # read data
    epochs = 20
    X_train, Y_train = read_data(data_tr)
    X_test, Y_test = read_data(data_ts)

    # fit perceptron
    w, final_iter = perceptron_sgd(X_train, Y_train, epochs)
    print('w = ', w)
    plot_boundary(X_test, Y_test, w, final_iter)

    # contour plot
    mesh_stepsize = 0.01
    plot_contour(X_test, Y_test, w, mesh_stepsize)

if __name__ == "__main__":
    main()
The decision boundary looks like this:
There is an implementation of perceptron in my recent repo: NP_ML. The example result is:
I have the following code:
import numpy as np
import matplotlib.pyplot as plt

x1 = np.random.randn(100)
y1 = np.random.randn(100) + 3
x2 = np.random.randn(100) + 3
y2 = np.random.randn(100)
plt.plot(x1, y1, "+", x2, y2, "x")
plt.axis('equal')
plt.show()
which results in the following image.
I have implemented my own logistic regression, which returns a theta, and I want to use this theta to plot the decision boundary, but I'm not sure how to do this.
X = np.matrix(np.vstack((np.hstack((x1,x2)), np.hstack((y1,y2)))).T)
X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
Y = np.matrix(1.0 * np.hstack((np.zeros(100), np.ones(100)))).T
learning_rate = 0.0001
iterations = 3000
theta = np.matrix([[0.5], [0.5], [0.5]])
theta = logistic_regression(theta, X, Y, learning_rate, iterations)
and this gives theta =
[[ 0.40377942]
[ 0.53696461]
[ 0.1398419 ]]
for example. How can I use this to plot the decision boundary?
You want to plot θᵀX = 0, where X is the vector containing (1, x, y). That is, you want to plot the line defined by theta[0] + theta[1]*x + theta[2]*y = 0. Solve for y:
y = -(theta[0] + theta[1]*x)/theta[2]
So, something like:
theta = theta[:,0] # Make theta a 1-d array.
x = np.linspace(-6, 6, 50)
y = -(theta[0] + theta[1]*x)/theta[2]
plt.plot(x, y)
Something doesn't look right, though, because you have theta[1] > 0 and theta[2] > 0, which results in a line with a negative slope.
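A minimal way to sanity-check the boundary is to overlay it on the scatter plot from the question. This sketch assumes x1, y1, x2, y2, and theta are already defined as above (the logistic_regression function itself is not reproduced here):

import numpy as np
import matplotlib.pyplot as plt

theta = np.asarray(theta).ravel()          # flatten the (3, 1) matrix to a 1-d array
x = np.linspace(-3, 6, 50)
y = -(theta[0] + theta[1] * x) / theta[2]  # the line where theta^T (1, x, y) = 0

plt.plot(x1, y1, "+", x2, y2, "x")         # the two classes from the question
plt.plot(x, y, "-")                        # decision boundary
plt.axis('equal')
plt.show()

With the example theta above, the slope is -theta[1]/theta[2] ≈ -3.8, which is the negative slope the note above refers to; the overlay makes that mismatch with the data immediately visible.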