Logistic regression from scratch: error keeps increasing - python

I have implemented logistic regression from scratch; however, when I run the script the algorithm always predicts the wrong label.
I've tried changing the training outputs and test_output by switching all 1s to 0s and vice versa, but it always predicts the wrong label.
I also noticed that if I change the "-" sign to "+" when updating the weights and the bias, the script correctly predicts the label.
What am I doing wrong?
This is the code I've written:
# IMPORTS
import numpy as np
# HYPERPARAMETERS
EPOCHS = 1000
LEARNING_RATE = 0.1
# FUNCTIONS
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def cost(y_pred, training_outputs, m):
    j = - np.sum(training_outputs * np.log(y_pred) + (1 - training_outputs) * np.log(1 - y_pred)) / m
    return j

# ENTRY
if __name__ == "__main__":
    # Training input and output
    x = np.array([[1, 1, 1], [0, 0, 0], [1, 0, 1]])
    training_outputs = np.array([1, 0, 1])
    # Test input and output
    test_input = np.array([[0, 1, 1]])
    test_output = np.array([0])
    # Weights
    w = np.array([0.3, 0.3, 0.3])
    # Bias
    b = 0
    m = 3
    # Training
    for iteration in range(EPOCHS):
        print("Iteration n.", iteration, end="\r")
        # Compute log odds
        z = np.dot(x, w) + b
        # Compute predicted probability
        y_pred = sigmoid(z)
        # Back propagation
        dz = y_pred - training_outputs
        dw = np.dot(x, dz) / m
        db = np.sum(dz) / m
        # Update weights and bias according to the gradient descent algorithm
        w = w - LEARNING_RATE * dw
        b = b - LEARNING_RATE * db
    print("Model trained. Proceeding with model evaluation...")
    # Test
    # Compute log odds
    z = np.dot(test_input, w) + b
    # Compute predicted probability
    y_pred = sigmoid(z)
    print(y_pred)
    # Compute cost
    cost = cost(y_pred, test_output, m)
    print(cost)

There was an incorrect assumption, as pointed out by @J_H:
>>> from sklearn.linear_model import LogisticRegression
>>> import numpy as np
>>> x = np.array([[1, 1, 1], [0, 0, 0], [1, 0, 1]])
>>> y = np.array([1, 0, 1])
>>> clf = LogisticRegression().fit(x, y)
>>> clf.predict([[0, 1, 1]])
array([1])
scikit-learn appears to believe that test_output should be a 1 rather than a 0.
A few more recommendations:
m should be fine to remove (it's a constant, so it can be folded into LEARNING_RATE)
w should be sized according to the number of columns in x (i.e., x.shape[1])
dw = np.dot(x, dz) should be np.dot(dz, x)
Prediction in logistic regression depends on a threshold, usually 0.5
Taking this into account would look something like the following.
# Initialize weights and bias
w, b = np.zeros(x.shape[1]), 0

for _ in range(EPOCHS):
    # Compute log odds
    z = np.dot(x, w) + b
    # Compute predicted probability
    y_pred = sigmoid(z)
    # Back propagation
    dz = y_pred - training_outputs
    dw = np.dot(dz, x)
    db = np.sum(dz)
    # Update
    w = w - LEARNING_RATE * dw
    b = b - LEARNING_RATE * db

# Test
z = np.dot(test_input, w) + b
test_pred = sigmoid(z) >= 0.5
print(test_pred)
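Since the original symptom was the cost going up rather than down, it can also help to print the training cost as you go; with the gradient signs above it should decrease steadily. A minimal sketch of the same loop with monitoring added, assuming the sigmoid() and cost() helpers from the question are still in scope:
# Same training loop as above, with a periodic cost printout (sketch only)
for epoch in range(EPOCHS):
    y_pred = sigmoid(np.dot(x, w) + b)      # forward pass
    if epoch % 100 == 0:
        # cross-entropy cost on the training set; it should go down
        print(epoch, cost(y_pred, training_outputs, x.shape[0]))
    dz = y_pred - training_outputs          # gradient of the cost w.r.t. z
    w = w - LEARNING_RATE * np.dot(dz, x)   # gradient step on the weights
    b = b - LEARNING_RATE * np.sum(dz)      # gradient step on the bias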
And a complete example on random train/test sets created with sklearn.datasets.make_classification could look like this—which usually gets within a few decimals of the scikit-learn implementation as well:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
EPOCHS = 100
LEARNING_RATE = 0.01
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

if __name__ == "__main__":
    X, y = make_classification(n_samples=1000, n_features=5)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # Initialize `w` and `b`
    w, b = np.zeros(X.shape[1]), 0

    for _ in range(EPOCHS):
        z = np.dot(X_train, w) + b
        y_pred = sigmoid(z)
        dz = y_pred - y_train
        dw = np.dot(dz, X_train)
        db = np.sum(dz)
        w = w - LEARNING_RATE * dw
        b = b - LEARNING_RATE * db

    # Test
    z = np.dot(X_test, w) + b
    test_pred = sigmoid(z) >= 0.5
    print(accuracy_score(y_test, test_pred))
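To sanity-check the claim about matching scikit-learn, you can fit sklearn's LogisticRegression on the same split and compare test accuracies; the two numbers are usually close but not identical, since the solver and regularization differ. A small sketch, assuming the variables from the example above are still in scope:
from sklearn.linear_model import LogisticRegression

# Compare the from-scratch model against scikit-learn on the same train/test split
clf = LogisticRegression().fit(X_train, y_train)
print("scratch:", accuracy_score(y_test, test_pred))
print("sklearn:", clf.score(X_test, y_test))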

Related

Pricing American options using deep learning, put instead of max-call

I'm trying to learn how to optimally stop options in a Black-Scholes setting, along the lines of the article "Solving high-dimensional optimal stopping problems using deep learning" by Sebastian Becker, Patrick Cheridito, Arnulf Jentzen, and Timo Welti.
The framework used to price options is the following:
import tensorflow as tf
from tensorflow.python.training.moving_averages import assign_moving_average

def neural_net(x, neurons, is_training, dtype=tf.float32, decay=0.9):

    def batch_normalization(y):
        shape = y.get_shape().as_list()
        y = tf.reshape(y, [-1, shape[1] * shape[2]])
        # variables for batch normalization
        beta = tf.compat.v1.get_variable(
            name='beta', shape=[shape[1] * shape[2]],
            dtype=dtype, initializer=tf.zeros_initializer())
        gamma = tf.compat.v1.get_variable(
            name='gamma', shape=[shape[1] * shape[2]],
            dtype=dtype, initializer=tf.ones_initializer())
        mv_mean = tf.compat.v1.get_variable(
            'mv_mean', [shape[1] * shape[2]],
            dtype=dtype, initializer=tf.zeros_initializer(),
            trainable=False)
        mv_var = tf.compat.v1.get_variable(
            'mv_var', [shape[1] * shape[2]],
            dtype=dtype, initializer=tf.ones_initializer(),
            trainable=False)
        mean, variance = tf.nn.moments(y, [0], name='moments')
        tf.compat.v1.add_to_collection(
            tf.compat.v1.GraphKeys.UPDATE_OPS,
            assign_moving_average(mv_mean, mean, decay, zero_debias=True))
        tf.compat.v1.add_to_collection(
            tf.compat.v1.GraphKeys.UPDATE_OPS,
            assign_moving_average(mv_var, variance, decay, zero_debias=False))
        mean, variance = tf.cond(is_training, lambda: (mean, variance),
                                 lambda: (mv_mean, mv_var))
        y = tf.nn.batch_normalization(y, mean, variance, beta, gamma, 1e-6)
        return tf.reshape(y, [-1, shape[1], shape[2]])

    def fc_layer(y, out_size, activation, is_single):
        shape = y.get_shape().as_list()
        w = tf.compat.v1.get_variable(
            name='weights',
            shape=[shape[2], shape[1], out_size],
            dtype=dtype,
            initializer=tf.initializers.glorot_uniform())
        y = tf.transpose(tf.matmul(tf.transpose(y, [2, 0, 1]), w),
                         [1, 2, 0])
        if is_single:
            b = tf.compat.v1.get_variable(
                name='bias',
                shape=[out_size, shape[2]],
                dtype=dtype,
                initializer=tf.zeros_initializer())
            return activation(y + b)
        return activation(batch_normalization(y))

    x = batch_normalization(x)
    for i in range(len(neurons)):
        with tf.compat.v1.variable_scope('layer_' + str(i)):
            x = fc_layer(x, neurons[i],
                         tf.nn.relu if i < len(neurons) - 1
                         else tf.nn.sigmoid, False)
    return x
# then the deep optimal stopping routine
def deep_optimal_stopping(x, t, n, g, neurons, batch_size, train_steps,
                          mc_runs, lr_boundaries, lr_values, beta1=0.9,
                          beta2=0.999, epsilon=1e-8, decay=0.9):
    # placeholder used to distinguish between training and Monte Carlo
    # simulation, needed for batch normalization
    is_training = tf.compat.v1.placeholder(tf.bool, [])
    # evaluate the payoff for the whole batch at every point in time
    p = g(t, x)
    nets = neural_net(tf.concat([x[:, :, :-1], p[:, :, :-1]], axis=1),
                      neurons, is_training, decay=decay)

    u_list = [nets[:, :, 0]]
    u_sum = u_list[-1]
    for k in range(1, n - 1):
        # the network output approximates the stopping decision at time k * T / N
        u_list.append(nets[:, :, k] * (1. - u_sum))
        u_sum += u_list[-1]
    # last exercise date: stop with the remaining probability
    u_list.append(1. - u_sum)

    u_stack = tf.concat(u_list, axis=1)
    p = tf.squeeze(p, axis=1)  # removes the dimension of size 1
    loss = tf.reduce_mean(tf.reduce_sum(-u_stack * p, axis=1))  # loss function
    # index of the first date at which the cumulative stopping decision reaches 1
    idx = tf.argmax(tf.cast(tf.cumsum(u_stack, axis=1) + u_stack >= 1,
                            dtype=tf.uint8),
                    axis=1, output_type=tf.int32)
    # approximation of the price for one batch; the final estimate is the
    # mean of these numbers over the MC runs
    stopped_payoffs = tf.reduce_mean(
        tf.gather_nd(p, tf.stack([tf.range(0, batch_size, dtype=tf.int32),
                                  idx], axis=1)))

    # variable used to apply the learning rate schedule; without it the
    # optimizer would not know at which training step we are
    global_step = tf.Variable(0)
    # piecewise constant learning rate, according to the schedule
    learning_rate = tf.compat.v1.train.piecewise_constant(global_step,
                                                          lr_boundaries,
                                                          lr_values)
    # Adam with our learning rate schedule and a small tweak of epsilon
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate,
                                                 beta1=beta1,
                                                 beta2=beta2,
                                                 epsilon=epsilon)
    update_ops = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss, global_step=global_step)

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        for _ in range(train_steps):
            sess.run(train_op, feed_dict={is_training: True})
        px_mean = 0.  # value that will hold the price
        for _ in range(mc_runs):  # loop over the number of MC runs
            # training is over: batch normalization now uses the sampled
            # moving averages
            px_mean += sess.run(stopped_payoffs,
                                feed_dict={is_training: False})
    return px_mean / mc_runs
Now we define the various variables and simulate paths of a stock as X. Then we use the deep_optimal_stopping function to price the option, as in the following code:
import tensorflow as tf
import numpy as np
import time
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

T, N, K = 3., 9, 100.
r, delta, beta = 0.05, 0.1, 0.2
batch_size = 800  # 8192
lr_values = [0.05, 0.005, 0.0005]
mc_runs = 50  # 500

def g(s, x):
    return tf.exp(-r * s) \
        * tf.maximum(tf.reduce_max(x, axis=1, keepdims=True) - K, 0.)

_file = open('example_4_4_1_1.csv', 'w')
_file.write('dim, run, mean, time\n')
for d in [2, 3, 5, 10, 20, 30, 50, 100, 200, 500]:
    for s_0 in [40.]:  # [90., 100., 110.]:
        for run in range(5):
            tf.compat.v1.reset_default_graph()
            t0 = time.time()
            neurons = [d + 50, d + 50, 1]
            train_steps = 1500 + d
            lr_boundaries = [int(500 + d / 5), int(1500 + 3 * d / 5)]
            W = tf.cumsum(tf.compat.v1.random_normal(
                shape=[batch_size, d, N],
                stddev=np.sqrt(T / N)), axis=2)
            t = tf.constant(np.linspace(start=T / N, stop=T, num=N,
                                        endpoint=True, dtype=np.float32))
            # X = tf.exp((r - delta - beta ** 2 / 2.) * t + beta * W) * s_0
            px_mean = deep_optimal_stopping(
                W, t, N, g, neurons, batch_size,
                train_steps, mc_runs,
                lr_boundaries, lr_values, epsilon=0.1)
            t1 = time.time()
            print("")
            _file.write('%i, %i, %f, %f\n' % (d, run, px_mean, t1 - t0))
_file.close()
So here the option is a Bermudan max-call defined by the payoff function g(s, x). My understanding is that if I wanted the price of an American put, I would instead change the payoff function g to:
def g(s, x):
    return tf.exp(-r * s) * tf.maximum(K - x, 0.)
and change nothing else. But instead of getting a price of 5.31 as reported in their article, I get 4.02.
Can someone explain where I'm going wrong with my understanding of the problem?

Why doesn't my logistic regression algorithm work?

I'm trying to compute the gradient for a multiclass classification model with logistic regression, and it doesn't seem to be working properly.
This is the data that I am using for this model.
import pandas as pd
import numpy as np
from sklearn.preprocessing import normalize
# Create x and y datasets
path = '/kaggle/input/digit-recognizer'
# Set train and test sets
data = pd.read_csv(path + '/train.csv', nrows=6000)
x_train, y_train = data.iloc[:4800, 1:].values, data.iloc[:4800, 0].values
x_test, y_test = data.iloc[4800:, 1:].values, data.iloc[4800:, 0].values
# Normalize and expand dims
x_train, x_test = x_train / 255, x_test / 255
y_train, y_test = np.expand_dims(y_train, 1), np.expand_dims(y_test, 1)
assert len(x_train) == len(y_train)
assert len(x_test) == len(y_test)
x_train.shape, y_train.shape
((4800, 784), (4800, 1))
Here is the following code where I try to implement gradient descent:
def Sigmoid(z):
    from math import e
    return 1 / (1 + e**-z)

def CostFunction(h, y, m):
    j = -(1/m) * (y @ np.log(h) + (1-y) @ np.log(1-h))
    return j

def GradientDescent(X, y, theta, n_classes):
    import numpy as np
    # Useful variables
    m = len(y)
    theta0, theta1 = [x.copy() for x in theta]
    grad0, grad1 = [np.zeros(x.shape) for x in [theta0, theta1]]
    y_vec = np.zeros((m, n_classes))
    j = 0
    for i in range(m):
        y_vec[i, y[i]] = 1
        ### Forward propagation
        a0 = np.concatenate(([1], X[i]))
        a1 = np.concatenate(([1], Sigmoid(theta0 @ a0)))
        a2 = Sigmoid(theta1 @ a1)
        h = a2
        j += CostFunction(h, y_vec[i], m)
        ### Backpropagation
        delta2 = a2 - y_vec[i]
        delta1 = theta1.T @ delta2 * (a1 * (1 - a1))
        grad0 += np.expand_dims(delta1[1:], 1) @ np.expand_dims(a0, 0)
        grad1 += np.expand_dims(delta2, 1) @ np.expand_dims(a1, 0)
    grad0 = grad0 / m
    grad1 = grad1 / m
    return j, [grad0, grad1]
Now comes the training process.
### Create theta parameters
n_layers = 3
n_classes = 10
# Weight matrix dims (i, j) = (number of nodes, input shape + bias)
theta0 = np.random.uniform(0, 0.01, (24, x_train.shape[1] + 1))
theta1 = np.random.uniform(0, 0.01, (n_classes, len(theta0) + 1))
theta_params = [theta0, theta1]
### Train parameters
%%time
epochs = 200
alpha = 0.001
j, t = np.zeros(epochs), theta_params.copy()
for i in range(epochs):
    print("Iteration: {}/{}".format(i + 1, epochs))
    j[i], g = GradientDescent(x_train, y_train, t, n_classes)
    print(j[i])
    t[0] = t[0] - alpha * g[0]
    t[1] = t[1] - alpha * g[1]
The cost starts from J=7.2583 and goes down to approximately J=3.5223, where it gets stuck.
Then, whenever I try to predict any of the samples from the training or test sets, it outputs approximately the same probability for every class.
def Predict(X, theta):
    import numpy as np
    # Useful variables
    m = len(X)
    theta0, theta1 = [x for x in theta]
    h = np.zeros(m)
    for i in range(m):
        ### Forward propagation
        a0 = np.concatenate(([1], X[i]))
        a1 = np.concatenate(([1], Sigmoid(theta0 @ a0)))
        a2 = Sigmoid(theta1 @ a1)
        print(a2)
        h[i] = np.argmax(a2)
    return h

Predict(x_train[:1], t)
[0.20078521 0.19842413 0.20535222 0.1953332 0.19425315 0.19302124
0.20107485 0.19589331 0.19688894 0.19526526]
array([2.])
Notice that I am printing the hypothesis probability for each node in the last layer inside the Predict function.
Could anyone point me in the right direction by sharing some tips?

Issue Implementing Custom Gradient Descent Function

I am implementing my own/custom gradient descent algorithm in Python, but the weight and bias returned by my algorithm have 10 values each (shape=(10,)). My input data has only 1 column, so I am expecting it to return 1 weight and 1 bias.
Code:
import numpy as np
import matplotlib.pyplot as plt

def SGD(X, y, learning_rate=0.01, max_iter=1000):
    w = np.random.randn(X.shape[1])
    b = np.random.randn(1,)
    print(w, b)
    n = len(X)
    loss_list = []
    for i in range(max_iter):
        y_pred = w*X + b
        Lw = -(2/n)*sum(X*(y - y_pred))
        Lb = -(2/n)*sum(y - y_pred)
        w = w - learning_rate*Lw
        b = b - learning_rate*Lb
        loss = np.square(np.subtract(y, y_pred)).mean()
        loss_list.append(loss)
        print(f"Epoch: {i}, loss: {loss}")
    return w, b

x = list(range(1, 11))
y = []
for i in x:
    y.append(i**2)
x, y = np.array(x).reshape(-1, 1), np.array(y)
w, b = SGD(x, y)
print("\n\n\n\n")
print(w)
print(b)
Loss of last iteration:
Epoch: 999, loss: 0.11521764208740602
Returned weight and bias respectively,
w: [0.00149535 0.00777379 0.01823786 0.03288755 0.05172286 0.07474381
0.10195038 0.13334257 0.1689204 0.20868384] # giving 10 values
b: [ 0.98958964 3.94588026 8.87303129 15.77104274 24.63991461 35.47964689
48.29023958 63.07169269 79.82400621 98.54718014] # giving 10 values
I don't understand the cause. How is this happening?
Thanks!
I think this is because your y is a 1-D array of shape (n,), while y_pred is an n x 1 column of shape (n, 1), so subtracting them broadcasts to an (n, n) matrix, which you don't want. The fix is to just reshape y before you call your function, like so:
import numpy as np
import matplotlib.pyplot as plt

def SGD(X, y, learning_rate=0.01, max_iter=1000):
    w = np.random.randn(X.shape[1])
    b = np.random.randn(1,)
    print(w, b)
    n = len(X)
    loss_list = []
    for i in range(max_iter):
        y_pred = w*X + b
        Lw = -(2/n)*sum(X*(y - y_pred))
        Lb = -(2/n)*sum(y - y_pred)
        w = w - learning_rate*Lw
        b = b - learning_rate*Lb
        loss = np.square(np.subtract(y, y_pred)).mean()
        loss_list.append(loss)
        print(f"Epoch: {i}, loss: {loss}")
    return w, b

x = list(range(1, 11))
y = []
for i in x:
    y.append(i**2)
x, y = np.array(x).reshape(-1, 1), np.array(y).reshape((-1, 1))  # Change is here
w, b = SGD(x, y)
print("\n\n\n\n")
print(w)
print(b)
and then w, b are:
[10.94655101]
[-21.6278976]
respectively
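The broadcasting pitfall described in this answer is easy to reproduce in isolation; the short sketch below (not part of the original post) shows how subtracting an (n, 1) array from an (n,) array silently produces an (n, n) matrix:
import numpy as np

y = np.arange(3)                          # shape (3,)
y_pred = np.arange(3).reshape(-1, 1)      # shape (3, 1)
print((y - y_pred).shape)                 # (3, 3): every pair of elements is subtracted
print((y.reshape(-1, 1) - y_pred).shape)  # (3, 1): the intended elementwise result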

Problem reproducing the predicted covariance of a Gaussian process using GPyTorch with the same hyperparameters

I need to build a function that gives the a posteriori covariance of a Gaussian process. The idea is to train a GP using GPyTorch, then take the learned hyperparameters and pass them into my own kernel function (for several reasons I can't use GPyTorch directly).
The problem is that I can't reproduce the prediction. Here is the code I wrote. I have been working on it the whole day but I can't find the problem. Do you know what I am doing wrong?
from gpytorch.mlls import ExactMarginalLogLikelihood
import numpy as np
import gpytorch
import torch

train_x1 = torch.linspace(0, 0.95, 50) + 0.05 * torch.rand(50)
train_y1 = torch.sin(train_x1 * (2 * np.pi)) + 0.2 * torch.randn_like(train_x1)
n_datapoints = train_x1.shape[0]

def kernel_rbf(x1, x2, c, l):
    # my RBF function
    if x1.shape is ():
        x1 = np.atleast_2d(x1)
    if x2.shape is ():
        x2 = np.atleast_2d(x2)
    return c * np.exp(- np.matmul((x1 - x2).T, (x1 - x2)) / (2 * l ** 2))

class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        lengthscale_prior = gpytorch.priors.GammaPrior(3.0, 6.0)
        outputscale_prior = gpytorch.priors.GammaPrior(2.0, 0.15)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(lengthscale_prior=lengthscale_prior),
            outputscale_prior=outputscale_prior)

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(train_x1, train_y1, likelihood)

# Find optimal model hyperparameters
model.train()
likelihood.train()
mll = ExactMarginalLogLikelihood(likelihood, model)

# Use the Adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters
training_iterations = 50
for i in range(training_iterations):
    optimizer.zero_grad()
    output = model(*model.train_inputs)
    loss = -mll(output, model.train_targets)
    loss.backward()
    print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
    optimizer.step()

# Get the learned hyperparameters
outputscale = model.covar_module.outputscale.item()
lengthscale = model.covar_module.base_kernel.lengthscale.item()
noise = likelihood.noise_covar.noise.item()

train_x1 = train_x1.numpy()
train_y1 = train_y1.numpy()

# Get covariance of the train points
K = np.zeros((n_datapoints, n_datapoints))
for i in range(n_datapoints):
    for j in range(n_datapoints):
        K[i, j] = kernel_rbf(train_x1[i], train_x1[j], outputscale, lengthscale)
# Add noise
K += noise ** 2 * np.eye(n_datapoints)

# Get covariance between train and test points
x_test = torch.rand(1, 1)
Ks = np.zeros((n_datapoints, 1))
for i in range(n_datapoints):
    Ks[i] = kernel_rbf(train_x1[i], x_test.numpy(), outputscale, lengthscale)
# Get variance of the test points
Kss = kernel_rbf(x_test.numpy(), x_test.numpy(), outputscale, lengthscale)

L = np.linalg.cholesky(K)
v = np.linalg.solve(L, Ks)
var = Kss - np.matmul(v.T, v)

model.eval()
likelihood.eval()
with gpytorch.settings.fast_pred_var():
    y_preds = likelihood(model(x_test))

print(f"Predicted variance with gpytorch: {y_preds.variance.item()}")
print(f"Predicted variance with my kernel: {var}")
I found the errors:
The noise is not squared, so it is K += noise * np.eye(n_datapoints) and not K += noise ** 2 * np.eye(n_datapoints).
I forgot to add the noise term to the test covariance $K_{**}$, i.e. Kss += noise.
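Putting those two fixes together, the end of the numpy reconstruction would look roughly like the sketch below, replacing the corresponding lines in the question's code (note that GPyTorch's likelihood noise is already a variance, which is why it is added directly rather than squared):
# Corrected noise handling (sketch): add the noise variance, not its square
K += noise * np.eye(n_datapoints)   # train covariance plus observation noise
Kss = kernel_rbf(x_test.numpy(), x_test.numpy(), outputscale, lengthscale) + noise

L = np.linalg.cholesky(K)
v = np.linalg.solve(L, Ks)
var = Kss - np.matmul(v.T, v)       # predictive variance, now matching gpytorch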

How can I complete this gradient descent algorithm code?

I am a freshman and a beginner. I am studying machine learning with open tutorials.
I am having trouble writing a gradient descent algorithm: I have to complete the "for _ in range(max_iter):" loop, but I don't know numpy well, so I don't know what code I should add.
Could you please help me fill in the blank?
I know this type of question is rude... sorry, but I need your help :(
Thank you in advance.
from sklearn import datasets
import numpy as np
from sklearn.metrics import accuracy_score

X, y = datasets.make_classification(
    n_samples=200, n_features=2, random_state=333,
    n_informative=2, n_redundant=0, n_clusters_per_class=1)

def sigmoid(s):
    return 1 / (1 + np.exp(-s))

def loss(y, h):
    return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

def gradient(X, y, w):
    return -(y * X) / (1 + np.exp(-y * np.dot(X, w)))

X_bias = np.append(np.ones((X.shape[0], 1)), X, axis=1)
y = np.array([[1] if label == 0 else [0] for label in y])
w = np.array([[random.uniform(-1, 1)] for _ in range(X.shape[1]+1)])

max_iter = 100
learning_rate = 0.1
threshold = 0.5

for _ in range(max_iter):
    # fill in the blank
    # what code should I add ????

probabilities = sigmoid(np.dot(X_bias, w))
predictions = [[1] if p > threshold else [0] for p in probabilities]
print("loss: %.2f, accuracy: %.2f" %
      (loss(y, probabilities), accuracy_score(y, predictions)))
Inside the for loop, we first have to compute the probabilities, then find the gradients, and then update the weights.
For computing the probabilities, you can use the code below:
probs=sigmoid(np.dot(X_bias,w))
np.dot is the numpy command for matrix multiplication. Then we will calculate the loss and its gradient.
J=loss(y,probs)
dJ=gradient(X_bias,y,w)
Now we will update the weights.
w=w-learning_rate*dJ
So the final code will be
from sklearn import datasets
import numpy as np
from sklearn.metrics import accuracy_score

X, y = datasets.make_classification(
    n_samples=200, n_features=2, random_state=333,
    n_informative=2, n_redundant=0, n_clusters_per_class=1)

def sigmoid(s):
    return 1 / (1 + np.exp(-s))

def loss(y, h):
    return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

def gradient(X, y, w):
    return -(y * X) / (1 + np.exp(-y * np.dot(X, w)))

X_bias = np.append(np.ones((X.shape[0], 1)), X, axis=1)
y = np.array([[1] if label == 0 else [0] for label in y])
w = np.array([[np.random.uniform(-1, 1)] for _ in range(X.shape[1]+1)])

max_iter = 100
learning_rate = 0.1
threshold = 0.5

for _ in range(max_iter):
    probs = sigmoid(np.dot(X_bias, w))
    J = loss(y, probs)
    dJ = gradient(X_bias, y, w)
    w = w - learning_rate * dJ

probabilities = sigmoid(np.dot(X_bias, w))
predictions = [[1] if p > threshold else [0] for p in probabilities]
print("loss: %.2f, accuracy: %.2f" %
      (loss(y, probabilities), accuracy_score(y, predictions)))
Note: in the for loop there is no need to compute probs and the loss, as we only need the gradient to update the weights. I included them because it makes the code easier to understand.
