I have the following code snippet:
def func1(self, X, y):
    # X.shape = (455, 13)
    # y.shape = (455)
    num_examples, num_features = np.shape(X)
    self.weights = np.random.uniform(-1 / (2 * num_examples), 1 / (2 * num_examples), num_features)
    while condition:
        new_weights = np.zeros(num_features)
        K = np.dot(X, self.weights) - y
        for j in range(num_features):
            summ = 0
            for i in range(num_examples):
                summ += K[i] * X[i][j]
            new_weights[j] = self.weights[j] - ((self.alpha / num_examples) * summ)
        self.weights = new_weights
This code runs too slowly. Is there any optimization I can do?
You can use np.einsum() efficiently here. See a test version below:
def func2(X, y):
    num_examples, num_features = np.shape(X)
    weights = np.random.uniform(-1. / (2 * num_examples), 1. / (2 * num_examples), num_features)
    K = np.dot(X, weights) - y
    return weights - alpha / num_examples * np.einsum('i,ij->j', K, X)
You can get new_weights directly with matrix multiplication via np.dot, like so:
new_weights = self.weights - (self.alpha / num_examples) * np.dot(K, X)
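With either form, the whole update step collapses to a single matrix product. As a rough sketch of func1 with the nested loops removed (the undefined condition and self.alpha are kept as they appear in the original):

import numpy as np

def func1(self, X, y):
    num_examples, num_features = np.shape(X)
    self.weights = np.random.uniform(-1 / (2 * num_examples), 1 / (2 * num_examples), num_features)
    while condition:
        # K has shape (num_examples,); K @ X equals X.T @ K and sums K[i] * X[i][j] over i for every j
        K = X @ self.weights - y
        self.weights = self.weights - (self.alpha / num_examples) * (K @ X)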
I am trying to compute the function below with Theano/Aesara, preferably in a vectorized manner:
[formula image: adstock with lag, adstocked_x[t] = sum_{l=0}^{L-1} D^((l - P)^2) * x[t - l] / sum_{l=0}^{L-1} D^((l - P)^2)]
The solution I have is not vectorized and is therefore way too slow:
def apply_adstock_with_lag(x, L, P, D):
    """
    params:
        x: original array
        L: length
        P: peak, delay in effect
        D: decay, retain
    """
    x = np.append(np.zeros(L - 1), x)
    weights = [0 for _ in range(L)]
    for l in range(L):
        weight = D ** ((l - P) ** 2)
        weights[L - 1 - l] = weight
    weights = np.array(weights)
    adstocked_x = []
    for i in range(L - 1, len(x)):
        x_array = x[i - L + 1:i + 1]
        xi = sum(x_array * weights) / sum(weights)
        adstocked_x.append(xi)
    adstocked_x = tt.as_tensor_variable(adstocked_x)
    return adstocked_x
A similar but simpler function and its vectorized solution can be found below; note that it is much quicker, probably due to the vectorized operations:
[formula image: geometric adstock, x_decayed[t] = x[t] + theta * x_decayed[t - 1]]
def adstock_geometric_theano_pymc3(x, theta):
    x = tt.as_tensor_variable(x)

    def adstock_geometric_recurrence_theano(index, input_x, decay_x, theta):
        return tt.set_subtensor(decay_x[index], tt.sum(input_x + theta * decay_x[index - 1]))

    len_observed = x.shape[0]
    x_decayed = tt.zeros_like(x)
    x_decayed = tt.set_subtensor(x_decayed[0], x[0])
    output, _ = theano.scan(
        fn=adstock_geometric_recurrence_theano,
        sequences=[tt.arange(1, len_observed), x[1:len_observed]],
        outputs_info=x_decayed,
        non_sequences=theta,
        n_steps=len_observed - 1
    )
    return output[-1]
I can't come up with a vectorized solution for my adstock function; can anyone give it a go?
Have you tried:
def apply_adstock_with_lag(x, L, P, D):
    weights = D ** ((np.arange(0, L, 1) - P) ** 2)
    adstocked_x = np.convolve(x, weights)[:-(L - 1)] / np.sum(weights)
    adstocked_x = at.as_tensor_variable(adstocked_x)
    return adstocked_x
This should work
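As a quick sanity check, the two versions can be compared numerically with NumPy alone (a sketch with made-up test values for x, L, P, D, and the tensor conversion dropped for the comparison):

import numpy as np

x = np.random.rand(50)
L, P, D = 12, 1, 0.8

# Loop-based version from the question (weights stored in reversed order, as there).
x_pad = np.append(np.zeros(L - 1), x)
weights = np.array([D ** ((L - 1 - k - P) ** 2) for k in range(L)])
loop_result = np.array([
    np.sum(x_pad[i - L + 1:i + 1] * weights) / np.sum(weights)
    for i in range(L - 1, len(x_pad))
])

# Vectorized version from the answer.
w = D ** ((np.arange(0, L, 1) - P) ** 2)
conv_result = np.convolve(x, w)[:-(L - 1)] / np.sum(w)

print(np.allclose(loop_result, conv_result))  # expected: True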
I'm new to machine learning and am trying to implement gradient descent. The code I have looks like this and has been resulting in NaN values for all parameters:
from numpy import genfromtxt, array

def compute_error_for_line_given_points(b, m, points):
    totalError = 0  # sum of squared error formula
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        totalError += (y - (m * x + b)) ** 2
    return totalError / float(len(points))

def step_gradient(b_current, m_current, points, learning_rate):
    # gradient descent
    b_gradient = 0
    m_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        b_gradient += -(2 / N) * (y - (m_current * x + b_current))
        m_gradient += -(2 / N) * x * (y - (m_current * x + b_current))
    new_b = b_current - (learning_rate * b_gradient)
    new_m = m_current - (learning_rate * m_gradient)
    return [new_b, new_m]

def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):
    b = starting_b
    m = starting_m
    for i in range(num_iterations):
        b, m = step_gradient(b, m, array(points), learning_rate)
    return [b, m]

def run():
    # Step 1: Collect the data
    points = genfromtxt("C:/Users/mishruti/Downloads/For Linear Regression.csv", delimiter=",")
    # Step 2: Define our hyperparameters
    learning_rate = 0.0000001  # how fast the model converges
    # y = mx + b (slope formula)
    initial_b = 0  # initial y-intercept guess
    initial_m = 0  # initial slope guess
    num_iterations = 4
    print("Starting gradient descent at b = {0}, m = {1}, error = {2}".format(
        initial_b, initial_m, compute_error_for_line_given_points(initial_b, initial_m, points)))
    print("Running...")
    [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)
    print("After {0} iterations b = {1}, m = {2}, error = {3}".format(
        num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))

# main function
if __name__ == "__main__":
    run()
A sample from my data set is attached. Can someone please help me figure this out? Thanks!
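For reference, the step_gradient loop above can also be written in a vectorized form; a sketch, assuming points is a NumPy array with x values in column 0 and y values in column 1:

import numpy as np

def step_gradient_vectorized(b_current, m_current, points, learning_rate):
    x = points[:, 0]
    y = points[:, 1]
    N = float(len(points))
    residual = y - (m_current * x + b_current)
    # Same gradients as the loop, computed over all points at once
    b_gradient = -(2 / N) * np.sum(residual)
    m_gradient = -(2 / N) * np.sum(x * residual)
    new_b = b_current - learning_rate * b_gradient
    new_m = m_current - learning_rate * m_gradient
    return [new_b, new_m]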
I'm starting my ML journey and I'm having trouble with this coding exercise.
Here is my code:
import numpy as np
import pandas as pd
import scipy.optimize as op

# Read the data and give it labels
data = pd.read_csv('ex2data2.txt', header=None, names=['Test1', 'Test2', 'Accepted'])

# Separate the features to make them fit into the mapFeature function
X1 = data['Test1'].values.T
X2 = data['Test2'].values.T

# This function makes more features (degree)
def mapFeature(x1, x2):
    degree = 6
    out = np.ones((x1.shape[0], sum(range(degree + 2))))
    curr_column = 1
    for i in range(1, degree + 1):
        for j in range(i + 1):
            out[:, curr_column] = np.power(x1, i - j) * np.power(x2, j)
            curr_column += 1
    return out

# Separate the data into training and target, also initialize theta
X = mapFeature(X1, X2)
y = np.matrix(data['Accepted'].values).T
m, n = X.shape
cols = X.shape[1]
theta = np.matrix(np.zeros(cols))

# Initialize the learningRate (sigma)
learningRate = 1

# Define the sigmoid function (output between 0 and 1)
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def cost(theta, X, y, learningRate):
    # This is required to make the optimize function work
    theta = theta.reshape(-1, 1)
    error = sigmoid(X @ theta)
    first = np.multiply(-y, np.log(error))
    second = np.multiply(1 - y, np.log(1 - error))
    j = np.sum((first - second)) / m + (learningRate * np.sum(np.power(theta, 2)) / 2 * m)
    return j

# Define the gradient of the cost function
def gradient(theta, X, y, learningRate):
    # This is required to make the optimize function work
    theta = theta.reshape(-1, 1)
    error = sigmoid(X @ theta)
    grad = (X.T @ (error - y)) / m + ((learningRate * theta) / m)
    grad_no = (X.T @ (error - y)) / m
    grad[0] = grad_no[0]
    return grad

Result = op.minimize(fun=cost, x0=theta, args=(X, y, learningRate), method='TNC', jac=gradient)
opt_theta = np.matrix(Result.x)

def predict(theta, X):
    sigValue = sigmoid(X @ theta.T)
    p = sigValue >= 0.5
    return p

p = predict(opt_theta, X)
print('Train Accuracy: {:f}'.format(np.mean(p == y) * 100))
So, when learningRate = 1 the accuracy should be around 83.05%, but I'm getting 80.5%; and when learningRate = 0 the accuracy should be 91.52%, but I'm getting 87.28%.
So the question is: what am I doing wrong? Why is my accuracy below the expected answer?
Hope someone can guide me in the right direction. Thanks!
P.S.: Here is the dataset; maybe it can help:
https://raw.githubusercontent.com/TheGirlWhiteWithBandages/Machine-Learning-Algorithms/master/Logistic%20Regression/ex2data2.txt
Hey guys, I found a way to make it even better! Here is the code:
import numpy as np
import pandas as pd
import scipy.optimize as op
from sklearn.preprocessing import PolynomialFeatures

# Read the data and give it labels
data = pd.read_csv('ex2data2.txt', header=None, names=['Test1', 'Test2', 'Accepted'])

# Separate the data into training and target
X = (data.iloc[:, 0:2]).values
y = (data.iloc[:, 2:3]).values

# Modify the features to a certain degree (polynomial)
poly = PolynomialFeatures(6)
m = y.size
XX = poly.fit_transform(data.iloc[:, 0:2].values)

# Initialize theta
theta = np.zeros(XX.shape[1])

# Define the sigmoid function (output between 0 and 1)
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Define the regularized cost function
def costFunctionReg(theta, reg, *args):
    # This is required to make the optimize function work
    h = sigmoid(XX @ theta)
    first = np.log(h).T @ -y
    second = np.log(1 - h).T @ (1 - y)
    J = (1 / m) * (first - second) + (reg / (2 * m)) * np.sum(np.square(theta[1:]))
    return J

# Define the regularized gradient function
def gradientReg(theta, reg, *args):
    theta = theta.reshape(-1, 1)
    h = sigmoid(XX @ theta)
    grad = (1 / m) * (XX.T @ (h - y)) + (reg / m) * np.r_[[[0]], theta[1:]]
    return grad.flatten()

# Define the predict function
def predict(theta, X):
    sigValue = sigmoid(X @ theta.T)
    p = sigValue >= 0.5
    return p

# A loop to test different values for sigma (the regularization parameter)
for i, Sigma in enumerate([0, 1, 100]):
    # Optimize costFunctionReg
    res2 = op.minimize(costFunctionReg, theta, args=(Sigma, XX, y), method=None, jac=gradientReg)
    # Get the accuracy of the model
    accuracy = 100 * sum(predict(res2.x, XX) == y.ravel()) / y.size
    # Get the error for the different weights
    error1 = costFunctionReg(res2.x, Sigma, XX, y)
    # Print the accuracy and error
    print('Train accuracy {}% with Lambda = {}'.format(np.round(accuracy, decimals=4), Sigma))
    print(error1)
Thanks for all your help!
Try this out:
# import libraries
import pandas as pd
import numpy as np

dataset = pd.read_csv('ex2data2.csv', names=['Test #1', 'Test #2', 'Accepted'])

# splitting into x and y variables for features and target variable
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
print('x[0] ={}, y[0] ={}'.format(x[0], y[0]))
m, n = x.shape
print('#{} Number of training samples, #{} features per sample'.format(m, n))

# import library for feature mapping
from sklearn.preprocessing import PolynomialFeatures

# We also add one column of ones to interpret theta 0 (x with power of 0 = 1) by setting include_bias to True
pf = PolynomialFeatures(degree=6, include_bias=True)
x_poly = pf.fit_transform(x)
pd.DataFrame(x_poly).head(5)

m, n = x_poly.shape
# define theta as zero
theta = np.zeros(n)
# define hyperparameter λ
lambda_ = 1
# reshape (-1, 1) because we just have one feature in the y column
y = y.reshape(-1, 1)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def lr_hypothesis(x, theta):
    return np.dot(x, theta)

def compute_cost(theta, x, y, lambda_):
    theta = theta.reshape(n, 1)
    infunc1 = -y * (np.log(sigmoid(lr_hypothesis(x, theta)))) - ((1 - y) * (np.log(1 - sigmoid(lr_hypothesis(x, theta)))))
    infunc2 = (lambda_ * np.sum(theta[1:] ** 2)) / (2 * m)
    j = np.sum(infunc1) / m + infunc2
    return j

# gradient[0] corresponds to the gradient for theta(0)
# gradient[1:] corresponds to the gradient for theta(j), j > 0
def compute_gradient(theta, x, y, lambda_):
    gradient = np.zeros(n).reshape(n,)
    theta = theta.reshape(n, 1)
    infunc1 = sigmoid(lr_hypothesis(x, theta)) - y
    gradient_in = np.dot(x.transpose(), infunc1) / m
    gradient[0] = gradient_in[0, 0]  # theta(0)
    gradient[1:] = gradient_in[1:, 0] + (lambda_ * theta[1:, ] / m).reshape(n - 1,)  # theta(j); j > 0
    gradient = gradient.flatten()
    return gradient
You can now test your cost and gradient functions without optimization. The code below will optimize the model:
# hyperparameters
m, n = x_poly.shape
# define theta as zero
theta = np.zeros(n)
# define hyperparameter λ
lambda_array = [0, 1, 10, 100]

import scipy.optimize as opt

for i in range(0, len(lambda_array)):
    # Train
    print('======================================== Iteration {} ===================================='.format(i))
    optimized = opt.minimize(fun=compute_cost, x0=theta, args=(x_poly, y, lambda_array[i]),
                             method='TNC', jac=compute_gradient)
    new_theta = optimized.x

    # Prediction
    y_pred_train = predictor(x_poly, new_theta)
    cm_train = confusion_matrix(y, y_pred_train)
    t_train, f_train, acc_train = acc(cm_train)
    print('With lambda = {}, {} correct, {} wrong ==========> accuracy = {}%'
          .format(lambda_array[i], t_train, f_train, acc_train * 100))
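The loop calls predictor, confusion_matrix, and acc, which are not defined in the snippet above. A minimal sketch of what they could look like (confusion_matrix comes from scikit-learn; predictor and acc are assumptions inferred from how the loop uses them, and sigmoid is the function defined earlier):

import numpy as np
from sklearn.metrics import confusion_matrix

# Assumed helper: thresholded sigmoid predictions, matching how the loop uses it.
def predictor(x, theta):
    return (sigmoid(np.dot(x, theta)) >= 0.5).astype(int)

# Assumed helper: correct count, wrong count, and accuracy from a confusion matrix.
def acc(cm):
    correct = np.trace(cm)           # true negatives + true positives
    wrong = np.sum(cm) - correct     # false positives + false negatives
    return correct, wrong, correct / np.sum(cm)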
Now you should see output like this:
=== Iteration 0 ===
With lambda = 0, 104 correct, 14 wrong ==========> accuracy = 88.13559322033898%
=== Iteration 1 ===
With lambda = 1, 98 correct, 20 wrong ==========> accuracy = 83.05084745762711%
=== Iteration 2 ===
With lambda = 10, 88 correct, 30 wrong ==========> accuracy = 74.57627118644068%
=== Iteration 3 ===
With lambda = 100, 72 correct, 46 wrong ==========> accuracy = 61.016949152542374%
The code below is trying to do linear interpolation, similar to numpy.interp(), but it is quite slow. I think the reason is that some operations in the code do not have GPU implementations, but I don't know which ones. Could anyone tell me and suggest some solutions?
def tf_interp(b, x, y):
    xaxis_pad = tf.concat([[tf.minimum(b, tf.gather(x, 0))], x, [tf.maximum(b, tf.gather(x, x.get_shape()[0] - 1))]],
                          axis=0)
    yaxis_pad = tf.concat([[0.0], y, [0.0]], axis=0)
    cmp = tf.cast(b >= xaxis_pad, dtype=tf.float32)
    diff = cmp[1:] - cmp[:-1]
    idx = tf.argmin(diff)

    # Interpolate
    alpha = (b - xaxis_pad[idx]) / (xaxis_pad[idx + 1] - xaxis_pad[idx])
    res = alpha * yaxis_pad[idx + 1] + (1 - alpha) * yaxis_pad[idx]

    def f1(): return 0.0
    def f2(): return alpha * yaxis_pad[idx + 1] + (1 - alpha) * yaxis_pad[idx]

    res = tf.cond(pred=tf.is_nan(res), true_fn=f1, false_fn=f2)
    return res

def tf_interpolation(t, x, y):
    t = tf.cast(t, tf.float32)
    x = tf.cast(x, tf.float32)
    y = tf.cast(y, tf.float32)
    t1 = tf.reshape(t, [-1, ])
    t_return = tf.map_fn(lambda b: tf_interp(b, x, y), t1)
    t_return = tf.reshape(t_return, [t.get_shape()[0], t.get_shape()[1]])
    return t_return
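One direction worth trying, as a sketch rather than a drop-in replacement: the per-element tf.map_fn is usually the main bottleneck regardless of GPU kernel coverage, so a batched version built on tf.searchsorted (available in recent TensorFlow versions) removes the Python-level mapping. Note it clamps out-of-range queries to the end segments instead of returning 0 as the padded version above does:

import tensorflow as tf

def tf_interp_vectorized(t, x, y):
    # Assumes x is sorted ascending; matches numpy.interp only for queries inside [x[0], x[-1]].
    t = tf.cast(t, tf.float32)
    x = tf.cast(x, tf.float32)
    y = tf.cast(y, tf.float32)
    t_flat = tf.reshape(t, [-1])

    # Index of the right-hand neighbour of every query point.
    idx = tf.searchsorted(x, t_flat, side='right')
    idx = tf.clip_by_value(idx, 1, tf.shape(x)[0] - 1)

    x_lo = tf.gather(x, idx - 1)
    x_hi = tf.gather(x, idx)
    y_lo = tf.gather(y, idx - 1)
    y_hi = tf.gather(y, idx)

    alpha = (t_flat - x_lo) / (x_hi - x_lo)
    res = alpha * y_hi + (1.0 - alpha) * y_lo
    return tf.reshape(res, tf.shape(t))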
I have been trying to code logistic regression from scratch, which I have done, but I am using all the features in my breast cancer dataset, and I would like to select some features (specifically ones that I've found scikit-learn has selected for itself when I compare with it and use its feature selection on the data). However, I am not sure where to do this in my code, what I currently have is this:
X_train = ['texture_mean', 'smoothness_mean', 'compactness_mean', 'symmetry_mean', 'radius_se', 'symmetry_se'
           'fractal_dimension_se', 'radius_worst', 'texture_worst', 'area_worst', 'smoothness_worst', 'compactness_worst']
X_test = ['texture_mean', 'smoothness_mean', 'compactness_mean', 'symmetry_mean', 'radius_se', 'symmetry_se'
          'fractal_dimension_se', 'radius_worst', 'texture_worst', 'area_worst', 'smoothness_worst', 'compactness_worst']

def Sigmoid(z):
    return 1 / (1 + np.exp(-z))

def Hypothesis(theta, X):
    return Sigmoid(X @ theta)

def Cost_Function(X, Y, theta, m):
    hi = Hypothesis(theta, X)
    _y = Y.reshape(-1, 1)
    J = 1 / float(m) * np.sum(-_y * np.log(hi) - (1 - _y) * np.log(1 - hi))
    return J

def Cost_Function_Derivative(X, Y, theta, m, alpha):
    hi = Hypothesis(theta, X)
    _y = Y.reshape(-1, 1)
    J = alpha / float(m) * X.T @ (hi - _y)
    return J

def Gradient_Descent(X, Y, theta, m, alpha):
    new_theta = theta - Cost_Function_Derivative(X, Y, theta, m, alpha)
    return new_theta

def Accuracy(theta):
    correct = 0
    length = len(X_test)
    prediction = (Hypothesis(theta, X_test) > 0.5)
    _y = Y_test.reshape(-1, 1)
    correct = prediction == _y
    my_accuracy = (np.sum(correct) / length) * 100
    print('LR Accuracy: ', my_accuracy, "%")

def Logistic_Regression(X, Y, alpha, theta, num_iters):
    m = len(Y)
    for x in range(num_iters):
        new_theta = Gradient_Descent(X, Y, theta, m, alpha)
        theta = new_theta
        if x % 100 == 0:
            print  # ('theta: ', theta)
            print  # ('cost: ', Cost_Function(X, Y, theta, m))
    Accuracy(theta)

ep = .012
initial_theta = np.random.rand(X_train.shape[1], 1) * 2 * ep - ep
alpha = 0.5
iterations = 10000
Logistic_Regression(X_train, Y_train, alpha, initial_theta, iterations)
I assumed that if I manually changed which features X_train and X_test consist of, this would work, but I get an error at the initial_theta line: AttributeError: 'list' object has no attribute 'shape'. Any help in the right direction would be appreciated.
The problem is that X_train is a list, and .shape only works on NumPy arrays and pandas DataFrames.
You could either:
- keep the list but use len(X_train) instead, or
- change X_train to a pandas DataFrame and use pandas.DataFrame(X_train).shape[0]
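If the goal is to train on a subset of features, a minimal sketch of the selection step (assuming the data is loaded into a pandas DataFrame, called df here as a hypothetical name, with a 'diagnosis' target column as in the usual breast cancer CSV):

import numpy as np
import pandas as pd

# Hypothetical loading step; the question does not show how the data was read.
df = pd.read_csv('breast_cancer.csv')

features = ['texture_mean', 'smoothness_mean', 'compactness_mean', 'symmetry_mean', 'radius_se',
            'symmetry_se', 'fractal_dimension_se', 'radius_worst', 'texture_worst', 'area_worst',
            'smoothness_worst', 'compactness_worst']

# Select the feature columns as a NumPy array so .shape works and the model sees numbers,
# not column-name strings; the 'diagnosis' target name is an assumption.
X = df[features].values
Y = (df['diagnosis'] == 'M').astype(int).values

# Simple split; in the original code X_train/X_test were meant to hold data, not column names.
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
Y_train, Y_test = Y[:split], Y[split:]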