I am trying to train a binary classifier with target labels in {-1, 1} using the dual-hinge loss.
The optimization problem that I am trying to solve is as follows:
$$\min_{w,\,b}\ \tfrac{1}{2}\lVert w\rVert^2 + c \sum_{i} \ell_h\big(y_i\,(x_i^\top w + b)\big)^2$$
where
$$\ell_h(z) = \max(0,\ 1 - z)$$
My code is as follows:
import sklearn
from sklearn.datasets import load_breast_cancer
from sklearn import linear_model,metrics
import numpy as np
import matplotlib.pyplot as plt
# Load the breast-cancer dataset and rescale the feature matrix.
data = load_breast_cancer()
x = sklearn.preprocessing.scale(data.data)
# Turn the 0/1 targets into a column vector of -1/+1 labels.
y = np.sign(data.target.reshape(x.shape[0], 1) - 0.5)
# c weights the loss term in obj/grad_w; rho, eta and grad_tol are not
# referenced by the code shown alongside this block.
c = 10
rho, eta, grad_tol = 0.5, 0.5, 1e-1
def l_h(z):
    """Return the element-wise hinge loss ``max(0, 1 - z)``.

    ``z`` may be a scalar or a NumPy array; the result has the broadcast
    shape of ``z``.
    """
    return np.maximum(0, 1 - z)
def dl2_h(z):
    """Return the derivative of the squared hinge loss with respect to ``z``.

    d/dz max(0, 1 - z)**2 = -2 * max(0, 1 - z), i.e. ``-2 * l_h(z)``.
    """
    return -2 * l_h(z)
def obj(w, b, y, x):
    """Return the regularised squared-hinge objective.

    Computes ``0.5 * ||w||**2 + c * sum(l_h(y * (x @ w + b))**2)`` where
    ``c`` is the module-level loss weight.

    Args:
        w: weight vector (a column vector in the accompanying script).
        b: scalar bias.
        y: labels, multiplied element-wise with the scores ``x @ w + b``.
        x: feature matrix, one sample per row.
    """
    margins = y * (x @ w + b)
    return 0.5 * np.linalg.norm(w) ** 2 + c * np.sum(l_h(margins) ** 2)
def grad_w(w, b, y, x):
    """Return the gradient of ``obj`` with respect to ``w``.

    The regulariser contributes ``w``; the loss term contributes
    ``c * sum_i y_i * x_i * dl2_h(margin_i)``.

    Args:
        w: weight vector (a column vector in the accompanying script).
        b: scalar bias.
        y: labels as a column vector.
        x: feature matrix, one sample per row.
    """
    margins = y * (x @ w + b)
    # y.T * x.T scales column i of x.T by label y_i; the matrix product
    # with the per-sample derivative then sums over the samples.
    return w + c * ((np.transpose(y) * np.transpose(x)) @ dl2_h(margins))
However, when I try to test my gradient with the following code, I get two different answers:
# Finite-difference check of the first component of grad_w.
w = np.random.normal(0, 1, x.shape[1])
w = w.reshape((x.shape[1], 1))
b = np.random.normal(0, 1)
step = 0.0001
epsilon = np.zeros((x.shape[1], 1))
epsilon[0] = step
# Central difference: its truncation error is O(step**2), whereas the
# one-sided difference has O(step) error. Dividing by the scalar step
# (rather than the 1-element array epsilon[0]) keeps the estimate a scalar.
grad_estimate = (obj(w + epsilon, b, y, x) - obj(w - epsilon, b, y, x)) / (2 * step)
# Compare grad_estimate with grad_w(w, b, y, x)[0]; the two should agree
# closely when the analytic gradient is correct.
The values of the above two expressions should be very close if the gradient were right, but they are not. Can someone please tell me where I am making a mistake?
Related
I have a problem where I have to create a dataset.
Afterwards, I have to use Theano to get the w_0 and w_1 parameters of the following model:
y = log(1 + w_0 * |x|) + (w_1 * |x|)
The datasets are created, and I have computed the w_0 and w_1 values with NumPy using the following code. I have studied this thoroughly but don't know how to compute the w_0 and w_1 values with Theano. How can I compute them using Theano?
It would be a great help, thank you :)
Code that I am using:
import numpy as np
import math
import theano as t
# code to generate datasets
trX = np.linspace(-1, 1, 101)
# Noisy targets: log(1 + 0.5*|x|) + x/3 plus Gaussian noise scaled by 0.033,
# computed for the whole array at once instead of element by element.
# NOTE(review): the linear term uses x, not |x| as in the stated model - confirm.
trY = np.log(1 + 0.5 * np.abs(trX)) + trX / 3 + np.random.randn(trX.size) * 0.033
# Code that produces w0 and w1 with NumPy (the part to be redone with Theano).
# Design matrix: a column of ones next to the inputs.
X = np.column_stack((np.ones(101, dtype=trX.dtype), trX))
print(X.shape)
Xplus = np.linalg.pinv(X)  # pseudo-inverse of X
w_opt = Xplus @ trY  # the @ operator denotes matrix multiplication
print(w_opt)
x = abs(trX)  # element-wise absolute value of the inputs
# Evaluate the model with the fitted weights. np.log builds a new array, so
# the training targets in trY are not overwritten (the original aliased
# y to trY and then wrote into it element by element).
y = np.log(1 + w_opt[0] * x) + (w_opt[1] * x)
Good morning Hina Malik,
Using the gradient descent algorithm, and with the right model selection, this problem can be solved. Also, you should create two shared variables (w and c), one for each parameter.
# This snippet uses the names `theano` and `T`, which the question's code
# (it only has `import theano as t`) does not provide.
import theano
import theano.tensor as T

# Symbolic scalar placeholders for one input sample and its target.
X = T.scalar()
Y = T.scalar()
def model(X, w, c):
    """Return the linear prediction ``X * w + c``."""
    return X * w + c
# Both parameters start at zero and live in shared variables so the
# compiled training function can update them in place.
w = theano.shared(np.asarray(0., dtype=theano.config.floatX))
c = theano.shared(np.asarray(0., dtype=theano.config.floatX))
y = model(X, w, c)
learning_rate = 0.01
# Mean squared error between prediction and target.
cost = T.mean(T.sqr(y - Y))
gradient_w = T.grad(cost=cost, wrt=w)
gradient_c = T.grad(cost=cost, wrt=c)
# One plain gradient-descent step per parameter.
updates = [
    [param, param - grad * learning_rate]
    for param, grad in ((w, gradient_w), (c, gradient_c))
]
train = theano.function(inputs=[X, Y], outputs=cost, updates=updates)
coste = []  # Cost history, kept so the cost can be plotted afterwards
for i in range(101):
    # One pass over the data, one sample at a time. The loop variables are
    # named x_i / y_i so they do not overwrite the symbolic output `y`.
    for x_i, y_i in zip(trX, trY):
        cost_i = train(x_i, y_i)
    # NOTE(review): indentation was lost in the source; recording the cost
    # once per pass (rather than once per sample) is assumed.
    coste.append(cost_i)
w0 = float(w.get_value())
w1 = float(c.get_value())
print(w0, w1)
I replied also to the same or very similar topic in the 'Spanish' version of StackOverFlow here: go to solution
I hope this can help you
Best regards
I am trying to fit specific heat data using the model gamma*T + m*Debye_model + (1-m)*Einstein_model (with m written as α below):
$$C_{el+ph}(T) = \gamma T + \left[\alpha\, C_{Debye}(T) + (1 - \alpha)\, C_{Einstein}(T)\right]$$
where the Debye and Einstein models are given by eq. 3 and 4 in the attachment.
I have tried the following code in a Jupyter notebook, following some examples on the web, but I have no idea how I can combine these functions to carry out the fit.
The data is linked https://www.dropbox.com/s/u0r2m3zwl8w77at/HC_ScPtBi.dat?dl=0
Column 1 is Temperature and Column 3 is Y data of interest.
Model is in https://www.dropbox.com/s/9452fq7eydajr5o/Debye.pdf?dl=0
Code is in https://www.dropbox.com/s/hk9b1t0agvt36zn/Untitled2.ipynb?dl=0
from matplotlib import pyplot
import numpy as np
from scipy import integrate
from scipy.optimize import curve_fit
from scipy.integrate import quad
# Read the measurement table, skipping its header row.
data = np.genfromtxt('HC_ScPtBi.dat', skip_header=1)
# Model constants read by c_einstein / c_debye.
# NOTE(review): R is presumably the molar gas constant and n the number of
# atoms per formula unit - confirm. M is not used in the code shown here.
R, n, M = 8.314, 3, 1
# Rows 10..289: first column is T, third column is c.
T, c = data[10:290, 0], data[10:290, 2]
def plot_data():
    """Scatter-plot the module-level data ``c`` against ``T`` with axis labels."""
    pyplot.scatter(T, c)
    pyplot.xlabel('$T [K]$')
    pyplot.ylabel('$C$')


plot_data()
def c_einstein(T, T_E):
    """Return the Einstein-model heat capacity.

    Evaluates ``3*n*R * x**2 * exp(x) / (exp(x) - 1)**2`` with
    ``x = T_E / T``; ``n`` and ``R`` are module-level constants.

    Args:
        T: temperature(s), scalar or NumPy array.
        T_E: Einstein temperature (the fit parameter).
    """
    x = T_E / T
    return 3 * n * R * x**2 * np.exp(x) / (np.exp(x) - 1)**2
# Fit the Einstein model, starting the search from T_E = 250.
popt0, pcov0 = curve_fit(c_einstein, T, c, p0=250)
T_E = popt0[0]
# Square root of the parameter's variance estimate.
delta_T_E = np.sqrt(pcov0[0, 0])
print(f"T_E = {T_E:.5} ± {delta_T_E:.3} K")
print(popt0)
# Data points first, then the fitted curve on top.
plot_data()
#temps = np.linspace(10, T[-1], 100)
einstein_curve = c_einstein(T, *popt0)
pyplot.plot(T, einstein_curve);
def integrand(y):
    """Return ``y**4 * exp(y) / (exp(y) - 1)**2`` for the Debye integral.

    The value is computed in the algebraically identical form
    ``y**4 * exp(-y) / (1 - exp(-y))**2``. For large ``y`` the original form
    overflows to ``inf / inf`` (NaN), whereas this form underflows to 0.
    """
    return y**4 * np.exp(-y) / np.expm1(-y)**2
# quad only accepts scalar integration limits, so the function is vectorised
# to let curve_fit call it with the whole temperature array.
@np.vectorize
def c_debye(T, T_D):
    """Return the Debye-model heat capacity.

    Evaluates ``9*n*R * x**3 * integral_0^(1/x) integrand(y) dy`` with
    ``x = T / T_D``; ``n`` and ``R`` are module-level constants.

    Args:
        T: temperature (applied element-wise to arrays via np.vectorize).
        T_D: Debye temperature (the fit parameter).
    """
    x = T / T_D
    return 9 * n * R * x**3 * quad(integrand, 0, 1 / x)[0]
# Fit the Debye model, starting the search from T_D = 150.
popt1, pcov1 = curve_fit(c_debye, T, c, p0=150)
T_D = popt1[0]
# Square root of the parameter's variance estimate.
delta_T_D = np.sqrt(pcov1[0, 0])
print(f"T_D = {T_D:.5} ± {delta_T_D:.3} K")
print(popt1)
# Overlay both fitted curves on the data.
plot_data()
for label, curve in (('Einstein', c_einstein(T, *popt0)),
                     ('Debye', c_debye(T, *popt1))):
    pyplot.plot(T, curve, label=label)
pyplot.legend();
If it might be of any use, I obtained an excellent fit to a modified Weibull peak equation, with R-squared = 0.99999 and RMSE = 0.06878.
def Peak_WeibullPeak_Modified_model(x):  # from zunzun.com
    """Evaluate the fitted modified Weibull peak equation at ``x``.

    Returns ``a * exp(-0.5 * (ln(x / b) / c)**d)`` with the fitted
    coefficients below. At ``x == b`` the value is exactly ``a``.
    """
    # Imported locally because this snippet has no import of its own.
    import numpy as np

    a = 6.4654735487019195E+01
    b = 3.4517137038577323E+02
    c = -1.5940608784806631E+00
    d = 2.7331145870203617E+00
    return a * np.exp(-0.5 * np.power(np.log(x / b) / c, d))
You need to combine the Einstein and Debye equations into a single function, which should look something like this:
def func(T, alpha, gamma, T_e, T_d):
    """Return ``gamma*T + alpha*C_Debye(T) + (1 - alpha)*C_Einstein(T)``.

    ``n`` and ``R`` are module-level constants.

    Args:
        T: array of temperatures (iterated over for the Debye integral).
        alpha: weight of the Debye term; the Einstein term gets 1 - alpha.
        gamma: coefficient of the linear term.
        T_e: Einstein temperature.
        T_d: Debye temperature.
    """
    # Same integrand as y**4 * exp(y) / (exp(y) - 1)**2, written with
    # exp(-y) so it cannot overflow for large upper limits T_d / t.
    fn = lambda y: y**4 * np.exp(-y) / np.expm1(-y)**2
    einst = (1 - alpha) * 3 * n * R * T_e**2 / T**2 * np.exp(T_e / T) / (np.exp(T_e / T) - 1)**2
    # quad needs scalar limits, hence one integral per temperature.
    debye_int = np.array([integrate.quad(fn, 0, T_d / t)[0] for t in T])
    debye = alpha * 9 * n * R * T**3 / T_d**3 * debye_int
    return einst + debye + gamma * T
You can then use that function in the curve fitting
# Starting values for (alpha, gamma, T_e, T_d). Without p0, curve_fit starts
# every parameter at 1. The two temperatures reuse the starting guesses of
# the single-model fits; alpha and gamma are rough guesses to be tuned.
p0 = [0.5, 0.01, 250, 150]
coefs = curve_fit(func, T, c, p0=p0)[0]
# The question's code imports matplotlib's pyplot under the name `pyplot`.
pyplot.plot(T, func(T, *coefs))
I'm starting the ML journey and I'm having troubles with this coding exercise
here is my code
import numpy as np
import pandas as pd
import scipy.optimize as op
# Read the data and give it labels
data = pd.read_csv('ex2data2.txt', header=None, names=['Test1', 'Test2', 'Accepted'])
# Separate the features to make them fit into the mapFeature function
X1 = data['Test1'].values.T
X2 = data['Test2'].values.T
# This function makes more features (all monomials up to `degree`)
def mapFeature(x1, x2, degree=6):
    """Expand two feature vectors into all monomials up to ``degree``.

    Column 0 is all ones; the remaining columns are ``x1**(i-j) * x2**j``
    for ``i = 1..degree`` and ``j = 0..i``, in that order.

    Args:
        x1: 1-D array with the first feature.
        x2: 1-D array with the second feature, same length as ``x1``.
        degree: highest total degree to generate (default 6).

    Returns:
        Array of shape ``(len(x1), (degree + 1) * (degree + 2) // 2)``.
    """
    n_terms = (degree + 1) * (degree + 2) // 2
    out = np.ones((x1.shape[0], n_terms))
    curr_column = 1
    for i in range(1, degree + 1):
        for j in range(i + 1):
            out[:, curr_column] = np.power(x1, i - j) * np.power(x2, j)
            curr_column += 1
    return out
# Build the expanded design matrix and the target column vector.
X = mapFeature(X1, X2)
y = np.matrix(data['Accepted'].values).T
# m rows, n columns; cols is kept as a second name for the column count.
m, n = X.shape
cols = n
# Start from an all-zero parameter row vector.
theta = np.matrix(np.zeros(cols))
# Value passed as the `learningRate` argument of cost/gradient, where it
# multiplies the sum-of-squares penalty term.
learningRate = 1
# Define the Sigmoid Function (Output between 0 and 1)
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise."""
    return 1 / (1 + np.exp(-z))
def cost(theta, X, y, learningRate):
    """Return the regularised logistic-regression cost.

    Cross-entropy averaged over the samples plus
    ``learningRate / (2 * m) * sum(theta[1:]**2)``.

    Args:
        theta: parameter vector (any shape; reshaped to a column here).
        X: design matrix, one sample per row.
        y: 0/1 targets as a column vector.
        learningRate: regularisation strength (despite its name).
    """
    # This is required to make the optimize function work
    theta = theta.reshape(-1, 1)
    m = X.shape[0]
    error = sigmoid(X @ theta)
    first = np.multiply(-y, np.log(error))
    second = np.multiply(1 - y, np.log(1 - error))
    # The penalty is divided by (2 * m); the original `/ 2 * m` multiplied
    # by m instead. theta[0] is left out so the cost matches `gradient`,
    # which does not regularise the first component either.
    penalty = learningRate * np.sum(np.power(theta[1:], 2)) / (2 * m)
    return np.sum(first - second) / m + penalty
# Define the gradient of the cost function
def gradient(theta, X, y, learningRate):
    """Return the gradient of ``cost`` as a flat 1-D array.

    Args:
        theta: parameter vector (any shape; reshaped to a column here).
        X: design matrix, one sample per row.
        y: 0/1 targets as a column vector.
        learningRate: regularisation strength (despite its name).
    """
    # This is required to make the optimize function work
    theta = theta.reshape(-1, 1)
    m = X.shape[0]
    error = sigmoid(X @ theta)
    grad = (X.T @ (error - y)) / m
    # Only components 1.. receive the penalty term; grad[0] stays unregularised.
    grad[1:] = grad[1:] + (learningRate * theta[1:]) / m
    # scipy.optimize expects a 1-D gradient, not an (n, 1) matrix.
    return np.asarray(grad).ravel()
# theta is a 1 x n np.matrix; minimize needs a 1-D starting point, so it is
# flattened before being handed over.
Result = op.minimize(fun=cost, x0=np.asarray(theta).ravel(), args=(X, y, learningRate), method='TNC', jac=gradient)
opt_theta = np.matrix(Result.x)
def predict(theta, X):
    """Return a boolean prediction per row of ``X`` (True where p >= 0.5).

    Args:
        theta: parameters as a row vector (it is transposed before the product).
        X: design matrix, one sample per row.
    """
    sigValue = sigmoid(X @ theta.T)
    p = sigValue >= 0.5
    return p
# Predict on the training set and report the share of matching labels.
p = predict(opt_theta, X)
train_accuracy = np.mean(p == y) * 100
print(f'Train Accuracy: {train_accuracy:f}')
So, when learningRate = 1, the accuracy should be around 83.05% but I'm getting 80.5%, and when learningRate = 0, the accuracy should be 91.52% but I'm getting 87.28%.
So the question is: what am I doing wrong? Why is my accuracy below the expected answer for this problem?
Hope someone can guide me in the right direction. Thanks!
P.S.: Here is the dataset, in case it helps:
https://raw.githubusercontent.com/TheGirlWhiteWithBandages/Machine-Learning-Algorithms/master/Logistic%20Regression/ex2data2.txt
Hey guys I found a way to make it even better!
Here is the code
import numpy as np
import pandas as pd
import scipy.optimize as op
from sklearn.preprocessing import PolynomialFeatures
# Load the three-column table and name its columns.
data = pd.read_csv('ex2data2.txt', header=None, names=['Test1', 'Test2', 'Accepted'])
# First two columns are the inputs, third column is the target (kept 2-D).
X = data.iloc[:, 0:2].values
y = data.iloc[:, 2:3].values
m = y.size
# Expand the two inputs into polynomial features of degree 6.
poly = PolynomialFeatures(6)
XX = poly.fit_transform(data.iloc[:, 0:2].values)
# One zero-initialised parameter per expanded feature.
theta = np.zeros(XX.shape[1])
# Define the Sigmoid Function (Output between 0 and 1)
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise."""
    return 1 / (1 + np.exp(-z))
# Define the Regularized cost function
def costFunctionReg(theta, reg, *args):
    """Return the regularised logistic-regression cost as a float.

    Reads the module-level ``XX``, ``y`` and ``m``; the extra positional
    arguments supplied by the optimiser are accepted but not used.

    Args:
        theta: 1-D parameter vector.
        reg: regularisation strength; theta[0] is not penalised.
    """
    h = sigmoid(XX @ theta)
    first = np.log(h).T @ -y
    second = np.log(1 - h).T @ (1 - y)
    J = (1 / m) * (first - second) + (reg / (2 * m)) * np.sum(np.square(theta[1:]))
    # y is a column, so J is a one-element array; hand back a plain scalar.
    return float(np.squeeze(J))
# Define the Regularized gradient function
def gradientReg(theta, reg, *args):
    """Return the gradient of ``costFunctionReg`` as a flat 1-D array.

    Reads the module-level ``XX``, ``y`` and ``m``; the extra positional
    arguments supplied by the optimiser are accepted but not used.

    Args:
        theta: parameter vector (reshaped to a column here).
        reg: regularisation strength; theta[0] is not penalised.
    """
    theta = theta.reshape(-1, 1)
    h = sigmoid(XX @ theta)
    # np.r_ puts a zero in front of theta[1:] so the first component
    # receives no penalty term.
    grad = (1 / m) * (XX.T @ (h - y)) + (reg / m) * np.r_[[[0]], theta[1:]]
    return grad.flatten()
# Define the predict Function
def predict(theta, X):
    """Return a boolean prediction per row of ``X`` (True where p >= 0.5)."""
    sigValue = sigmoid(X @ theta.T)
    p = sigValue >= 0.5
    return p
# A loop to test different values for Sigma (the regularisation parameter)
for Sigma in [0, 1, 100]:
    # Optimize costFunctionReg
    res2 = op.minimize(costFunctionReg, theta, args=(Sigma, XX, y), method=None, jac=gradientReg)
    # Get the accuracy of the model
    accuracy = 100 * sum(predict(res2.x, XX) == y.ravel()) / y.size
    # Cost at the optimised parameters
    error1 = costFunctionReg(res2.x, Sigma, XX, y)
    # Print the accuracy and the cost
    print('Train accuracy {}% with Lambda = {}'.format(np.round(accuracy, decimals=4), Sigma))
    print(error1)
Thanks for all your help!
try out this:
# import library
import pandas as pd
import numpy as np
dataset = pd.read_csv('ex2data2.csv', names=['Test #1', 'Test #2', 'Accepted'])
# Every column but the last is a feature; the last column is the target.
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
print(f'x[0] ={x[0]}, y[0] ={y[0]}')
m, n = x.shape
print(f'#{m} Number of training samples, #{n} features per sample')
# import library FeatureMapping
from sklearn.preprocessing import PolynomialFeatures
# We also add one column of ones to interpret theta 0 (x with power of 0 = 1)
# by setting include_bias to True
pf = PolynomialFeatures(degree=6, include_bias=True)
x_poly = pf.fit_transform(x)
pd.DataFrame(x_poly).head(5)
m, n = x_poly.shape
# define theta as zero
theta = np.zeros(n)
# define hyperparameter λ
lambda_ = 1
# reshape(-1, 1) turns y into a single column so it lines up with the
# column of predictions
y = y.reshape(-1, 1)
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise."""
    return 1 / (1 + np.exp(-z))
def lr_hypothesis(x, theta):
    """Return the linear score ``x . theta`` (before the sigmoid is applied)."""
    return np.dot(x, theta)
def compute_cost(theta, x, y, lambda_):
    """Return the regularised logistic-regression cost.

    Args:
        theta: parameter vector with one entry per column of ``x``.
        x: design matrix, one sample per row.
        y: 0/1 targets as a column vector.
        lambda_: regularisation strength; theta[0] is not penalised.
    """
    # Sizes come from x itself rather than from module-level m and n.
    m, n = x.shape
    theta = theta.reshape(n, 1)
    h = sigmoid(lr_hypothesis(x, theta))
    infunc1 = -y * np.log(h) - (1 - y) * np.log(1 - h)
    infunc2 = (lambda_ * np.sum(theta[1:]**2)) / (2 * m)
    j = np.sum(infunc1) / m + infunc2
    return j
# gradient[0] corresponds to the gradient for theta(0)
# gradient[1:] corresponds to the gradient for theta(j), j > 0
def compute_gradient(theta, x, y, lambda_):
    """Return the gradient of ``compute_cost`` as a flat 1-D array.

    Args:
        theta: parameter vector with one entry per column of ``x``.
        x: design matrix, one sample per row.
        y: 0/1 targets as a column vector.
        lambda_: regularisation strength; theta[0] is not penalised.
    """
    # Sizes come from x itself rather than from module-level m and n.
    m, n = x.shape
    theta = theta.reshape(n, 1)
    infunc1 = sigmoid(lr_hypothesis(x, theta)) - y
    gradient = (np.dot(x.transpose(), infunc1) / m).reshape(n)
    # Only theta(j), j > 0 receive the penalty term.
    gradient[1:] += lambda_ * theta[1:, 0] / m
    return gradient
You can now test your cost and gradient without optimization. The code below will optimize the model:
import scipy.optimize as opt

# hyperparameters
m, n = x_poly.shape
# define theta as zero
theta = np.zeros(n)
# values of the hyperparameter λ to try
lambda_array = [0, 1, 10, 100]
for i, lambda_value in enumerate(lambda_array):
    # Train
    print('======================================== Iteration {} ===================================='.format(i))
    optimized = opt.minimize(fun=compute_cost, x0=theta, args=(x_poly, y, lambda_value),
                             method='TNC', jac=compute_gradient)
    new_theta = optimized.x
    # Prediction
    # NOTE(review): predictor, confusion_matrix and acc are not defined in
    # the code shown here; they must be provided before this loop can run.
    y_pred_train = predictor(x_poly, new_theta)
    cm_train = confusion_matrix(y, y_pred_train)
    t_train, f_train, acc_train = acc(cm_train)
    print('With lambda = {}, {} correct, {} wrong ==========> accuracy = {}%'
          .format(lambda_value, t_train, f_train, acc_train*100))
Now you should see output like this :
=== Iteration 0 === With lambda = 0, 104 correct, 14 wrong ==========> accuracy = 88.13559322033898%
=== Iteration 1 === With lambda = 1, 98 correct, 20 wrong ==========> accuracy = 83.05084745762711%
=== Iteration 2 === With lambda = 10, 88 correct, 30 wrong ==========> accuracy = 74.57627118644068%
=== Iteration 3 === With lambda = 100, 72 correct, 46 wrong ==========> accuracy = 61.016949152542374%
I am writing this algorithm for my final-year project. I used gradient descent to find the best-fit line. I also tried solving it in Excel using multiple regression. The values are different.
The csv file is attached here https://drive.google.com/file/d/1-UaU34w3c5-VunYrVz9fD7vRb0c-XDqk/view?usp=sharing. The first 3 columns are independent variables (x1,x2,x3) and the last is dependent (y).
This is a separate question: could you explain why the answer is different from the Excel values?
import numpy as np
import random
import pandas as pd
def gradientDescent(x, y, theta, alpha, m, numIterations, verbose=True):
    """Run batch gradient descent for linear least squares.

    Args:
        x: design matrix, one sample per row.
        y: target vector.
        theta: initial parameter vector.
        alpha: step size.
        m: number of samples used to scale the cost and the gradient.
        numIterations: number of update steps to perform.
        verbose: when True (the default), print the cost at every iteration.

    Returns:
        The parameter vector after ``numIterations`` updates.
    """
    xTrans = x.transpose()
    for i in range(numIterations):
        loss = np.dot(x, theta) - y
        if verbose:
            cost = np.sum(loss ** 2) / (2 * m)
            print("Iteration %d | Cost: %f" % (i, cost))
        gradient = np.dot(xTrans, loss) / m
        theta = theta - alpha * gradient
    return theta
# 'rU' is a file-open mode, not a read_csv argument: passed positionally it
# was taken as the separator and clashed with delimiter=",".
df = pd.read_csv(r'C:\Users\WELCOME\Desktop\FinalYearPaper\ConferencePaper\NewTrain.csv', delimiter=",", header=None)
df.columns = ['x0', 'Speed', 'Feed', 'DOC', 'Roughness']
print(df)
y = np.array(df['Roughness'])
#x = np.array(d)
# Every column except the target is a feature.
x = np.array(df.drop(columns=['Roughness']))
#x[:,2:3] = 1.0
print(x)
print(y)
m, n = np.shape(x)
print(m, n)
numIterations = 50000
alpha = 0.000001
theta = np.ones(n)
theta = gradientDescent(x, y, theta, alpha, m, numIterations)
print(theta)
Can I get the value of the RMSE from the scipy.optimize.leastsq module?
Here's a little example using leastsq:
import numpy as np
import scipy.optimize as optimize
import collections
# Six sample points: x values and the matching y values.
x, y = (
    np.array([821, 576, 473, 377, 326, 300]),
    np.array([255, 235, 208, 166, 157, 140]),
)
def sigmoid(p, x):
    """Evaluate the four-parameter sigmoid ``c / (1 + exp(-k*(x - x0))) + y0``.

    Args:
        p: the parameters ``(x0, y0, c, k)``, in that order.
        x: scalar or NumPy array of evaluation points.
    """
    x0, y0, c, k = p
    y = c / (1 + np.exp(-k * (x - x0))) + y0
    return y
def residuals(p, x, y):
    """Return the observed values minus the model: ``y - sigmoid(p, x)``."""
    return y - sigmoid(p, x)
# Named container so the fitted parameters can be read by name.
Param = collections.namedtuple('Param', 'x0 y0 c k')
p_guess = Param(x0=600, y0=200, c=100, k=0.01)
# full_output=1 makes leastsq also return cov, infodict, mesg and ier.
# The `warning` keyword is not accepted by current SciPy releases and has
# been dropped.
p, cov, infodict, mesg, ier = optimize.leastsq(
    residuals, p_guess, args=(x, y), full_output=1)
p = Param(*p)
xp = np.linspace(100, 1600, 1500)
print('''\
x0 = {p.x0}
y0 = {p.y0}
c = {p.c}
k = {p.k}
'''.format(p=p))
You could compute the residuals this way:
# Evaluate the residuals at the fitted parameters and show them.
resid=residuals(p,x,y)
print(resid)
# [ 0.76205302 -2.010142 2.60265297 -3.02849144 1.6739274 ]
But you don't have to compute resid -- infodict['fvec'] already contains the info.
fvec = infodict['fvec']
print(fvec)
# [ 0.76205302 -2.010142 2.60265297 -3.02849144 1.6739274 ]
# Sum of squared residuals.
chisq = (fvec**2).sum()
# dof is degrees of freedom: number of data points minus fitted parameters
dof = len(x) - len(p)
# Root of the squared-residual sum per degree of freedom.
rmse = np.sqrt(chisq / dof)
print(rmse)
# 5.40092057562