I am using Pymc to run a Gibbs sampler on a simple model with the data set as a list with 110 elements (55 observations in each dimension).
log y[i,j,k] = alpha[i,k] + beta[j,k] + mu[k]
where log y follows a multivariate normal distribution (because k = 2) with some covariance matrix that is modeled as rho, sigma1 and sigma2.
After taking log-transformation, the data becomes a list of 110 numbers ranging from 6 to 15.
This is the piece of code that I have used:
import pymc as pm
from pymc import Normal, Uniform, MvNormal, Exponential, Gamma,InverseGamma
from pymc import MCMC
mu = np.zeros(2, dtype=object)
alpha = np.zeros([10,2], dtype = object)
beta = np.zeros([10,2], dtype = object)
for k in range(2):
mu[k] = Normal('mu_{}'.format(k), 0,1000)
for i in range(0,10):
alpha[i][k] = Normal('alpha_{}_{}'.format(i,k), 0, 1000)
beta[i][k] = Normal('beta_{}_{}'.format(i,k), 0, 1000)
rho = Uniform('rho', lower = -1, upper = 1)
sigma1 = InverseGamma('sigma1', 2.0001,1) #sigma squared
sigma2 = InverseGamma('sigma2', 2.0001,1)
#pm.deterministic
def PRECISION():
PREC = [[sigma2/(sigma1*sigma2*(1-rho)),(-rho*
(sigma1*sigma2)**0.5)/(sigma1*sigma2*(1-rho))],[(-rho*
(sigma1*sigma2)**0.5)/(sigma1*sigma2*(1-rho)), sigma1/(sigma1*sigma2*(1-
rho))]]
return PREC
mean = np.zeros([10,10,2])
mean_list_1 = []
mean_list_2 = []
for i in range(10):
for j in range(10):
mean[i,j,0] = mu[0] + alpha[i][0] + beta[j][0]
mean_list_1.append(mean[i,j,0])
mean[i,j,1] = mu[1] + alpha[i][1] + beta[j][1]
mean_list_2.append(mean[i,j,1])
#Restructure the vector
bi_mean = np.zeros(55, dtype = object)
bi_data = np.zeros(55, dtype = object)
log_Y = np.zeros(55, dtype = object)
for i in range(55):
bi_mean[i] = [mean_list_1[i], mean_list_2[i]]
bi_data[i] = [data[i], data[i+55]]
log_Y = [pm.MvNormal('log-Y_{}'.format(i), bi_mean[i], PRECISION, value =
bi_data[i], observed = True) for i in range(55)]
monitor_list = [sigma1, sigma2, rho,mu, alpha, beta,log_Y]
model = MCMC([monitor_list],calc_deviance=True)
model.sample(iter=10000, burn=5000, thin=5)
I tried running it in Pymc but the resulting values of alpha and beta is too small to match the magnitude of the observations. Is there a way that I can check where I go wrong? Thank you.
Related
I used the Gaussian fit with 3 gauss to adjust but datai but I utility data that sometimes my curve contains only two Gaussians in it not find the parameter remnants to use and but great an error is what there is a method that but allows to change with curve fit function use if two or three gaussians .
for my function main, i have this code :
FitGWPS = mainCurveFitGWPS(global_ws, period, All_Max_GWPS, DoupleDip)
and my code for fit is :
import numpy as np
from scipy.optimize import curve_fit
#Functions-----------------------------------------
#Gaussian function
def _1gaus(X,C,X_mean,sigma):
return C*np.exp(-(X-X_mean)**2/(2*sigma**2))
def _3gaus(x, amp1,cen1,sigma1, amp2,cen2,sigma2, amp3,cen3,sigma3):
return amp1*np.exp(-(x-cen1)**2/(2*sigma1**2)) +\
amp2*np.exp(-(x-cen2)**2/(2*sigma2**2)) + amp3*np.exp(-(x-
cen3)**2/(2*sigma3**2))
def ParamFit (Gws, P, Max, popt_Firstgauss):
#Calculating the Lorentzian PDF values given Gaussian parameters and random variableX
width=0
Amp = []
cen = []
wid = []
for j in range(len(Max-1)):
Amp.append(0.8 * (Gws[Max[j]])) # Amplitude
cen.append(P[Max[j]]) # Frequency
if j == 0 : wid.append(0.3 + width * 2.) # Width
else : wid.append(0.3 + popt_Firstgauss[2] * 2.)
return Amp,wid,cen
def mainCurveFitGWPS(global_ws_in, period_in, All_Max_GWPS, DoupleDip):
#Calculating the Gaussian PDF values given Gaussian parameters and random variable X
# For the first fit we calculate with function of the max values
mean = sum(period_in*(global_ws_in))/sum((global_ws_in ))
sigma = np.sqrt(sum((global_ws_in)*(period_in-mean)**2)/sum((global_ws_in)))
Cst = 1 / ( 2* np.pi * sigma)
width=0
Amp = 0.8 * (global_ws_in[All_Max_GWPS[0]]) # Amplitude
cen = period_in[All_Max_GWPS[0]] # Frequency
wid = 0.3 + width * 2. #Width
Amp = []
cen = []
wid = []
for j in range(len(All_Max_GWPS-1)):
Amp.append(0.8 * (global_ws_in[All_Max_GWPS[j]])) # Amplitude
cen.append(period_in[All_Max_GWPS[j]]) # Frequency
if j == 0 : wid.append(0.3 + width * 2.)
else : wid.append(0.3 + popt_gauss[2] * 2.)
#do the fit!
popt_gauss, pcov_gauss = curve_fit(_1gaus, period_in, global_ws_in, p0 = [Cst,
mean, sigma])
FitGauss = _1gaus(period_in, *popt_gauss)
#I use the center, amplitude, and sigma values which I used to create the fake
#data
popt_3gauss, pcov_3gauss = curve_fit(_3gaus, period_in, global_ws_in, p0=[Amp[0],
cen[0], wid[0],Amp[1], cen[1], wid[1],Amp[2], cen[2], wid[2]], maxfev =5000)
Fit3Gauss = _3gaus(period_in, *popt_3gauss)
return Fit3Gauss
for example picture :
and
I got this code for spectral clustering.
https://github.com/BirdYin/scllc/blob/master/scllc.py
This is a landmark-based spectral clustering code.
What does the locality_linear_coding function do in this code?
class Scllc:
def __locality_linear_coding(self, data, neighbors):
indicator = np.ones([neighbors.shape[0], 1])
penalty = np.eye(self.n_neighbors)
# Get the weights of every neighbors
z = neighbors - indicator.dot(data.reshape(-1,1).T)
local_variance = z.dot(z.T)
local_variance = local_variance + self.lambda_val * penalty
weights = scipy.linalg.solve(local_variance, indicator)
weights = weights / np.sum(weights)
weights = weights / np.sum(np.abs(weights))
weights = np.abs(weights)
return weights.reshape(self.n_neighbors)
def fit(self, X):
[n_data, n_dim] = X.shape
# Select landmarks
if self.func_landmark == 'kmeans':
landmarks, centers, unknown = k_means(X, self.n_landmarks, n_init=1, max_iter=100)
nbrs = NearestNeighbors(metric='euclidean').fit(landmarks)
# Create properties of the sparse matrix Z
[dist, indy] = nbrs.kneighbors(X, n_neighbors = self.n_neighbors)
indx = np.ones([n_data, self.n_neighbors]) * np.asarray(range(n_data))[:, None]
valx = np.zeros([n_data, self.n_neighbors])
self.delta = np.mean(valx)
# Compute all the coded data
for index in range(n_data):
# Compute the weights of its neighbors
localmarks = landmarks[indy[index,:], :]
weights = self.__locality_linear_coding(X[index,:], localmarks)
# Compute the coded data
valx[index] = weights
# Construct sparse matrix
indx = indx.reshape(n_data * self.n_neighbors)
indy = indy.reshape(n_data * self.n_neighbors)
valx = valx.reshape(n_data * self.n_neighbors)
Z = sparse.coo_matrix((valx,(indx,indy)),shape=(n_data,self.n_landmarks))
Z = Z / np.sqrt(np.sum(Z, 0))
# Get first k eigenvectors
[U, Sigma, V] = svds(Z, k = self.n_clusters + 1)
U = U[:, 0:self.n_clusters]
embedded_data = U / np.sqrt(np.sum(U * U, 0))
You can see the documentation of numpy module to deal with n-dimensional array
.For exemple, the dot method do the product of the matrices
Than They have use the scipy module, you can also see the documentation on internet.
the first function of a class is always an initialize method. Because the user have to call it to fully use the class. It is the first function where are defined and saved all the variables that the user want
From pybss, I can't execute a simple using of the function ffdiag, which is by the way defined in my script bss.py :
from numpy import dot,diag,eye,zeros
from numpy.linalg import svd,pinv,multi_dot,norm,inv,cholesky
from scipy.linalg import expm
# remove later
import numpy as np
from . import linalg
# dimension
m=7
# number of matrices
n=2
# Load spectro and WL+GCph+XC
FISH_GCsp = np.loadtxt('Fisher_GCsp_flat.txt')
FISH_XC = np.loadtxt('Fisher_XC_GCph_WL_flat.txt')
# Marginalizing over uncommon parameters between the two matrices
COV_GCsp_first = np.linalg.inv(FISH_GCsp)
COV_XC_first = np.linalg.inv(FISH_XC)
COV_GCsp = COV_GCsp_first[0:m,0:m]
COV_XC = COV_XC_first[0:m,0:m]
# Invert to get Fisher matrix
FISH_sp = np.linalg.inv(COV_GCsp)
FISH_xc = np.linalg.inv(COV_XC)
# Drawing a random set of commuting matrices
C=np.zeros((n,m,m));
B=np.zeros((m,m));
C[0] = np.array(FISH_sp)
C[1] = np.array(FISH_xc)
# Perform operation of diagonalisation
invV, B, ut = ffdiag(C, 10, 1.0e-10, 100)
#print(B)
# Print diagonal matrices
M0 = np.dot(np.dot(B,C[0]),B.T)
M1 = np.dot(np.dot(B,C[1]),B.T)
print(M0)
print('')
print(M1)
FISH_final = M0 + M1
def ffdiag_update(R_tau,ortho):
'''
Single update for the non-orthogonal FFDIAG algorithm. Set ortho = True to
do the proper update for the orthogonal version of the algorithm.
'''
Dk = {}
Ek = {}
dim = len(R_tau[0])
n_lags = len(R_tau.keys())
for tau in R_tau.keys():
Dk[tau] = diag(diag(R_tau[tau]))
Ek[tau] = R_tau[tau] - Dk[tau]
W = zeros((dim,dim))
if ortho is False:
z = zeros(W.shape)
y = zeros(W.shape)
for i in range(0,dim):
for j in range(0,dim):
for tau in range(0,n_lags):
z[i,j] += Dk[tau][i,i]*Dk[tau][j,j]
y[i,j] += Dk[tau][j,j]*Ek[tau][i,j]
# compute W
for i in range(0,dim):
for j in range(i+1,dim):
W[i][j] = (z[i,j]*y[j,i]-z[i,i]*y[i,j])/(z[j,j]*z[i,i]-z[i,j]*z[i,j])
W[j][i] = (z[i,j]*y[i,j]-z[j,j]*y[j,i])/(z[j,j]*z[i,i]-z[i,j]*z[i,j])
else:
num = zeros((dim,dim))
den = zeros((dim,dim))
for i in range(0,dim):
for j in range(i+1,dim):
for tau in range(0,n_lags):
num[i,j] += Ek[tau][i,j]*(Dk[tau][i,i] - Dk[tau][j,j])
den[i,j] += (Dk[tau][i,i]-Dk[tau][j,j])**2
if i != j:
W[i,j] = num[i,j]/den[i,j]
# W must be skew-symmetric (W = -W^T)
W[j,i] = -W[i,j]
return W
def amuse(X, tau = 1):
'''
Runs the AMUSE algorithm on the signal matrix X; extracts a full
set of X.shape[0] sources
INPUT:
------
X : array, required
N_sig x t matrix of signal mixtures
tau : integer, optional
sets the lag used for cross-correlation matrix
computation
OUTPUT:
------
A : array
n_sig x n_sig mixing matrix
W : array
n_sig x n_sig unmixing matrix
S : array
n_sig x t array of extracted sources
'''
Rx = dot(X,X.T)
ux,sx,vx = svd(Rx, full_matrices = False)
psi = sx**0.5
C = diag(1/psi)
Y = dot(C,X)
t = X.shape[1]
Ry = linalg.lagged_covariance(Y,tau)[tau]
uy,sy,vy = svd((Ry + Ry.T)/2, full_matrices=False)
S = dot(vy.T,Y)
A = dot(dot(uy, diag(psi)), vy)
W = pinv(A)
return A,W,S
def sobi(X, max_lag = 15):
'''
Blind source separation using SOBI (second-order blind identification)
algorithm.
INPUT:
------
X : array, required
N_sig x t matrix of signal mixtures
max_lag : int, optional
maximum lag (in samples) for covariance matrix calculation
'''
Kw,Kd = linalg.whitening_matrix(X, len(X[:,0]))
Z = dot(Kw,X)
R_tau = linalg.lagged_covariance(Z,max_lag)
D = linalg.joint_diagonalizer(R_tau)
S = dot(D.T,Z)
A = dot(Kd,D)
W = dot(D.T,Kw)
return A,W,S
def ffdiag(X, max_lag = 10, eps = 1.0e-10, max_iter = 100):
'''
Blind source separation using FFDIAG. This version does not require that
the estimated mixing matrix be orthogonal.
INPUT:
------
X : array, required
N_sig x t matrix of signal mixtures
max_lag : int, optional
maximum lag (in samples) for covariance matrix calculation
eps : double, optional
convergence criterion for matrix updates
max_iter : int, optional
maximum number of iterations/updates
'''
R_tau = linalg.lagged_covariance(X,max_lag)
dim = len(R_tau[0])
n_lags = len(R_tau.keys())
W = zeros((dim,dim))
V = eye(dim)
C = R_tau
niter = 0
theta = 0.9
iter_eps = 1.0
while iter_eps > eps and niter < max_iter:
niter += 1
Vn1 = V
for tau in range(0,n_lags):
C[tau] = multi_dot([eye(dim) + W,C[tau],(eye(dim)+W).T])
# update term
W = ffdiag_update(C,False)
if norm(W) > theta:
W = (W*theta)/norm(W)
# update V
V = dot(eye(dim) + W,V)
delta = 0
for i in range(0,dim):
for j in range(0,dim):
if i == j:
pass
else:
delta += (V[i][j]-Vn1[i][j])**2
iter_eps = (delta/(dim*(dim-1)))
ut = dot(V,X)
return inv(V),V, ut
def ortho_ffdiag(X, max_lag = 10, eps = 1.0e-08, max_iter = 100):
'''
Blind source separation using FFDIAG. This version (like SOBI, AMUSE, etc.)
finds an orthogonal mixing matrix.
INPUT:
------
X : array, required
N_sig x t matrix of signal mixtures
max_lag : int, optional
maximum lag (in samples) for covariance matrix calculation
eps : double, optional
convergence criterion for matrix updates
max_iter : int, optional
maximum number of iterations/updates
'''
R_tau = linalg.lagged_covariance(X,max_lag)
dim = len(R_tau[0]) # formerly N
n_lags = len(R_tau.keys()) # formerly K
W = zeros((dim,dim))
V = eye(dim)
C = R_tau
n_iter = 0
theta = 0.9
iter_eps = 1.0
while iter_eps > eps and n_iter < max_iter:
n_iter += 1
Vn1 = V
for tau in range(0,n_lags):
C[tau] = multi_dot([eye(dim) + W,C[tau],(eye(dim)+W).T])
W = ffdiag_update(C,True)
if norm(W) > theta:
W = (W*theta)/norm(W)
# update V
V = dot(expm(W),V)
delta = 0
for i in range(0,dim):
for j in range(0,dim):
if i != j:
delta += (V[i][j]-Vn1[i][j])**2
eps = (delta/(dim*(dim-1)))
ut = dot(V,X)
return inv(V),V, ut
def fobi(X):
'''
Blind source separation via the FOBI (fourth order blind identification)
algorithm.
'''
R_x = linalg.lagged_covariance(X,0)
C = cholesky(R_x[0])
Y = dot(inv(C),X)
R_y = (norm(Y)**2)*(dot(Y,Y.T))
u,s,Y_i = svd(R_y)
alpha = dot(Y_i,Y) # messages (extracted sources)
X_i = dot(C,Y_i) # signatures (mixing matrix)
W_i = pinv(X_i) # unmixing matrix
# return things in A,W,S order to conform to other bss methods
return X_i,W_i,alpha
But I get a common error that I can't manage to fix :
Traceback (most recent call last):
File "bss.py", line 8, in
from . import linalg
ImportError: attempted relative import with no known parent package
You can see the browsing on this image :
I have also to put :
from linalg import linalg instead of but problem remains.
Another thing to do is to put :
from linalg import *
In this case, I get the following error :
Traceback (most recent call last):
File "bss.py", line 36, in <module>
invV, B, ut = ffdiag(C, 10, 1.0e-10, 100)
NameError: name 'ffdiag' is not defined
Is there a link between this error message and the line "from linalg import *"
I am using Scipy's odrpack to fit a linear function to some data that has uncertainties in both the x and y dimensions. Each data point has it's own uncertainty that is asymmetric.
I can fit a function using symmetric uncertainties, but this is not a true representation of my data.
How can I perform the fit with this in mind?
This is my code so far. It receives input data as a command line argument, and the uncertainties i'm using are just random numbers at the moment. (also, two fits are happening, one for positive data points another for the negative. The reasons are unrelated to this question)
import sys
import numpy as np
import scipy.odr.odrpack as odrpack
def f(B, x):
return B[0]*x + B[1]
xdata = sys.argv[1].split(',')
xdata = [float(i) for i in xdata]
xdata = np.array(xdata)
#find indices of +/- data
zero_ind = np.where(xdata >= 0)[0][0]
x_p = xdata[zero_ind:]
x_m = xdata[:zero_ind+1]
ydata = sys.argv[2].split(',')
ydata = [float(i) for i in ydata]
ydata = np.array(ydata)
y_p = ydata[zero_ind:]
y_m = ydata[:zero_ind+1]
sx_m = np.random.random(len(x_m))
sx_p = np.random.random(len(x_p))
sy_m = np.random.random(len(y_m))
sy_p = np.random.random(len(y_p))
linear = odrpack.Model(f)
data_p = odrpack.RealData(x_p, y_p, sx=sx_p, sy=sy_p)
odr_p = odrpack.ODR(data_p, linear, beta0=[1.,2.])
out_p = odr_p.run()
data_m = odrpack.RealData(x_m, y_m, sx=sx_m, sy=sy_m)
odr_m = odrpack.ODR(data_m, linear, beta0=[1.,2.])
out_m = odr_m.run()
Thanks!
I will just give you solution with random data,I could not bother to import your data
import numpy as np
import scipy.odr.odrpack as odrpack
np.random.seed(1)
N = 10
x = np.linspace(0,5,N)*(-1)
y = 2*x - 1 + np.random.random(N)
sx = np.random.random(N)
sy = np.random.random(N)
def f(B, x):
return B[0]*x + B[1]
linear = odrpack.Model(f)
# mydata = odrpack.Data(x, y, wd=1./np.power(sx,2), we=1./np.power(sy,2))
mydata = odrpack.RealData(x, y, sx=sx, sy=sy)
myodr = odrpack.ODR(mydata, linear, beta0=[1., 2.])
myoutput = myodr.run()
myoutput.pprint()
Than we got
Beta: [ 1.92743947 -0.94409236]
Beta Std Error: [ 0.03117086 0.11273067]
Beta Covariance: [[ 0.02047196 0.06690713]
[ 0.06690713 0.26776027]]
Residual Variance: 0.04746112419196648
Inverse Condition #: 0.10277763521624257
Reason(s) for Halting:
Sum of squares convergence
I am following Andrew's Coursera course on machine learning. I am trying to build a 3 layers neural net for digit recognition in Python (784 input, 25 hidden, 10 output). However, I am unable to get the predictions (of the training data) correct (accuracy < 5% at 100 iter, accuracy not increasing with iteration).
J (the cost function) seems to be going down (see photo 1) and I have done gradient checking (before minimizing) and it seems to match to around 1e-11 (see photo 2).
I have compared the theta1 and theta2 after 100 iterations to my working matlab code (see code snippet 1 for octave and code snippet 2 for python). It seems theta1 is reasonably similar but theta2 is very different -- see code snippet 2. (I know they should differ because of the different optimisation routines. However, firstly, I have place the same initial thetas into both codes. Secondly, my reasoning is that they should start to converge, or at least get close, after 100 iterations)
The only error I see is:
-c:32: RuntimeWarning: overflow encountered in exp
when running the sigmoid during the optimising. However, I was told that this is not essential and it is normal to encounter this error during optimising? Furthermore, because it is a sigmoid, anytime the input is large, it will tend towards 1 anyways.
I have also attached my code in snippet 3. I have cut out all the other non-essential bits (like gradient checking) to make it as short as possible.
I would appreciate any help into this as I cannot even find where it is going wrong, let alone fix it. Thank you.
Photos:
J (cost function) decreasing to 1.8 after 12 iterations
Gradient checking before optimizing, they look very similar
Code snippet:
Initializing Neural Network Parameters ...
initial1
-0.0100100
-0.0771400
-0.1113800
-0.0230100
0.0547800
-0.0505500
-0.0731200
-0.0988700
0.0128000
-0.0855400
-0.1002500
-0.1137200
-0.0669300
-0.0999900
0.0084500
-0.0363200
-0.0588600
-0.0431100
-0.1133700
-0.0326300
0.0282800
0.0052400
-0.1134600
-0.0617700
0.0267600
initial2
0.0273700
0.1026000
-0.0502100
-0.0699100
0.0190600
0.1004000
0.0784600
-0.0075900
-0.0362100
0.0286200
Doing fminunc
Training Neural Network...
Iteration 100 | Cost: 6.219605e-01
theta1
-0.0099719
-0.0768462
-0.1109559
-0.0229224
0.0545714
-0.0503575
-0.0728415
-0.0984935
0.0127513
-0.0852143
-0.0998682
-0.1132869
-0.0666751
-0.0996092
0.0084178
-0.0361817
-0.0586359
-0.0429458
-0.1129383
-0.0325057
0.0281723
0.0052200
-0.1130279
-0.0615348
0.0266581
theta2
1.124918
1.603780
-1.266390
-0.848874
0.037956
-1.360841
2.145562
-1.448657
-1.262285
-1.357635
theta1_initial
[-0.01001 -0.07714 -0.11138 -0.02301 0.05478 -0.05055 -0.07312 -0.09887
0.0128 -0.08554 -0.10025 -0.11372 -0.06693 -0.09999 0.00845 -0.03632
-0.05886 -0.04311 -0.11337 -0.03263 0.02828 0.00524 -0.11346 -0.06177
0.02676]
theta2_initial
[ 0.02737 0.1026 -0.05021 -0.06991 0.01906 0.1004 0.07846 -0.00759
-0.03621 0.02862]
Doing fminunc
-c:32: RuntimeWarning: overflow encountered in exp
theta1
[-0.00997202 -0.07680716 -0.11086841 -0.02292044 0.05455335 -0.05034252
-0.07280686 -0.09842603 0.01275117 -0.08516515 -0.0997987 -0.11319546
-0.06664666 -0.09954009 0.00841804 -0.03617494 -0.05861458 -0.04293555
-0.1128474 -0.0325006 0.02816879 0.00522031 -0.1129369 -0.06151103
0.02665508]
theta2
[ 0.27954826 -0.08007496 -0.36449273 -0.22988024 0.06849659 -0.47803973
1.09023041 -0.25570559 -0.24537494 -0.40341995]
#-----------------BEGIN HEADERS-----------------
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import csv
import scipy
#-----------------END HEADERS-----------------
#-----------------BEGIN FUNCTION 1-----------------
def randinitialize(L_in, L_out):
w = np.zeros((L_out, 1 + L_in))
epsilon_init = 0.12
w = np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init
return w
#-----------------END FUNCTION 1-----------------
#-----------------BEGIN FUNCTION 2-----------------
def sigmoid(lz):
g = 1.0/(1.0+np.exp(-lz))
return g
#-----------------END FUNCTION 2-----------------
#-----------------BEGIN FUNCTION 3-----------------
def sigmoidgradient(lz):
g = np.multiply(sigmoid(lz),(1-sigmoid(lz)))
return g
#-----------------END FUNCTION 3-----------------
#-----------------BEGIN FUNCTION 4-----------------
def nncostfunction(ltheta_ravel, linput_layer_size, lhidden_layer_size, lnum_labels, lx, ly, llambda_reg):
ltheta1 = np.array(np.reshape(ltheta_ravel[:lhidden_layer_size * (linput_layer_size + 1)], (lhidden_layer_size, (linput_layer_size + 1))))
ltheta2 = np.array(np.reshape(ltheta_ravel[lhidden_layer_size * (linput_layer_size + 1):], (lnum_labels, (lhidden_layer_size + 1))))
ltheta1_grad = np.zeros((np.shape(ltheta1)))
ltheta2_grad = np.zeros((np.shape(ltheta2)))
y_matrix = []
lm = np.shape(lx)[0]
eye_matrix = np.eye(lnum_labels)
for i in range(len(ly)):
y_matrix.append(eye_matrix[int(ly[i])-1,:]) #The minus one as python is zero based
y_matrix = np.array(y_matrix)
a1 = np.hstack((np.ones((lm,1)), lx)).astype(float)
z2 = sigmoid(ltheta1.dot(a1.T))
a2 = (np.concatenate((np.ones((np.shape(z2)[1], 1)), z2.T), axis=1)).astype(float)
a3 = sigmoid(ltheta2.dot(a2.T))
h = a3
J_unreg = 0
J = 0
J_unreg = (1/float(lm))*np.sum(\
-np.multiply(y_matrix,np.log(h.T))\
-np.multiply((1-y_matrix),np.log(1-h.T))\
,axis=None)
J = J_unreg + (llambda_reg/(2*float(lm)))*\
(np.sum(\
np.multiply(ltheta1[:,1:],ltheta1[:,1:])\
,axis=None)+np.sum(\
np.multiply(ltheta2[:,1:],ltheta2[:,1:])\
,axis=None))
delta3 = a3.T - y_matrix
delta2 = np.multiply((delta3.dot(ltheta2[:,1:])), (sigmoidgradient(ltheta1.dot(a1.T))).T)
cdelta2 = ((a2.T).dot(delta3)).T
cdelta1 = ((a1.T).dot(delta2)).T
ltheta1_grad = (1/float(lm))*cdelta1
ltheta2_grad = (1/float(lm))*cdelta2
theta1_hold = ltheta1
theta2_hold = ltheta2
theta1_hold[:,0] = 0;
theta2_hold[:,0] = 0;
ltheta1_grad = ltheta1_grad + (llambda_reg/float(lm))*theta1_hold;
ltheta2_grad = ltheta2_grad + (llambda_reg/float(lm))*theta2_hold;
thetagrad_ravel = np.concatenate((np.ravel(ltheta1_grad), np.ravel(ltheta2_grad)))
return (J, thetagrad_ravel)
#-----------------END FUNCTION 4-----------------
#-----------------BEGIN FUNCTION 5-----------------
def predict(ltheta1, ltheta2, x):
m, n = np.shape(x)
p = np.zeros(m)
h1 = sigmoid((np.hstack((np.ones((m,1)),x.astype(float)))).dot(ltheta1.T))
h2 = sigmoid((np.hstack((np.ones((m,1)),h1))).dot(ltheta2.T))
for i in range(0,np.shape(h2)[0]):
p[i] = np.argmax(h2[i,:])
return p
#-----------------END FUNCTION 5-----------------
## Setup the parameters you will use for this exercise
input_layer_size = 784; # 28x28 Input Images of Digits
hidden_layer_size = 25; # 25 hidden units
num_labels = 10; # 10 labels, from 0 to 9
data = []
#Reading in data, split into X and y, rewrite label 0 to 10 (for easy comparison to course)
with open('train.csv', 'rb') as csvfile:
has_header = csv.Sniffer().has_header(csvfile.read(1024))
csvfile.seek(0) # rewind
data_csv = csv.reader(csvfile, delimiter=',')
if has_header:
next(data_csv)
for row in data_csv:
data.append(row)
data = np.array(data)
x = data[:,1:]
y = data[:,0]
y = y.astype(int)
for i in range(len(y)):
if y[i] == 0:
y[i] = 10
#Set basic parameters
m, n = np.shape(x)
lambda_reg = 1.0
#Randomly initalize weights for Theta_initial
#theta1_initial = np.genfromtxt('tt1.csv', delimiter=',')
#theta2_initial = np.genfromtxt('tt2.csv', delimiter=',')
theta1_initial = randinitialize(input_layer_size, hidden_layer_size);
theta2_initial = randinitialize(hidden_layer_size, num_labels);
theta_initial_ravel = np.concatenate((np.ravel(theta1_initial), np.ravel(theta2_initial)))
#Doing optimize
fmin = scipy.optimize.minimize(fun=nncostfunction, x0=theta_initial_ravel, args=(input_layer_size, hidden_layer_size, num_labels, x, y, lambda_reg), method='L-BFGS-B', jac=True, options={'maxiter': 10, 'disp': True})
fmin
theta1 = np.array(np.reshape(fmin.x[:hidden_layer_size * (input_layer_size + 1)], (hidden_layer_size, (input_layer_size + 1))))
theta2 = np.array(np.reshape(fmin.x[hidden_layer_size * (input_layer_size + 1):], (num_labels, (hidden_layer_size + 1))))
p = predict(theta1, theta2, x);
for i in range(len(y)):
if y[i] == 10:
y[i] = 0
correct = [1 if a == b else 0 for (a, b) in zip(p,y)]
accuracy = (sum(map(int, correct)) / float(len(correct)))
print 'accuracy = {0}%'.format(accuracy * 100)
I think I have fixed the problem: it seems I messed up the index
should be:
y_matrix.append(eye_matrix[int(ly[i]),:])
instead of:
y_matrix.append(eye_matrix[int(ly[i])-1,:])