I have an optimization formulation where I have multiple decision variables, each decision variable has its own quadratic cost function term. I am planning to use a piecewise linear approximation to simplify the objective function through using the 'Piecewise' function in pyomo. I managed to that in a simple toy problem where I have a single decision variable, the issue arises when I am dealing with many decision variables. Having to write a new line for each decision variable with its own 'Piecewise' function is not viable, so I am trying to automate that with a for loop similar to the way you can do that with constraints.
Here's an example toy problem of what I'm trying to do:
import numpy as np
from pyomo.environ import *
from pyomo.core import *
from pyomo.opt import SolverFactory
def cost_function_0(x):
return x ** 2 + 3 * x + 4
def cost_function_1(x):
return x ** 2 + 6 * x - 2
xdata = np.linspace(-10, 10, 50)
ydata_0 = list(cost_function_0(xdata))
ydata_1 = list(cost_function_1(xdata))
xdata = list(xdata)
model = ConcreteModel()
model.N = range(2)
model.X = Var(model.N, bounds=(-10, 10))
model.Y = Var(model.N)
model.piecewise_0 = Piecewise(model.Y[0],model.X[0],
pw_pts=xdata,
pw_constr_type='EQ',
f_rule=ydata_0,
pw_repn='CC')
model.piecewise_1 = Piecewise(model.Y[1],model.X[1],
pw_pts=xdata,
pw_constr_type='EQ',
f_rule=ydata_1,
pw_repn='CC')
model.obj = Objective(expr=model.Y[0] + model.Y[1], sense=minimize)
opt = SolverFactory('glpk')
obj_val = opt.solve(model)
print('Decision variables: ', model.X[0].value, model.X[1].value)
print('Objective value: ', model.Y[0].value + model.Y[1].value)
So I am trying to replace the process of manually creating the Piecewise objects (model.piecewise_0, model.piecewise_1, ....) with an automated for loop but I got no luck so far.
Thanks in advance!
I hope this answer helps in some way:
import numpy as np
from pyomo.environ import *
from pyomo.core import *
from pyomo.opt import SolverFactory
A = {0:1, 1:1}
B = {0:3, 1:6}
C = {0:4, 1:-2}
lo = {0:-10, 1:-10}
up = {0:10, 1:10}
parts = {0:50, 1:50}
def cost_function(x,a,b,c):
return a * x ** 2 + b * x + c
model = ConcreteModel()
model.N = range(2)
def bounds_rule(model, i):
return (lo[i], up[i])
model.X = Var(model.N, bounds=bounds_rule)
model.Y = Var(model.N)
for idx,n in enumerate(model.N):
xdata = np.linspace(lo[n],up[n],parts[n])
ydata = list(cost_function(xdata,A[n],B[n],C[n]))
xdata = list(xdata)
piecewise = Piecewise(model.Y[n],model.X[n],
pw_pts=xdata,
pw_constr_type='EQ',
f_rule=ydata,
pw_repn='CC')
model.add_component('piecewise_'+ str(idx), piecewise)
def objetive(model):
return sum (model.Y[n] for n in model.N)
model.obj = Objective(rule = objetive, sense=minimize)
opt = SolverFactory('glpk')
obj_val = opt.solve(model)
print('Decision variables: ', model.X[0].value, model.X[1].value)
print('Objective value: ', model.Y[0].value + model.Y[1].value)
Related
I am having some trouble with a model I want to analyze. I am trying to plot two differential equations however I am very new to doing this and am not getting it to work. Any help is appreciated
#Polyaneuploid cell development during cancer
#two eqns
#Fixed Points:
#13.37526
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
def modelC(C,t):
λc = 0.0601
K = 2000
α = 1 * (10**-4)
ν = 1 * (10**-6)
λp = 0.1
γ = 2
def modelP(P,t):
λc = 0.0601
K = 2000
α = 1 * (10**-4)
ν = 1 * (10**-6)
λp = 0.1
γ = 2
#returning odes
dPdt = ((λp))*P(1-(C+(γ*P))/K)+ (α*C)
dCdt = ((λc)*C)(1-(C+(γ*P))/K)-(α*C) + (ν*P)
return dPdt, dCdt
#initial conditions
C0= 256
P0 = 0
#time points
t = np.linspace(0,30)
#solve odes
P = odeint(modelP,t,P0, args = (C0,))
C = odeint(modelC,t,C0, args= (P0,))
#P = odeint(modelP, P0 , t)
#P = P[:, 2]
#C = odeint(modelC, C0 , t)
#C = C[:, 2]
#plot results
plt.plot(t,np.log10(C0))
plt.plot(t,np.log10(P0))
plt.xlabel('time in days')
plt.ylabel('x(t)')
plt.show()
This is just what I have so far, and currently I am getting this error: ValueError: diff requires input that is at least one dimensional
Any tips on how to get the graphs to show?
You need to put your initial conditions in a list like so:
initial_conditions = [C0, P0]
P = odeint(modelP,t,initial_conditions)
you still have some error in your P function where try to access C which is not defined in the local scope of your function neither passed as an argument.
UPDATED
def modelP(P,t,C):
λc = 0.0601
K = 2000
α = 1 * (10**-4)
ν = 1 * (10**-6)
λp = 0.1
γ = 2
#returning odes
dPdt = ((λp))*P(1-(C+(γ*P))/K)+ (α*C)
dCdt = ((λc)*C)(1-(C+(γ*P))/K)-(α*C) + (ν*P)
return dPdt, dCdt
#initial conditions
C0= 256
P0 = 0
Pconds = [P0]
#time points
t = np.linspace(0,30)
#solve odes
P = odeint(modelP,t, Pconds, args=(C0,))
The solver deals with flat arrays with no inherent meaning in the components. You need to add that meaning, unpack the input vector into the state object, at the start of the model function, and remove that meaning, reduce the state to a flat array or list, at the end of the model function.
Here this is simple, the state consists of 2 scalars. Thus a structure for the model function is
def model(X,t):
P, C = X
....
return dPdt, dCdt
Then integrate as
X = odeint(model,(P0,C0),t)
P,C = X.T
plt.plot(t,P)
I'm trying to solve the following system: d²i/dt² + R'(i)/L di/dt + 1/LC i(t) = 1/L dE/dt as a set of coupled first order differential equations:
di/dt = k
dk/dt = 1/L dE/dt - R'(i)/L k - 1/LC i(t)
Here is the code I'm using:
import numpy as np
import sympy as sp
import matplotlib.pyplot as plt
from scipy.integrate import odeint
#Define model: x = [i , k]
def RLC(x , t):
i = sp.Symbol('i')
t = sp.Symbol('t')
#Data:
E = sp.ln(t + 1)
dE_dt = E.diff(t)
R1 = 1000 #1 kOhm
R2 = 100 #100 Ohm
R = R1 * i + R2 * i**3
dR_di = R.diff(i)
i = x[0]
k = x[1]
L = 10e-3 #10 mHy
C = 1.56e-6 #1.56 uF
#Model
di_dt = k
dk_dt = 1/L * dE_dt - dR_di/L * k - 1/(L*C) * i
dx_dt = np.array([di_dt , dk_dt])
return dx_dt
#init cond:
x0 = np.array([0 , 0])
#time points:
time = np.linspace(0, 30, 1000)
#solve ODE:
x = odeint(RLC, x0, time)
i = x[: , 0]
However, I get the following error: TypeError: Cannot cast array data from dtype('O') to dtype('float64') according to the rule 'safe'
So, I don't know if sympy and odeint don't work well together. Or maybe is it a problem because I defined t as sp.Symbol?
When you differentiate a function, you get a function back. So you need to evaluate it at a point in order to get a number. To evaluate a sympy expression, you could use .subs() but I prefer .replace() which feels more powerful (at least for me).
You must try and make every single variable have its own name in order to avoid confusion. For example, you replace the float input t with a sympy Symbol from the very beginning, thus losing the value of t. The variables x and i are also repeated in the outer scope which is not good practice if they mean different things.
The following should avoid confusion and hopefully produce something that you were expecting:
import numpy as np
import sympy as sp
import matplotlib.pyplot as plt
from scipy.integrate import odeint
# Define model: x = [i , k]
def RLC(x, t):
# define constants first
i = x[0]
k = x[1]
L = 10e-3 # 10 mHy
C = 1.56e-6 # 1.56 uF
R1 = 1000 # 1 kOhm
R2 = 100 # 100 Ohm
# define symbols (used to find derivatives)
i_symbol = sp.Symbol('i')
t_symbol = sp.Symbol('t')
# Data (differentiate and evaluate)
E = sp.ln(t_symbol + 1)
dE_dt = E.diff(t_symbol).replace(t_symbol, t)
R = R1 * i_symbol + R2 * i_symbol ** 3
dR_di = R.diff(i_symbol).replace(i_symbol, i)
# nothing should contain symbols from here onwards
# variables can however contain sympy expressions
# Model (convert sympy expressions to floats)
di_dt = float(k)
dk_dt = float(1 / L * dE_dt - dR_di / L * k - 1 / (L * C) * i)
dx_dt = np.array([di_dt, dk_dt])
return dx_dt
# init cond:
x0 = np.array([0, 0])
# time points:
time = np.linspace(0, 30, 1000)
# solve ODE:
solution = odeint(RLC, x0, time)
result = solution[:, 0]
print(result)
Just something to note: the value i = x[0] seemed to sit very close to 0 throughout each iteration. This means dR_di stayed basically at 1000 the whole time. I'm not familiar with odeint or your specific ODE, but hopefully this phenomenon is expected and isn't a problem.
I have an exponential function with two known variables, x, and y. I need to find the value of y when I input an x. However, my code could not go through and solve the problem.
My function and all relevant constants are given below:
import math
import numpy as np
import scipy.optimize as optimize
x1=np.array([0,20])
Vt = 0.026
Io = 23*math.pow(10,-10)
Iph = 2.282
idf = 1
Ns = 60
Nm = 1
Rse = 0.5
Rsh = 1000
x = np.linspace(x1.min(),x1.max(),300)
def equation(x,Iph,Io,Rse,Rsh,Ns,Nm,Vt):
return y - Iph + Io*(np.exp((x+y*Rse)/(Ns*Nm*idf*Vt))-1) + x/Rsh + y*Rse/Rsh
y = optimize.newton(equation(10,Iph,Io,Rse,Rsh,Ns,Nm,Vt), 7)
Present output:
File "<ipython-input-172-93ede88c9b49>", line 16, in ivcurve_equation
return y - Iph + Io*(np.exp((x+y*Rse)/(Ns*Nm*idf*Vt))-1) + v/Rsh + I*Rse/Rsh
TypeError: can't multiply sequence by non-int of type 'float'
Expected output:
y = a real and positive value # >0
Have a quick look at the docs and try to do some 'pattern matching'. The parameters of equation should only be variables and not constants. Here is a working version of your code, that you should tailor to your needs:
import math
import numpy as np
import scipy.optimize as optimize
x1=np.array([0,20])
Vt = 0.026
Io = 23*math.pow(10,-10)
Iph = 2.282
idf = 1
Ns = 60
Nm = 1
Rse = 0.5
Rsh = 1000
x_arr = np.linspace(x1.min(),x1.max(),300)
x = x_arr[0]
def equation(y):
return y - Iph + Io*(np.exp((x+y*Rse)/(Ns*Nm*idf*Vt))-1) + x/Rsh + y*Rse/Rsh
result = optimize.newton(equation, 7)
print(result)
Now if you want the output for an array of x's try this:
def equation(y,x):
return y - Iph + Io*(np.exp((x+y*Rse)/(Ns*Nm*idf*Vt))-1) + x/Rsh + y*Rse/Rsh
result = [optimize.newton(equation, 7, args = (a,)) for a in x_arr]
print(result)
Hope this helps!
I have been working with odeint and boundary conditions. Bassically what I am trying to do is to solve the differential equations given in this figure 1
where in my code R=R, ph = Phi, al = alpha, a = a, m = m, l = l and om = omega. The initial conditions that I am trying to implement are R(0)=O(r^l); Phi(0)=O(r^{l-1}) if l/=0 and Phi(0)=O(r) if l=0; a(0) = 1 and a(inf)=1/alpha(inf) (additionally I need that R(inf)=0). I tried to applied the shooting method in order to find initial conditions for alpha that best matches with my boundary conditions. I also need to find the omega that best matches the boundary conditions for R at infinity. The code that I wrote is the following:
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
import time
start = time.clock()
def system_DE(IC,p,r):
l = p[0]
m = p[1]
om = p[2]
R = IC[0]
ph = IC[1]
a = IC[2]
al = IC[3]
dR_dr = ph
da_dr = a*((2*l+1)*r/2*(om**2*a**2*R**2/al**2+ph**2+l*(l+1)*a**2*R**2/r**2+m**2*a**2*R**2)-(a**2-1)/(2*r))
dal_dr = al*(da_dr/a-l*(l+1)*(2*l+1)*a**2*R**2/r-(2*l+1)*m**2*a**2*r*R**2+(a**2-1)/r)
dph_dr = -2*ph/r-dal_dr*ph/al+da_dr*ph/a-om**2*a**2*R/al**2+l*(l+1)*a**2*R/r**2+m**2*a**2*R
return [dR_dr,da_dr,dal_dr,dph_dr]
def init(u,p,r):
if p==0:
return np.array([1,r,1,u])
else:
return np.array([r**l,l*r**(l-1),1,u])
l = 0
m = 1
ep = 0.3
n_om = 10
omega = np.linspace(m-ep,m+ep,n_om)
r = np.linspace(0.0001, 100, 1000)
niter = 100
u = 0
tol = 0.1
ustep = 0.01
p = np.zeros(3)
p[0] = l
p[1] = m
for j in range(len(omega)):
p[2] = omega[j]
for i in range(niter):
u += ustep
Y = odeint(system_DE(init(u,p[0],r[0]),p,r), init(u,p[0],r[0]), r)
print Y[-1,2]
print Y[-1,3]
if abs(Y[len(Y)-1,2]-1/Y[len(Y)-1,3]) < tol:
print(i,'times iterations')
print("a'(inf)) = ", Y[len(Y)-1,2])
print('y"(0) =',u)
break
if abs(Y[len(Y)-1,0]) < tol:
print(j,'times iterations in omega')
print("R'(inf)) = ", Y[len(Y)-1,0])
break
However, when I run it I am obtaining: error: The function and its Jacobian must be callable functions.
Could some one help me to understand what my mistake is?
Regards,
Luis Padilla.
To start with, the first argument to odeint is your derivative function system_DE. Just pass its name, no parentheses or arguments. Odeint with call it internally and supply arguments.
I fixed my code and now it is giving me some results. However, when I run it I am obtaining some warnings that I don't know how to solve it. Could some one help me to solve it? Basically my code is this:
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
import time
def system_DE(IC,r,l,m,om):
R = IC[0]
ph = IC[1]
a = IC[2]
al = IC[3]
dR_dr = ph
da_dr = a*((2*l+1)*r/2*(om**2*a**2*R**2/al**2+ph**2+l*(l+1)*a**2*R**2/r**2+m**2*a**2*R**2)-(a**2-1)/(2*r))
dal_dr = al*(da_dr/a-l*(l+1)*(2*l+1)*a**2*R**2/r-(2*l+1)*m**2*a**2*r*R**2+(a**2-1)/r)
dph_dr = -2*ph/r-dal_dr*ph/al+da_dr*ph/a-om**2*a**2*R/al**2+l*(l+1)*a**2*R/r**2+m**2*a**2*R
return [dR_dr,dph_dr,da_dr,dal_dr]
def init(u,p,r):
if p==0:
return np.array([1.,r,1.,u])
else:
return np.array([r**p,l*r**(p-1),1,u])
l = 0.
m = 1.
ep = 0.2
n_om = 30
omega = np.linspace(m-ep,m+ep,n_om)
r = np.linspace(0.001, 100, 1000)
niter = 1000
tol = 0.01
ustep = 0.01
for j in range(len(omega)):
print('trying with $omega =$',omega[j])
p = (l,m,omega[j])
u = 0.001
for i in range(niter):
u += ustep
ini = init(u,p[0],r[0])
Y = odeint(system_DE, ini,r,p,mxstep=500000)
if abs(Y[len(Y)-1,2]-1/Y[len(Y)-1,3]) < tol:
break
if abs(Y[len(Y)-1,0]) < tol and abs(Y[len(Y)-1,2]-1/Y[len(Y)-1,3]) < tol:
print(j,'times iterations in omega')
print(i,'times iterations')
print("R'(inf)) = ", Y[len(Y)-1,0])
print("alpha(0)) = ", Y[0,3])
print("\omega",omega[j])
break
plt.subplot(2,1,1)
plt.plot(r,Y[:,0],'r',label = '$R$')
plt.plot(r,Y[:,1],'b',label = '$d R /dr$')
plt.xlim([0,10])
plt.legend()
plt.subplot(2,1,2)
plt.plot(r,Y[:,2],'r',label = 'a')
plt.plot(r,Y[:,3],'b', label = '$alpha$')
plt.xlim([0,10])
plt.legend()
plt.show()
But when I run it I am obtaining this:
lsoda-- warning..internal t (=r1) and h (=r2) are
such that in the machine, t + h = t on the next step
(h = step size). solver will continue anyway
in above, r1 = 0.1243782486482D+01 r2 = 0.8727680448722D-16
How could I fix the problem?
Regards,
Luis Padilla.
I am following Andrew's Coursera course on machine learning. I am trying to build a 3 layers neural net for digit recognition in Python (784 input, 25 hidden, 10 output). However, I am unable to get the predictions (of the training data) correct (accuracy < 5% at 100 iter, accuracy not increasing with iteration).
J (the cost function) seems to be going down (see photo 1) and I have done gradient checking (before minimizing) and it seems to match to around 1e-11 (see photo 2).
I have compared the theta1 and theta2 after 100 iterations to my working matlab code (see code snippet 1 for octave and code snippet 2 for python). It seems theta1 is reasonably similar but theta2 is very different -- see code snippet 2. (I know they should differ because of the different optimisation routines. However, firstly, I have place the same initial thetas into both codes. Secondly, my reasoning is that they should start to converge, or at least get close, after 100 iterations)
The only error I see is:
-c:32: RuntimeWarning: overflow encountered in exp
when running the sigmoid during the optimising. However, I was told that this is not essential and it is normal to encounter this error during optimising? Furthermore, because it is a sigmoid, anytime the input is large, it will tend towards 1 anyways.
I have also attached my code in snippet 3. I have cut out all the other non-essential bits (like gradient checking) to make it as short as possible.
I would appreciate any help into this as I cannot even find where it is going wrong, let alone fix it. Thank you.
Photos:
J (cost function) decreasing to 1.8 after 12 iterations
Gradient checking before optimizing, they look very similar
Code snippet:
Initializing Neural Network Parameters ...
initial1
-0.0100100
-0.0771400
-0.1113800
-0.0230100
0.0547800
-0.0505500
-0.0731200
-0.0988700
0.0128000
-0.0855400
-0.1002500
-0.1137200
-0.0669300
-0.0999900
0.0084500
-0.0363200
-0.0588600
-0.0431100
-0.1133700
-0.0326300
0.0282800
0.0052400
-0.1134600
-0.0617700
0.0267600
initial2
0.0273700
0.1026000
-0.0502100
-0.0699100
0.0190600
0.1004000
0.0784600
-0.0075900
-0.0362100
0.0286200
Doing fminunc
Training Neural Network...
Iteration 100 | Cost: 6.219605e-01
theta1
-0.0099719
-0.0768462
-0.1109559
-0.0229224
0.0545714
-0.0503575
-0.0728415
-0.0984935
0.0127513
-0.0852143
-0.0998682
-0.1132869
-0.0666751
-0.0996092
0.0084178
-0.0361817
-0.0586359
-0.0429458
-0.1129383
-0.0325057
0.0281723
0.0052200
-0.1130279
-0.0615348
0.0266581
theta2
1.124918
1.603780
-1.266390
-0.848874
0.037956
-1.360841
2.145562
-1.448657
-1.262285
-1.357635
theta1_initial
[-0.01001 -0.07714 -0.11138 -0.02301 0.05478 -0.05055 -0.07312 -0.09887
0.0128 -0.08554 -0.10025 -0.11372 -0.06693 -0.09999 0.00845 -0.03632
-0.05886 -0.04311 -0.11337 -0.03263 0.02828 0.00524 -0.11346 -0.06177
0.02676]
theta2_initial
[ 0.02737 0.1026 -0.05021 -0.06991 0.01906 0.1004 0.07846 -0.00759
-0.03621 0.02862]
Doing fminunc
-c:32: RuntimeWarning: overflow encountered in exp
theta1
[-0.00997202 -0.07680716 -0.11086841 -0.02292044 0.05455335 -0.05034252
-0.07280686 -0.09842603 0.01275117 -0.08516515 -0.0997987 -0.11319546
-0.06664666 -0.09954009 0.00841804 -0.03617494 -0.05861458 -0.04293555
-0.1128474 -0.0325006 0.02816879 0.00522031 -0.1129369 -0.06151103
0.02665508]
theta2
[ 0.27954826 -0.08007496 -0.36449273 -0.22988024 0.06849659 -0.47803973
1.09023041 -0.25570559 -0.24537494 -0.40341995]
#-----------------BEGIN HEADERS-----------------
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import csv
import scipy
#-----------------END HEADERS-----------------
#-----------------BEGIN FUNCTION 1-----------------
def randinitialize(L_in, L_out):
w = np.zeros((L_out, 1 + L_in))
epsilon_init = 0.12
w = np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init
return w
#-----------------END FUNCTION 1-----------------
#-----------------BEGIN FUNCTION 2-----------------
def sigmoid(lz):
g = 1.0/(1.0+np.exp(-lz))
return g
#-----------------END FUNCTION 2-----------------
#-----------------BEGIN FUNCTION 3-----------------
def sigmoidgradient(lz):
g = np.multiply(sigmoid(lz),(1-sigmoid(lz)))
return g
#-----------------END FUNCTION 3-----------------
#-----------------BEGIN FUNCTION 4-----------------
def nncostfunction(ltheta_ravel, linput_layer_size, lhidden_layer_size, lnum_labels, lx, ly, llambda_reg):
ltheta1 = np.array(np.reshape(ltheta_ravel[:lhidden_layer_size * (linput_layer_size + 1)], (lhidden_layer_size, (linput_layer_size + 1))))
ltheta2 = np.array(np.reshape(ltheta_ravel[lhidden_layer_size * (linput_layer_size + 1):], (lnum_labels, (lhidden_layer_size + 1))))
ltheta1_grad = np.zeros((np.shape(ltheta1)))
ltheta2_grad = np.zeros((np.shape(ltheta2)))
y_matrix = []
lm = np.shape(lx)[0]
eye_matrix = np.eye(lnum_labels)
for i in range(len(ly)):
y_matrix.append(eye_matrix[int(ly[i])-1,:]) #The minus one as python is zero based
y_matrix = np.array(y_matrix)
a1 = np.hstack((np.ones((lm,1)), lx)).astype(float)
z2 = sigmoid(ltheta1.dot(a1.T))
a2 = (np.concatenate((np.ones((np.shape(z2)[1], 1)), z2.T), axis=1)).astype(float)
a3 = sigmoid(ltheta2.dot(a2.T))
h = a3
J_unreg = 0
J = 0
J_unreg = (1/float(lm))*np.sum(\
-np.multiply(y_matrix,np.log(h.T))\
-np.multiply((1-y_matrix),np.log(1-h.T))\
,axis=None)
J = J_unreg + (llambda_reg/(2*float(lm)))*\
(np.sum(\
np.multiply(ltheta1[:,1:],ltheta1[:,1:])\
,axis=None)+np.sum(\
np.multiply(ltheta2[:,1:],ltheta2[:,1:])\
,axis=None))
delta3 = a3.T - y_matrix
delta2 = np.multiply((delta3.dot(ltheta2[:,1:])), (sigmoidgradient(ltheta1.dot(a1.T))).T)
cdelta2 = ((a2.T).dot(delta3)).T
cdelta1 = ((a1.T).dot(delta2)).T
ltheta1_grad = (1/float(lm))*cdelta1
ltheta2_grad = (1/float(lm))*cdelta2
theta1_hold = ltheta1
theta2_hold = ltheta2
theta1_hold[:,0] = 0;
theta2_hold[:,0] = 0;
ltheta1_grad = ltheta1_grad + (llambda_reg/float(lm))*theta1_hold;
ltheta2_grad = ltheta2_grad + (llambda_reg/float(lm))*theta2_hold;
thetagrad_ravel = np.concatenate((np.ravel(ltheta1_grad), np.ravel(ltheta2_grad)))
return (J, thetagrad_ravel)
#-----------------END FUNCTION 4-----------------
#-----------------BEGIN FUNCTION 5-----------------
def predict(ltheta1, ltheta2, x):
m, n = np.shape(x)
p = np.zeros(m)
h1 = sigmoid((np.hstack((np.ones((m,1)),x.astype(float)))).dot(ltheta1.T))
h2 = sigmoid((np.hstack((np.ones((m,1)),h1))).dot(ltheta2.T))
for i in range(0,np.shape(h2)[0]):
p[i] = np.argmax(h2[i,:])
return p
#-----------------END FUNCTION 5-----------------
## Setup the parameters you will use for this exercise
input_layer_size = 784; # 28x28 Input Images of Digits
hidden_layer_size = 25; # 25 hidden units
num_labels = 10; # 10 labels, from 0 to 9
data = []
#Reading in data, split into X and y, rewrite label 0 to 10 (for easy comparison to course)
with open('train.csv', 'rb') as csvfile:
has_header = csv.Sniffer().has_header(csvfile.read(1024))
csvfile.seek(0) # rewind
data_csv = csv.reader(csvfile, delimiter=',')
if has_header:
next(data_csv)
for row in data_csv:
data.append(row)
data = np.array(data)
x = data[:,1:]
y = data[:,0]
y = y.astype(int)
for i in range(len(y)):
if y[i] == 0:
y[i] = 10
#Set basic parameters
m, n = np.shape(x)
lambda_reg = 1.0
#Randomly initalize weights for Theta_initial
#theta1_initial = np.genfromtxt('tt1.csv', delimiter=',')
#theta2_initial = np.genfromtxt('tt2.csv', delimiter=',')
theta1_initial = randinitialize(input_layer_size, hidden_layer_size);
theta2_initial = randinitialize(hidden_layer_size, num_labels);
theta_initial_ravel = np.concatenate((np.ravel(theta1_initial), np.ravel(theta2_initial)))
#Doing optimize
fmin = scipy.optimize.minimize(fun=nncostfunction, x0=theta_initial_ravel, args=(input_layer_size, hidden_layer_size, num_labels, x, y, lambda_reg), method='L-BFGS-B', jac=True, options={'maxiter': 10, 'disp': True})
fmin
theta1 = np.array(np.reshape(fmin.x[:hidden_layer_size * (input_layer_size + 1)], (hidden_layer_size, (input_layer_size + 1))))
theta2 = np.array(np.reshape(fmin.x[hidden_layer_size * (input_layer_size + 1):], (num_labels, (hidden_layer_size + 1))))
p = predict(theta1, theta2, x);
for i in range(len(y)):
if y[i] == 10:
y[i] = 0
correct = [1 if a == b else 0 for (a, b) in zip(p,y)]
accuracy = (sum(map(int, correct)) / float(len(correct)))
print 'accuracy = {0}%'.format(accuracy * 100)
I think I have fixed the problem: it seems I messed up the index
should be:
y_matrix.append(eye_matrix[int(ly[i]),:])
instead of:
y_matrix.append(eye_matrix[int(ly[i])-1,:])