Dynamic open-loop optimization with Pyomo (Python)

I want to implement a dynamic open-loop optimization. I find that the documentation does not cover this use case very well, so I am working from these two examples:
https://jckantor.github.io/CBE32338/05.02-Optimization-Control-and-Estimation-using-Pyomo-With-Windows-ipopt.html
https://ndcbe.github.io/CBE60499/02.06-Pyomo-DAE.html
I can get a solution if I use the discretization method dae.finite_difference, but not with dae.collocation. It says:
pyomo.common.errors.ApplicationError: Solver (ipopt) did not exit normally
I want to use collocation because I want to constrain the input to be piecewise constant, which apparently is only possible with model = discretizer.reduce_collocation_points(...), and that method does not exist unless I use the collocation transformation. Here is my working code (I use bokeh to visualise the results); a sketch of the collocation setup I am aiming for follows the code.
from pyomo.environ import *
from pyomo.dae import *
import numpy as np
from utils.utils import SimSettings
from bokeh.plotting import figure, show
from bokeh.layouts import row, gridplot, grid
from bokeh.io import output_file
from bokeh.palettes import Category20b
results_dir = 'results/ukf_opt'
# Final time
tf = 120
# time vector
t_sim = np.arange(0, tf, 3)
# Define the model
model = ConcreteModel()
model.t = ContinuousSet(bounds=(0, tf)) #time
model.glu = Var(model.t, bounds=(0, None)) # first state
model.bio = Var(model.t, bounds=(0, None))# second state
model.lac = Var(model.t, bounds=(0, None))# Third state
model.v = Var(model.t, bounds=(0, 5)) # forth state
model.fglu = Var(model.t, bounds=(0, 0.5)) # Input
model.dglu = DerivativeVar(model.glu, wrt=model.t)
model.dbio = DerivativeVar(model.bio, wrt=model.t)
model.dlac = DerivativeVar(model.lac, wrt=model.t)
model.dv = DerivativeVar(model.v, wrt=model.t)
model.v_max = Param(initialize=0.08)
model.km = Param(initialize=0.4)
model.v_maxl = Param(initialize=0.04)
model.kml = Param(initialize=0.2)
model.cglu = Param(initialize=5)
# Initial condition
x0 = [1., 2., 0.1, 1.]
# Definition of the odes
def ode_glu(model, t):
    vl = (model.v_maxl * model.glu[t]) / (model.kml + model.glu[t])
    v = (model.v_max * model.glu[t]) / (model.km + model.glu[t])
    return model.dglu[t] == - v * model.bio[t] - vl * model.bio[t] + model.fglu[t] / model.v[t] * (
        model.cglu - model.glu[t])
def ode_bio(model, t):
    v = (model.v_max * model.glu[t]) / (model.km + model.glu[t])
    return model.dbio[t] == v * model.bio[t] - model.fglu[t] / model.v[t] * model.bio[t]
def ode_lac(model, t):
    vl = (model.v_maxl * model.glu[t]) / (model.kml + model.glu[t])
    return model.dlac[t] == vl * model.bio[t] - model.fglu[t] / model.v[t] * model.lac[t]
def ode_v(model, t):
    return model.dv[t] == model.fglu[t]
model.diffeq_glu = Constraint(model.t, rule=ode_glu)
model.diffeq_bio = Constraint(model.t, rule=ode_bio)
model.diffeq_lac = Constraint(model.t, rule=ode_lac)
model.diffeq_v = Constraint(model.t, rule=ode_v)
# Fix initial condition
model.glu[0].fix(x0[0])
model.bio[0].fix(x0[1])
model.lac[0].fix(x0[2])
model.v[0].fix(x0[3])
# I initialize a constant input. This is only to run a simulation to initialize the model.
# If I don't do it, even the finite-difference method does not work
model.Usim = Suffix(direction=Suffix.LOCAL)
model.Usim[model.fglu] = {0: 0.01}
# Here is the point where the problem is discretized. If I use dae.collocation instead of dae.finite_difference it does not work
discretizer = TransformationFactory('dae.finite_difference')
discretizer.apply_to(model, nfe=len(t_sim), scheme='FORWARD')
# Simulate the system with a constant input
sim = Simulator(model, package='casadi')
tsim, profiles = sim.simulate(integrator='idas', varying_inputs=model.Usim)
sim.initialize_model()
# Define objective function for the optimization
model.ls_control = sum([(model.lac[t]) ** 2 for t in model.t])
model.obj = Objective(expr=model.ls_control, sense=maximize)
# Solve the model. The print_level option has basically no visible effect; I do not know why
# (presumably Pyomo only streams the solver console when solve() is called with tee=True,
# so print_level then mainly affects what is written to output_file)
solver = SolverFactory('ipopt')
solver.options['print_level'] = 5
solver.options['output_file'] = f"{results_dir}/my_ipopt_log.txt"
solver.solve(model).write()
# Plot the results
profiles = np.concatenate((np.array([model.glu[t]() for t in t_sim]).reshape(-1, 1),
                           np.array([model.bio[t]() for t in t_sim]).reshape(-1, 1),
                           np.array([model.lac[t]() for t in t_sim]).reshape(-1, 1),
                           np.array([model.v[t]() for t in t_sim]).reshape(-1, 1)),
                          axis=1)
output_file(f"{results_dir}/simulation.html")
p_states = []
color_list = ['blue', 'green', 'magenta', 'red']
for k, state in enumerate(['glu', 'bio', 'lac', 'v']):
    p = figure(background_fill_color='#fafafa', y_range=(0, 3.5))
    p.line(t_sim, profiles[:, k],
           line_color=color_list[k], line_alpha=0.5, line_width=2)
    p_states.append(p)
grid = gridplot([[p_states[0], p_states[1]], [p_states[2], p_states[3]]])
show(grid)
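For reference, here is a minimal sketch of the collocation setup I am aiming for (untested; it assumes the same model as above, and that reduce_collocation_points is what forces the input fglu to a single value per finite element):

# Sketch only: swap the finite-difference transformation above for collocation,
# then reduce the collocation points of the input so it is piecewise constant.
discretizer = TransformationFactory('dae.collocation')
discretizer.apply_to(model, nfe=len(t_sim), ncp=3, scheme='LAGRANGE-RADAU')
# one value of fglu per finite element (piecewise-constant input)
model = discretizer.reduce_collocation_points(model, var=model.fglu, ncp=1, contset=model.t)

The rest of the script (simulation-based initialization, objective, ipopt call) is meant to stay the same; checking the ipopt log written to output_file is the first step when the solver "did not exit normally".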

Related

Python function calling with variable vs raw numbers

I am trying to implement a PSO algorithm from Wikipedia: https://en.wikipedia.org/wiki/Particle_swarm_optimization
My problem is that when I call the cost function with a variable (Gbest), and then manually call the cost function with the same Gbest data, I get a different output (cost), as shown in the screenshot (not reproduced here).
I am new to Python, so thank you for any suggestions.
Here is the complete code:
import matplotlib.pyplot as plt
import numpy as np
from control.matlab import *

A = np.array([[0, 0, 1], [0, 1, 0], [1, 2, -2]])
B = np.array([[0], [1], [0]])
C = np.array([[0, 1, 0]])
D = np.zeros([C.shape[0], B.shape[1]])
sys = ss(A, B, C, D)
sys_tf = tf(sys)
s = tf('s')

def cost(kp, ki):
    global sys_tf, G, y, t, r
    G = kp + ki/s
    C = feedback(sys_tf*G, 1)
    y, t = step(C, linspace(0, 100))
    r = np.ones(len(t))
    return np.sum(y-r)**2

part = 100
ite = 10000
dim = 2
w = 0.001
wdamp = 0.99
phip = 0.9
phig = 0.1
blo, bup = -10, 10
x = np.zeros([dim, part])
v = np.zeros([dim, part])
pbest = np.zeros([dim, part])
gbest = np.array([1000000, 1000000])

for i in range(part):
    for k in range(dim):
        x[k][i] = pbest[k][i] = np.random.uniform(blo, bup)
        v[k][i] = np.random.uniform(-np.abs(bup - blo), np.abs(bup - blo))
    if cost(pbest[0][i], pbest[1][i]) < cost(gbest[0], gbest[1]):
        gbest = np.array([pbest[0][i], pbest[1][i]])

for it in range(ite):
    for i in range(part):
        for k in range(dim):
            rp = np.random.uniform(0, 1)
            rg = np.random.uniform(0, 1)
            v[k, :] = w*v[k, :] + phip*rp*(pbest[k, :] - x[k, :]) + phig*rg*(gbest[k] - x[k, :])
            x[k, :] = x[k, :] + v[k, :]
            w = w*wdamp
        if cost(x[0][i], x[1][i]) < cost(pbest[0][i], pbest[1][i]):
            pbest[:, i] = x[:, i]
        if cost(pbest[0][i], pbest[1][i]) < cost(gbest[0], gbest[1]):
            gbest = np.array([pbest[0][i], pbest[1][i]])
    plt.plot(t, y, 'ro')
    plt.plot(t, r, 'x')
    plt.pause(0.005)
    plt.title(gbest)
print([gbest, cost(gbest[0], gbest[1])])
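One thing worth double-checking (not necessarily the cause of the mismatch): np.sum(y-r)**2 squares the sum of the error rather than summing the squared error, and cost() also overwrites the globals G, y, t and r on every call. A self-contained integral-squared-error cost, sketched below under the assumption that the closed-loop step response on a fixed time grid is what is wanted, makes repeated evaluations directly comparable:

import numpy as np
from control.matlab import tf, feedback, step

def ise_cost(kp, ki, plant_tf, t_grid=np.linspace(0, 100, 500)):
    # Integral-squared-error of the closed-loop step response for a PI controller
    s = tf('s')
    controller = kp + ki / s
    closed_loop = feedback(plant_tf * controller, 1)
    y, t = step(closed_loop, t_grid)
    r = np.ones(len(t))
    return np.sum((y - r) ** 2)   # sum of squared errors, not (sum of errors)**2

# usage: ise_cost(gbest[0], gbest[1], sys_tf) returns the same value on every call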

Fitting model to data using scipy differential evolution: "RuntimeError: The map-like callable must be of the form f(func, iterable)..."

I am trying to fit a model to data (extracted from an Excel file and imported using pandas), using a likelihood method. However, when running the code I get a "RuntimeError: The map-like callable must be of the form f(func, iterable), returning a sequence of numbers the same length as 'iterable'" error, which occurred at the "result_simul_G = minimize(negLogLike, params, method = 'differential_evolution', args=(x, y),)" line. Below I have my code; it's very integrated so I couldn't find a way to illustrate what's happening without showing most of it.
#================================================================================
import numpy as np
import pandas as pd
import os
from lmfit import minimize, Parameters, Parameter, report_fit
params = Parameters()
params.add('gamma', value=.45, min=0, max=1, vary = True)
params.add('n', value = 1, min=0, max=3, vary = True)
filename = 'data.xlsx'
#================================================================================
def negLogLike(params, xData, yData):
    new_xData = []
    new_yData = []
    for i in range(len(yData)):
        if (yData[i] != 0) and (xData[i] != 0):
            new_xData.append(xData[i])
            new_yData.append(yData[i])
    # convert to arrays so that x**n inside model() broadcasts element-wise
    new_xData = np.asarray(new_xData)
    new_yData = np.asarray(new_yData)
    model_result = model(new_xData, params)
    nll = 0
    epsilon = 10**-10
    for i in range(len(new_yData)):
        if model_result[i] < epsilon:
            model_result[i] = epsilon
        if model_result[i] > 1 - epsilon:
            model_result[i] = 1 - epsilon
        nll += new_yData[i] * np.log(model_result[i]) + (1 - new_yData[i]) * np.log(1 - model_result[i])
    return -nll
#================================================================================
def model(x, params):
    try:  # Get parameters
        g = params['gamma'].value
        n = params['n'].value
    except KeyError:
        g, n = params
    y = 1 - np.exp(-g * x**n)
    return y
#================================================================================
def GetFits(DataFrame):
    cell_count = 2300000
    GFP_GC_SIMUL = np.ones(DataFrame.shape[0], float)
    GFP_IC_SIMUL = np.ones(DataFrame.shape[0], float)
    # Data
    for i in range(DataFrame.shape[0]):
        GFP_GC_SIMUL[i] = DataFrame.loc[i, 'GFP genomes'] / cell_count
        GFP_IC_SIMUL[i] = DataFrame.loc[i, 'GFP IU'] / cell_count
    x = np.array(GFP_GC_SIMUL[10:-10])
    y = np.array(GFP_IC_SIMUL[10:-10])
    print('len=', len(x), x.dtype, ', x=', x)
    print('------------------------')
    print('len=', len(y), y.dtype, ', y=', y)
    result_simul_G = minimize(negLogLike, params, method='differential_evolution', args=(x, y),)
#================================================================================
DataFrame = pd.read_excel('data.xlsx', engine='openpyxl')
GetFits(DataFrame)
When debugging on my own I used print statements to see what x and y data was being supplied to the minimizer and this is what it showed:
len= 34 float64 , x= [0.14478261 0.28695652 0.28695652 0.28695652 0.57391304 0.57391304
0.57391304 0.8738913 0.8738913 0.8738913 1.16086957 1.16086957
1.16086957 1.44780435 1.44780435 1.44780435 1.73478261 1.73478261
1.73478261 2.03476087 2.03476087 2.03476087 2.32173913 2.32173913
2.32173913 2.60869565 2.60869565 2.60869565 2.86956522 2.86956522
2.86956522 7.17391304 7.17391304 7.17391304]
------------------------
len= 34 float64 , y= [0.005 0.01180435 0.01226087 0.01158696 0.036 0.03704348
0.03467391 0.07030435 0.06556522 0.07567391 0.1001087 0.09852174
0.0986087 0.13626087 0.13978261 0.13956522 0.16847826 0.16408696
0.19391304 0.1945 0.21319565 0.19052174 0.32204348 0.23330435
0.25028261 0.28136957 0.26293478 0.25893478 0.28273913 0.29717391
0.273 0.60826087 0.60834783 0.59482609]
I know this is quite a lot but I would appreciate any and all help.
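As a cross-check (assuming the same model y = 1 - exp(-g * x**n) and the x and y arrays printed above), the underlying optimizer can also be called directly through scipy.optimize.differential_evolution with a plain parameter vector, bypassing the lmfit wrapper:

import numpy as np
from scipy.optimize import differential_evolution

def negloglike_vec(theta, xData, yData):
    # same likelihood as negLogLike above, but with theta = (gamma, n) as a plain vector
    g, n = theta
    mask = (xData != 0) & (yData != 0)
    xs, ys = xData[mask], yData[mask]
    p = np.clip(1 - np.exp(-g * xs**n), 1e-10, 1 - 1e-10)
    return -np.sum(ys * np.log(p) + (1 - ys) * np.log(1 - p))

# usage (with the x, y arrays computed inside GetFits):
# bounds mirror params.add('gamma', min=0, max=1) and params.add('n', min=0, max=3)
# result = differential_evolution(negloglike_vec, bounds=[(0, 1), (0, 3)], args=(x, y))
# print(result.x, result.fun)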

Trouble getting an lmfit model to work with an integrate.quad function

I've defined a function that works ok to simulate some data.
For the physicists: the function should generate the Hebel-Slichter coherence peak below a superconducting transition, in NMR data.
I'd like to now fit the function to my data, using lmfit.
I'm getting an error message that seems to be because I'm using additional arguments (parameters) within the integrate.quad part of the function. I'm really not that skilled with Python. Is it possible to get this working?
The code is a bit messy, as I've got a section (commented out) just for plotting the data and the simulation. The code and error message are shown below:
import numpy as np
import matplotlib.pyplot as plt
import scipy.integrate as integrate
#import scipy.special as special
#from scipy.optimize import curve_fit
from lmfit import Model
#The function that describes the change in the DOS around the Fermi-level as a function of temperature below Tc
def T1Tfunc(En, Temperature, Gamma0, Nfactor, Gap2, Tc):
    kB = 8.617E-5
    Delta0 = kB * Tc * Gap2 / 2
    Delta1 = Delta0 * np.tanh(((Tc / Temperature) - 1) ** 0.5)
    Gamma1 = Gamma0 * (Temperature / Tc) ** Nfactor
    Enp = En + 8.974E-6
    EnB = En + Gamma1 * 1j
    EnBp = Enp + Gamma1 * 1j
    Ns = (EnB / np.sqrt(EnB * EnB - Delta1 * Delta1))
    Nsp = (EnBp / np.sqrt(EnBp * EnBp - Delta1 * Delta1))
    Ms = (Delta1 / np.sqrt(EnB * EnB - Delta1 * Delta1))
    Msp = (Delta1 / np.sqrt(EnBp * EnBp - Delta1 * Delta1))
    FE = 1/(1 + np.exp(En/(kB*Temperature)))
    FEp = 1/(1 + np.exp(Enp/(kB*Temperature)))
    func = (np.real(Ns)*np.real(Nsp) + np.real(Ms)*np.real(Msp))*FE*(1-FEp)
    return func
#The integration of the DOS, and resulting spin-lattice relaxation rate
def T1T(Temperature, Gamma0, Nfactor, Gap2, Tc, Koringaa, Koringab):
    kB = 8.61728E-5
    # Integration parameters
    aint = 0
    bint = 0.1
    I = integrate.quad(T1Tfunc, aint, bint, args=(Temperature, Gamma0, Nfactor, Gap2, Tc))[0]
    # For Korringa
    return I*(2/(kB*Temperature)) * (Koringaa + Koringab * Temperature)
    # For No-Korringa
    # return I*(2/(kB*Temperature)) * (Koringaa + Koringab * Tc)
T1T = np.vectorize(T1T)
#Some if functions for when the calculation values end up too small, and for above Tc
def T1Textended(Temperature, Gamma0, Nfactor, Gap2, Tc, Koringaa, Koringab):
    if Temperature < (0.1 * Tc):
        T1Te = 0
    elif Temperature < Tc:
        T1Te = T1T(Temperature, Gamma0, Nfactor, Gap2, Tc, Koringaa, Koringab)
    else:
        T1Te = Koringaa + Koringab * Temperature
    return T1Te
T1Textended = np.vectorize(T1Textended)
#Importing some example data to be fitted
filename = 'Rb2CsC60.txt'
data = np.loadtxt(filename, delimiter=',')
datax = data[:, 0]
datay = data[:, 1]
dataerr = data[:, 2]
#print(data)
# #For plotting a simulation resulting from the function, on top of data
# #Parameters when simulating
# Tc = 32.2
# Gamma0 = 9.56496E-4
# #Gap2 = 4.18126
# Gap2 =
# Nfactor = 1
# Koringaa = 0.02542
# Koringab = 4.24127E-4
# Temperature = np.arange((0.1 * Tc), (1.8 * Tc), (Tc / 100))
# DOST0p0 = T1Textended(Temperature , Gamma0 , Nfactor , Gap2 , Tc , Koringaa , Koringab)
# fig = plt.figure()
# ax = plt.axes()
# line1 = ax.plot(Temperature,DOST0p0)
# ax.scatter(datax,datay)
# #plt.ylim([0, 2])
# #plt.xlim([0, (2 * Tc)])
# plt.title(u"1/T_1T vs. Temperature \n \u0393\u2080 = {} eV, n = {}, T_c = {}".format(Gamma0, Nfactor, Tc))
# plt.xlabel("Temperature (K)")
# plt.ylabel("$1/(T_{1}T)$");
#Now attempting to fit the function to some data
#Define T1Textended as a function to be wrapped by the 'Model' fitting package of lmfit
HSmodel = Model(T1Textended)
#Define and set the parameters for the model, to be fitted
params = Model.Parameters()
params.add('Tc', value=32.2, vary=False)
params.add('Gamma0', value=1E-3, vary=True)
params.add('Nfactor', value=1, vary=False)
params.add('Gap2', value=4.25, vary=True)
params.add('Koringaa', value=1, vary=True)
params.add('Koringab', value=0, vary=True)
#Check that it is read properly and gives all the right input parameters and variables
print(HSmodel.param_names, HSmodel.independent_vars)
Error message:
runfile('C:/Users/Ross Colman/Qsync/Hebel-Slichter/DOS_simulation/HS_fit.py', wdir='C:/Users/Ross Colman/Qsync/Hebel-Slichter/DOS_simulation')
Traceback (most recent call last):
File "C:\Users\Ross Colman\Qsync\Hebel-Slichter\DOS_simulation\HS_fit.py", line 111, in
HSmodel = Model(T1Textended)
File "C:\ProgramData\Anaconda3\lib\site-packages\lmfit\model.py", line 277, in init
self._parse_params()
File "C:\ProgramData\Anaconda3\lib\site-packages\lmfit\model.py", line 489, in _parse_params
raise ValueError(f"varargs '*{fnam}' is not supported")
ValueError: varargs '*args' is not supported
Any help would be appreciated
lmfit.Model() cannot wrap a function that is "vectorized" by numpy.vectorize().
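That last line points at the root cause: lmfit's Model inspects the wrapped function's signature, and np.vectorize hides it behind *args (hence ValueError: varargs '*args' is not supported). Below is a minimal sketch of a workaround, assuming the plain (un-vectorized) T1Textended defined earlier is kept under its own name rather than being replaced by np.vectorize; note also that Model.Parameters() is not lmfit API, the usual call is make_params on the model instance:

import numpy as np
from lmfit import Model

def T1T_for_fit(Temperature, Gamma0, Nfactor, Gap2, Tc, Koringaa, Koringab):
    # Evaluate the scalar model point by point so lmfit sees a plain signature;
    # T1Textended here is the original function, NOT the np.vectorize wrapper.
    Temperature = np.atleast_1d(Temperature).astype(float)
    return np.array([T1Textended(T_, Gamma0, Nfactor, Gap2, Tc, Koringaa, Koringab)
                     for T_ in Temperature])

HSmodel = Model(T1T_for_fit, independent_vars=['Temperature'])
params = HSmodel.make_params(Tc=32.2, Gamma0=1e-3, Nfactor=1, Gap2=4.25,
                             Koringaa=1, Koringab=0)
params['Tc'].set(vary=False)
params['Nfactor'].set(vary=False)
result = HSmodel.fit(datay, params, Temperature=datax, weights=1.0/dataerr)
print(result.fit_report())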

How can I know the dimension of my variable?

I get this error:
ValueError: operands could not be broadcast together with shapes (365,) (2,)
But I'm surprised by this (2,).
How do I find out which variable has this (2,) dimension? None of my variables should have it.
Thank you for your help!
Below you can see the first script, where I define my function. It includes a loop and also another function inside it, so I don't know how to trace the shapes.
I have a lot of variables with dimension (365,) because they are functions of time, one value per day over 365 days.
I have some fixed variables, like the soil parameters, whose dimension is (1,).
But I don't know which variable ends up with the (2,) dimension. (A small shape-checking sketch follows the two scripts below.)
import pandas as pd
import numpy as np

def SA(MO=0,
       ETPr=0,
       SWSa=0,
       pb=1.70):
    DB = pd.read_excel("~/Documents/Spider/Data/data_base.xlsx", sheet_name="DB")
    DB1 = pd.read_excel("~/Documents/Spider/Bilan_Courgette.xlsx", sheet_name="sol")
    DB2 = pd.read_excel("~/Documents/Spider/Bilan_Courgette.xlsx", sheet_name="culture")
    # Intermediate calculations to determine ET0 per day
    # Array that represents "date" as a series 1 -> 365
    JourDeLAnnee = pd.Series(range(1, 366))
    # Purple
    dist_TS = 1 + (0.033 * np.cos(0.0172 * JourDeLAnnee))
    decli_So = 0.409 * np.sin((0.0172 * JourDeLAnnee) - 1.39)
    lat = 0.87266463
    ang_Hor_So = np.arccos(-np.tan(lat) * np.tan(decli_So))
    gamma = 0.067
    # Yellow
    delta = 2504 * np.exp((17.27 * DB.tsa_by_day) / (DB.tsa_by_day + 237.3)) / (DB.tsa_by_day + 237.3)**2
    rg = DB.ens_by_day / 1000000 * 86400
    ra = 37.6 * dist_TS * ((ang_Hor_So * np.sin(lat) * np.sin(decli_So)) +
                           (np.cos(lat) * np.cos(decli_So) * np.sin(ang_Hor_So)))
    rso = (0.75 + (2 * 0.00001 * 120)) * ra
    tw = (DB.tsa_by_day * np.arctan(0.151977 * ((DB.hra_by_day + 8.313659)**0.5))) + \
        np.arctan(DB.tsa_by_day + DB.hra_by_day) - np.arctan(DB.hra_by_day - 1.676331) + \
        (0.00391838 * ((DB.hra_by_day)**1.5) * np.arctan(0.023101 * DB.hra_by_day)) - 4.686035
    ed = (0.611 * np.exp((17.27 * tw) / (tw + 237.3))) - (0.0008 * (DB.tsa_by_day - tw) * 101.325)
    ea = ((0.611 * np.exp((17.27 * DB.tsa_max) / (DB.tsa_max + 237.3))) +
          (0.611 * np.exp((17.27 * DB.tsa_min) / (DB.tsa_min + 237.3)))) / 2.0
    rn = (0.77 * rg) - (((1.35 * (rg / rso)) - 0.35)
                        * (0.34 - (0.14 * (ed**0.5))) * (4.9E-9) * ((((273 + DB.tsa_max)**4) + ((273 + DB.tsa_min)**4)) / 2))
    # Computation of G
    from typing import List

    def get_g_constant(tsa_by_day: List[float], day: int):
        assert day >= 1
        return 0.38 * (tsa_by_day[day] - tsa_by_day[day - 1])

    def get_g_for_year(tsa_by_day: List[int]) -> List[float]:
        g_list = []
        for i in range(1, len(tsa_by_day)):
            g_value = get_g_constant(tsa_by_day, i)
            g_list.append(g_value)
        return g_list

    G = get_g_for_year(DB.tsa_by_day)
    G = [DB.tsa_by_day[0]] + G
    # The famous ET0
    ET0 = ((0.408 * delta * (rn - G)) + (gamma * (900 / (DB.tsa_by_day + 273)) * DB.vtt_by_day * (ea - ed))) / \
        (delta + (0.067 * (1 + (0.34 * DB.vtt_by_day))))
    # Computation of the soil parameters
    Profil = 500
    pb = 100 / ((MO / 224000) + ((100 - MO) / (1.64)))
    Os = 0.6355 + 0.0013 * DB1.A - 0.1631 * pb
    Or = 0
    lnα = (-4.3003) - (0.0097 * DB1.A) + (0.0138 * DB1.S) - (0.0992 * MO)
    lnn = -1.0846 - 0.0236 * DB1.A - 0.0085 * DB1.S + 0.0001 * (DB1.S)**2
    nn = np.exp(lnn) + 1
    m = 1 - (1 / nn)
    lnK0 = 1.9582 + 0.0308 * DB1.S - 0.6142 * pb - 0.1566 * MO
    λ = -1.8642 - 0.1317 * DB1.A + 0.0067 * DB1.S
    α = np.exp(lnα)
    K0 = np.exp(lnK0)
    θPf2 = (((1 + ((α * (10**2.5))**nn))**(-m)) * (Os - Or)) + Or
    θPf4 = (((1 + ((α * (10**4.2))**nn))**(-m)) * (Os - Or)) + Or
    SWS = θPf2 - θPf4
    diff = SWS * SWSa
    aj = diff / 2
    θPf2New = θPf2 + aj
    θPf4New = θPf4 - aj
    # Computation of the target stock volume p
    p = 0.04 * (5 - ET0) + DB2.ptab[0]
    θp = (1 - p) * (θPf2New - θPf4New) + θPf4New
    Vp = θp * Profil
    # The famous ETP
    import datetime
    DateS = datetime.datetime.strptime('30/03/2019', '%d/%m/%Y').timetuple().tm_yday
    DateR = datetime.datetime.strptime('15/09/2019', '%d/%m/%Y').timetuple().tm_yday
    ETP = ET0.copy()
    for n in range(364):
        if n >= (DateS - 1) and n <= (DateR - 1):
            ETP[n] = ET0[n] * DB2.Kc[0]
        else:
            ETP[n] = ET0[n] * DB2.SolNu[0]
    ETP[0] = 0
    ETPNew = ET0.copy()
    ETPNew = ETP - ETP * ETPr
    # The water balance
    Stock = ET0.copy()
    θ = ET0.copy()
    Drainage = ET0.copy()
    Irrigation = ET0.copy()
    Se = ET0.copy()
    SeC = ET0.copy()
    θ[0] = θPf2New
    Stock[0] = θ[0] * Profil
    for i in range(364):
        Se[i] = (θ[i] - Or) / (Os - Or)
        if Se[i] > 1:
            SeC[i] = 1
        else:
            SeC[i] = Se[i]
        Drainage[i] = K0 * (((SeC[i])**λ) * (1 - (1 - SeC[i]**(nn / (nn - 1)))**m)**2) * 10
        if Vp[i] - Stock[i] > 0:  # Here Stock is not defined
            Irrigation[i] = Vp[i] - Stock[i]
        else:
            Irrigation[i] = 0
        Stock[i + 1] = Stock[i] + DB.plu_by_day[i] - ETPNew[i] - Drainage[i] + Irrigation[i]
        θ[i + 1] = Stock[i + 1] / Profil
    return (Irrigation.sum())
Afterwards, I use a second script to do the sensitivity analysis. It is when I run this second script that I get the error 'ValueError: operands could not be broadcast together with shapes (365,) (2,)':
import numpy as np
from SALib.analyze import sobol
from SALib.sample import saltelli
from test import*
import matplotlib.pyplot as plt
# Set up dictionary with system parameters
problem = {
'num_vars': 4,
'names': ['MO', 'ETPr', 'SWSa', 'K0'],
'bounds': [[0, 10],
[0, 0.04135],
[0, 0.2615],
[1.40, 1.70],
]}
# Array with n's to use
nsamples = np.arange(50, 400, 50)
# Arrays to store the index estimates
S1_estimates = np.zeros([problem['num_vars'],len(nsamples)])
ST_estimates = np.zeros([problem['num_vars'],len(nsamples)])
# Loop through all n values, create sample, evaluate model and estimate S1 & ST
for i in range(len(nsamples)):
    print('n= ' + str(nsamples[i]))
    # Generate samples
    sampleset = saltelli.sample(problem, nsamples[i], calc_second_order=False)
    # Run model for all samples
    output = [SA(*sampleset[j, :]) for j in range(len(sampleset))]
    # Perform analysis
    results = sobol.analyze(problem, np.asarray(output), calc_second_order=False, print_to_console=False)
    # Store estimates
    ST_estimates[:, i] = results['ST']
    S1_estimates[:, i] = results['S1']
np.save('ST_estimates.npy', ST_estimates)
np.save('S1_estimates.npy', S1_estimates)
S1_estimates = np.load('S1_estimates.npy')
ST_estimates = np.load('ST_estimates.npy')
# Generate figure showing evolution of indices
fig = plt.figure(figsize=(18,9))
ax1 = fig.add_subplot(1,2,1)
handles = []
for j in range(problem['num_vars']):
    handles += ax1.plot(nsamples, S1_estimates[j, :], linewidth=5)
ax1.set_title('Evolution of S1 index estimates', fontsize=20)
ax1.set_ylabel('S1', fontsize=18)
ax1.set_xlabel('Number of samples (n)', fontsize=18)
ax1.tick_params(axis='both', which='major', labelsize=14)
ax2 = fig.add_subplot(1,2,2)
for j in range(problem['num_vars']):
    ax2.plot(nsamples, ST_estimates[j, :], linewidth=5)
ax2.set_title('Evolution of ST index estimates', fontsize=20)
ax2.set_ylabel('ST', fontsize=18)
ax2.tick_params(axis='both', which='major', labelsize=14)
ax2.set_xlabel('Number of samples (n)', fontsize=18)
fig.legend(handles, problem['names'], loc = 'right', fontsize=11)
plt.savefig('indexevolution.png')
# Calculate parameter rankings
S1_ranks = np.zeros_like(S1_estimates)
ST_ranks = np.zeros_like(ST_estimates)
for i in range(len(nsamples)):
    orderS1 = np.argsort(S1_estimates[:, i])
    orderST = np.argsort(ST_estimates[:, i])
    S1_ranks[:, i] = orderS1.argsort()
    ST_ranks[:, i] = orderST.argsort()
Thank you for your help!
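A generic way to locate the (2,) operand is to print the shape of every intermediate quantity just before the line that raises the error. A minimal sketch (the names are the ones used in the first script; the quantities derived from DB1 are only a guess at the culprit, since an Excel sheet with two rows would give everything computed from it shape (2,)):

import numpy as np

def report_shapes(**named_arrays):
    # Print the numpy shape of every array passed by keyword
    for name, value in named_arrays.items():
        print(name, 'shape =', np.shape(value))

# Example usage inside SA(), right before the water-balance loop:
# report_shapes(ET0=ET0, Vp=Vp, Stock=Stock, Os=Os, K0=K0, θPf2New=θPf2New)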

theano GRU rnn adam optimizer

Technical information:
OS: Mac OS X 10.9.5
IDE: Eclipse Mars.1 Release (4.5.1), with PyDev and Anaconda interpreter (grammar version 3.4)
GPU: NVIDIA GeForce GT 650M
Libs: numpy, aeosa, Sphinx-1.3.1, Theano 0.7, nltk-3.1
My background: I am very new to theano and numpy and haven't taken a formal course in machine learning or discrete math.
The recurrent neural network for natural language processing I currently use is taken from here:
https://github.com/dennybritz/rnn-tutorial-gru-lstm/blob/master/gru_theano.py
The only change made to this file is replacing references to theano.config.floatX with the string 'float32'.
I also use the utils.py and train.py modules included in the repository, with only minor changes.
The adam optimizer I plan to incorporate in place of the sgd/rms code implemented in the example repository is found here: https://gist.github.com/skaae/ae7225263ca8806868cb
Reproduced here (again with references to the .config.floatX replaced with the hard-coded 'float32'):
(theano as th, theano.shared as thsh, theano.tensor as T, numpy as np)
def adam(loss, all_params, learning_rate=0.001, b1=0.9, b2=0.999, e=1e-8, gamma=1-1e-8):
    """
    ADAM update rules
    Default values are taken from [Kingma2014]

    References:
    [Kingma2014] Kingma, Diederik, and Jimmy Ba.
    "Adam: A Method for Stochastic Optimization."
    arXiv preprint arXiv:1412.6980 (2014).
    http://arxiv.org/pdf/1412.6980v4.pdf
    """
    updates = []
    all_grads = th.grad(loss, all_params)
    alpha = learning_rate
    t = thsh(np.float32(1))
    b1_t = b1*gamma**(t-1)  # (Decay the first moment running average coefficient)
    for theta_previous, g in zip(all_params, all_grads):
        m_previous = thsh(np.zeros(theta_previous.get_value().shape, dtype='float32'))
        v_previous = thsh(np.zeros(theta_previous.get_value().shape, dtype='float32'))
        m = b1_t*m_previous + (1 - b1_t)*g  # (Update biased first moment estimate)
        v = b2*v_previous + (1 - b2)*g**2  # (Update biased second raw moment estimate)
        m_hat = m / (1-b1**t)  # (Compute bias-corrected first moment estimate)
        v_hat = v / (1-b2**t)  # (Compute bias-corrected second raw moment estimate)
        theta = theta_previous - (alpha * m_hat) / (T.sqrt(v_hat) + e)  # (Update parameters)
        updates.append((m_previous, m))
        updates.append((v_previous, v))
        updates.append((theta_previous, theta))
    updates.append((t, t + 1.))
    return updates
My question is this:
How would you modify the GRUTheano module to use the Adam method above in place of the builtin sgd/rmsprop function?
It looks like the key changes would be to lines 99-126 of GRUTheano:
# SGD parameters
learning_rate = T.scalar('learning_rate')
decay = T.scalar('decay')
# rmsprop cache updates
mE = decay * self.mE + (1 - decay) * dE ** 2
mU = decay * self.mU + (1 - decay) * dU ** 2
mW = decay * self.mW + (1 - decay) * dW ** 2
mV = decay * self.mV + (1 - decay) * dV ** 2
mb = decay * self.mb + (1 - decay) * db ** 2
mc = decay * self.mc + (1 - decay) * dc ** 2
self.sgd_step = theano.function(
    [x, y, learning_rate, theano.Param(decay, default=0.9)],
    [],
    updates=[(E, E - learning_rate * dE / T.sqrt(mE + 1e-6)),
             (U, U - learning_rate * dU / T.sqrt(mU + 1e-6)),
             (W, W - learning_rate * dW / T.sqrt(mW + 1e-6)),
             (V, V - learning_rate * dV / T.sqrt(mV + 1e-6)),
             (b, b - learning_rate * db / T.sqrt(mb + 1e-6)),
             (c, c - learning_rate * dc / T.sqrt(mc + 1e-6)),
             (self.mE, mE),
             (self.mU, mU),
             (self.mW, mW),
             (self.mV, mV),
             (self.mb, mb),
             (self.mc, mc)
             ])
I haven't tested this code, but the only change you need is to build the updates with adam(..) instead of the rmsprop updates provided there, so something like this should work. The complete code, with the rmsprop parts removed, looks like this:
import numpy as np
import theano as theano
import theano.tensor as T
from theano.gradient import grad_clip
import time
import operator


class GRUTheano(object):

    def __init__(self, word_dim, hidden_dim=128, bptt_truncate=-1):
        # Assign instance variables
        self.word_dim = word_dim
        self.hidden_dim = hidden_dim
        self.bptt_truncate = bptt_truncate
        # Initialize the network parameters
        E = np.random.uniform(-np.sqrt(1./word_dim), np.sqrt(1./word_dim), (hidden_dim, word_dim))
        U = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (6, hidden_dim, hidden_dim))
        W = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (6, hidden_dim, hidden_dim))
        V = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (word_dim, hidden_dim))
        b = np.zeros((6, hidden_dim))
        c = np.zeros(word_dim)
        # Theano: Created shared variables
        self.E = theano.shared(name='E', value=E.astype(theano.config.floatX))
        self.U = theano.shared(name='U', value=U.astype(theano.config.floatX))
        self.W = theano.shared(name='W', value=W.astype(theano.config.floatX))
        self.V = theano.shared(name='V', value=V.astype(theano.config.floatX))
        self.b = theano.shared(name='b', value=b.astype(theano.config.floatX))
        self.c = theano.shared(name='c', value=c.astype(theano.config.floatX))
        # We store the Theano graph here
        self.theano = {}
        self.__theano_build__()

    def __theano_build__(self):
        E, V, U, W, b, c = self.E, self.V, self.U, self.W, self.b, self.c
        x = T.ivector('x')
        y = T.ivector('y')

        def forward_prop_step(x_t, s_t1_prev, s_t2_prev):
            # This is how we calculated the hidden state in a simple RNN. No longer!
            # s_t = T.tanh(U[:,x_t] + W.dot(s_t1_prev))
            # Word embedding layer
            x_e = E[:, x_t]
            # GRU Layer 1
            z_t1 = T.nnet.hard_sigmoid(U[0].dot(x_e) + W[0].dot(s_t1_prev) + b[0])
            r_t1 = T.nnet.hard_sigmoid(U[1].dot(x_e) + W[1].dot(s_t1_prev) + b[1])
            c_t1 = T.tanh(U[2].dot(x_e) + W[2].dot(s_t1_prev * r_t1) + b[2])
            s_t1 = (T.ones_like(z_t1) - z_t1) * c_t1 + z_t1 * s_t1_prev
            # GRU Layer 2
            z_t2 = T.nnet.hard_sigmoid(U[3].dot(s_t1) + W[3].dot(s_t2_prev) + b[3])
            r_t2 = T.nnet.hard_sigmoid(U[4].dot(s_t1) + W[4].dot(s_t2_prev) + b[4])
            c_t2 = T.tanh(U[5].dot(s_t1) + W[5].dot(s_t2_prev * r_t2) + b[5])
            s_t2 = (T.ones_like(z_t2) - z_t2) * c_t2 + z_t2 * s_t2_prev
            # Final output calculation
            # Theano's softmax returns a matrix with one row, we only need the row
            o_t = T.nnet.softmax(V.dot(s_t2) + c)[0]
            return [o_t, s_t1, s_t2]

        [o, s, s2], updates = theano.scan(
            forward_prop_step,
            sequences=x,
            truncate_gradient=self.bptt_truncate,
            outputs_info=[None,
                          dict(initial=T.zeros(self.hidden_dim)),
                          dict(initial=T.zeros(self.hidden_dim))])
        prediction = T.argmax(o, axis=1)
        o_error = T.sum(T.nnet.categorical_crossentropy(o, y))
        # Total cost (could add regularization here)
        cost = o_error
        # Gradients
        dE = T.grad(cost, E)
        dU = T.grad(cost, U)
        dW = T.grad(cost, W)
        db = T.grad(cost, b)
        dV = T.grad(cost, V)
        dc = T.grad(cost, c)
        # Assign functions
        self.predict = theano.function([x], o)
        self.predict_class = theano.function([x], prediction)
        self.ce_error = theano.function([x, y], cost)
        self.bptt = theano.function([x, y], [dE, dU, dW, db, dV, dc])
        self.params = [self.E, self.U, self.W, self.V, self.b, self.c]
        updates = adam(cost, self.params)
        self.sgd_step = theano.function(
            inputs=[x, y],
            outputs=[],
            updates=updates
        )

    def calculate_total_loss(self, X, Y):
        return np.sum([self.ce_error(x, y) for x, y in zip(X, Y)])

    def calculate_loss(self, X, Y):
        # Divide calculate_loss by the number of words
        num_words = np.sum([len(y) for y in Y])
        return self.calculate_total_loss(X, Y) / float(num_words)


def adam(loss, all_params, learning_rate=0.001, b1=0.9, b2=0.999, e=1e-8,
         gamma=1-1e-8):
    """
    ADAM update rules
    Default values are taken from [Kingma2014]

    References:
    [Kingma2014] Kingma, Diederik, and Jimmy Ba.
    "Adam: A Method for Stochastic Optimization."
    arXiv preprint arXiv:1412.6980 (2014).
    http://arxiv.org/pdf/1412.6980v4.pdf
    """
    updates = []
    all_grads = theano.grad(loss, all_params)
    alpha = learning_rate
    t = theano.shared(np.float32(1))
    b1_t = b1*gamma**(t-1)  # (Decay the first moment running average coefficient)
    for theta_previous, g in zip(all_params, all_grads):
        m_previous = theano.shared(np.zeros(theta_previous.get_value().shape,
                                            dtype=theano.config.floatX))
        v_previous = theano.shared(np.zeros(theta_previous.get_value().shape,
                                            dtype=theano.config.floatX))
        m = b1_t*m_previous + (1 - b1_t)*g  # (Update biased first moment estimate)
        v = b2*v_previous + (1 - b2)*g**2  # (Update biased second raw moment estimate)
        m_hat = m / (1-b1**t)  # (Compute bias-corrected first moment estimate)
        v_hat = v / (1-b2**t)  # (Compute bias-corrected second raw moment estimate)
        theta = theta_previous - (alpha * m_hat) / (T.sqrt(v_hat) + e)  # (Update parameters)
        updates.append((m_previous, m))
        updates.append((v_previous, v))
        updates.append((theta_previous, theta))
    updates.append((t, t + 1.))
    return updates
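For completeness, a minimal (untested) usage sketch; X_train and y_train are assumed to be the index sequences produced by the repository's utils.py/train.py, and the Adam step no longer takes learning_rate or decay arguments because those now live inside adam(...):

# Hypothetical training loop; X_train / y_train as built by the repo's train.py
model = GRUTheano(word_dim=8000, hidden_dim=128, bptt_truncate=-1)
for epoch in range(10):
    for x_seq, y_seq in zip(X_train, y_train):
        model.sgd_step(x_seq, y_seq)   # previously: model.sgd_step(x, y, learning_rate, decay)
    print("epoch %d, loss %f" % (epoch, model.calculate_loss(X_train, y_train)))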
