Porting pymc2 code to pymc3: custom likelihood function - python

I am trying to implement the censored data example in Lee&Wagenmakers' book (Chapter 5.5, page 70). In pymc2, I have the following model:
nattempts = 950
nfails = 949
n = 50 # Number of questions
y = np.zeros(nattempts)
y[nattempts-1] = 1
z = 30
unobsmin = 15
unobsmax = 25
unobsrange = np.arange(unobsmin,unobsmax+1)
theta = pymc.Uniform("theta",lower = .25, upper = 1)
#pymc.observed
def Ylike(value=z, theta = theta, n=n, censorn=nfails, unobs=unobsrange):
ylikeobs = pymc.binomial_like(x=value, n=n, p=theta)
ylikeunobs = np.array([])
for i in unobs:
ylikeunobs = np.append(pymc.binomial_like(x=i, n=n, p=theta),ylikeunobs)
return ylikeobs+sum(ylikeunobs)*censorn
testmodel = pymc.Model([theta,Ylike])
mcmc = pymc.MCMC(testmodel)
mcmc.sample(iter = 20000, burn = 50, thin = 2)
which involved the decorater #pymc.observed.
I think I need to express the likelihood using the pm.DensityDist, however, I could not figure it out how to.

OK, I found out how to do it:
with pm.Model():
theta = pm.Uniform("theta",lower = .25, upper = 1)
def logp(value,n,p):
return pm.dist_math.bound(
pm.dist_math.binomln(n, value)
+ pm.dist_math.logpow(p, value)
+ pm.dist_math.logpow(1 - p, n - value),
0 <= value, value <= n,
0 <= p, p <= 1)
def Censorlike(value=z, n=n, censorn=nfails, unobs=unobsrange):
ylikeobs = logp(value=value, n=n, p=theta)
ylikeunobs = 0
for i in unobs:
ylikeunobs += logp(value=i, n=n, p=theta)
return ylikeobs+ylikeunobs*censorn
ylike = pm.DensityDist('ylike', Censorlike, observed={'value':z,'n':n,'censorn':nfails,'unobs':unobsrange})
trace = pm.sample(3e3)

Related

Python function calling with variable vs raw numbers

I am trying to implement a pso algorithm from Wikipedia https://en.wikipedia.org/wiki/Particle_swarm_optimization.
My problem is that when I am calling the cost function with a variable (Gbest), and then manually calling the cost function (with the Gbest data) I get a different output (cost) like the image bellow:
Code fault
I am new to python so thank you for any suggestions.
Here is the complete code:
import matplotlib.pyplot as plt
import numpy as np
from control.matlab import *
A = np.array([[0,0,1],[0,1,0],[1,2,-2]])
B = np.array( [[0],[1],[0]])
C = np.array([[0, 1,0]])
D = np.zeros([C.shape[0],B.shape[1]])
sys = ss(A,B,C,D)
sys_tf = tf(sys)
s = tf('s')
def cost(kp,ki):
global sys_tf, G, y, t, r
G = kp + ki/s
C = feedback(sys_tf*G, 1)
y, t = step(C, linspace(0,100))
r = np.ones(len(t))
return np.sum(y-r)**2
part = 100
ite = 10000
dim = 2
w = 0.001
wdamp = 0.99
phip = 0.9
phig = 0.1
blo, bup = -10,10
x = np.zeros([dim, part])
v = np.zeros([dim, part])
pbest = np.zeros([dim, part])
gbest = np.array([1000000,1000000])
for i in range(part):
for k in range(dim):
x[k][i] = pbest[k][i] = np.random.uniform(blo, bup)
v[k][i] = np.random.uniform(-np.abs(bup - blo), np.abs(bup - blo))
if cost(pbest[0][i], pbest[1][i]) < cost(gbest[0], gbest[1]):
gbest = np.array([pbest[0][i], pbest[1][i]])
for it in range(ite):
for i in range(part):
for k in range(dim):
rp = np.random.uniform(0,1)
rg = np.random.uniform(0,1)
v[k,:] = w*v[k,:] + phip*rp*(pbest[k,:] - x[k,:]) + phig*rg*(gbest[k] - x[k,:])
x[k,:] = x[k,:] + v[k,:]
w = w*wdamp
if cost(x[0][i], x[1][i]) < cost(pbest[0][i], pbest[1][i]):
pbest[:,i] = x[:,i]
if cost(pbest[0][i], pbest[1][i]) < cost(gbest[0], gbest[1]):
gbest = np.array([pbest[0][i], pbest[1][i]])
plt.plot(t, y, 'ro')
plt.plot(t, r, 'x')
plt.pause(0.005)
plt.title(gbest)
print([gbest, cost(gbest[0], gbest[1])])

Fitting model to data using scipy differential evolution: "RuntimeError: The map-like callable must be of the form f(func, iterable)..."

I am trying to fit a model to data (extracted from an Excel file and imported using pandas), using a likelihood method. However, when running the code I get a "RuntimeError: The map-like callable must be of the form f(func, iterable), returning a sequence of numbers the same length as 'iterable'" error, which occurred at the "result_simul_G = minimize(negLogLike, params, method = 'differential_evolution', args=(x, y),)" line. Below I have my code; it's very integrated so I couldn't find a way to illustrate what's happening without showing most of it.
#================================================================================
import numpy as np
import pandas as pd
import os
from lmfit import minimize, Parameters, Parameter, report_fit
params = Parameters()
params.add('gamma', value=.45, min=0, max=1, vary = True)
params.add('n', value = 1, min=0, max=3, vary = True)
filename = 'data.xlsx'
#================================================================================
def negLogLike(params, xData, yData):
new_xData = []
new_yData = []
for i in range(len(yData)):
if ((yData[i] != 0) and (xData[i] != 0)):
new_xData.append(xData[i])
new_yData.append(yData[i])
model_result = model(new_xData, params)
nll = 0
epsilon = 10**-10
for i in range(len(new_yData)):
if (model_result[i] < epsilon):
model_result[i] = epsilon
if (model_result[i] > 1 - epsilon):
model_result[i] = 1 - epsilon
nll += new_yData[i] * np.log(model_result[i]) + (1 - new_yData[i]) * np.log(1 - model_result[i])
return -nll
#================================================================================
def model(x, params):
try: # Get parameters
g = params['gamma'].value
n = params['n'].value
except KeyError:
g, n = params
y = 1 - np.exp(-g * x**n)
return y
#================================================================================
def GetFits(DataFrame):
cell_count = 2300000
GFP_GC_SIMUL = np.ones(DataFrame.shape[0], float)
GFP_IC_SIMUL = np.ones(DataFrame.shape[0], float)
# Data
for i in range(DataFrame.shape[0]):
GFP_GC_SIMUL[i] = DataFrame.loc[i, 'GFP genomes'] / cell_count
GFP_IC_SIMUL[i] = DataFrame.loc[i, 'GFP IU'] / cell_count
x = np.array(GFP_GC_SIMUL[10:-10])
y = np.array(GFP_IC_SIMUL[10:-10])
print('len=', len(x), x.dtype, ', x=', x)
print('------------------------')
print('len=', len(y), y.dtype, ', y=', y)
result_simul_G = minimize(negLogLike, params, method = 'differential_evolution', args=(x, y),)
#================================================================================
DataFrame = pd.read_excel('data.xlsx', engine='openpyxl')
GetFits(DataFrame)
When debugging on my own I used print statements to see what x and y data was being supplied to the minimizer and this is what it showed:
len= 34 float64 , x= [0.14478261 0.28695652 0.28695652 0.28695652 0.57391304 0.57391304
0.57391304 0.8738913 0.8738913 0.8738913 1.16086957 1.16086957
1.16086957 1.44780435 1.44780435 1.44780435 1.73478261 1.73478261
1.73478261 2.03476087 2.03476087 2.03476087 2.32173913 2.32173913
2.32173913 2.60869565 2.60869565 2.60869565 2.86956522 2.86956522
2.86956522 7.17391304 7.17391304 7.17391304]
------------------------
len= 34 float64 , y= [0.005 0.01180435 0.01226087 0.01158696 0.036 0.03704348
0.03467391 0.07030435 0.06556522 0.07567391 0.1001087 0.09852174
0.0986087 0.13626087 0.13978261 0.13956522 0.16847826 0.16408696
0.19391304 0.1945 0.21319565 0.19052174 0.32204348 0.23330435
0.25028261 0.28136957 0.26293478 0.25893478 0.28273913 0.29717391
0.273 0.60826087 0.60834783 0.59482609]
I know this is quite a lot but I would appreciate any and all help.

Python sympy solve with functions

I im trying to define at what flow "pressure_hill" are equal to "pressure_returnline" in function def calculateflow(Q):
im trying to use sympy solve but get error message below.
pressure_returnlie and pressure_hill are calculating a pressure drop that depends on flowvalue from water. I know that both will have the same pressure drop and I want to calculate what flow I have in each pipe.
When running the code I get TypeError: cannot determine truth value of Relational
Is there a better way than using sympy solver?
Thanks // Sebastian
import numpy as np
import fluids
from scipy import stats
# #input h and D0
# h = 100 # mm
D0 = 149# mm
# flow = 5/60 #m3/min
# density = 999 #kg/m3
pi = 3.1416
# mu = 1E-3 # viscosity
# length = 10 # meter
# #calculation
def valve_pos(pos,control_sig,time):## Change / Secound
cntrl_max = 254
cntrl_min = 1
cntrl_mid = 127
time_forw = 1
time_back = 1.5
time_forw_min = 6
time_back_min = 5
min_pos = 5
max_pos = 149
cntrl_time = 0
cntrl_time_min = 0
way = 1
if control_sig == cntrl_mid:
return 0
if control_sig > cntrl_mid:
cntrl_time = time_forw
cntrl_time_min = time_forw_min
way = 1
else:
cntrl_time = time_back
cntrl_time_min = time_back_min
way = -1
coeff = stats.linregress([cntrl_min,cntrl_mid,cntrl_max],[-1,0,1])
rate = ((max_pos - min_pos) / cntrl_time) * (control_sig*coeff[0] + coeff[1]) * time
rate_min = ((max_pos - min_pos) / cntrl_time_min) * way * time
if abs(rate) < abs(rate_min):
rate = rate_min
if ((pos + rate) > max_pos) & (way > 0):
rate = 0
if ((pos - rate) < min_pos) & (way < 0):
rate = 0
return rate
def velocity(D0, flow):
d = D0/1000
area = (d**2*pi)/4
w0 = flow/area # flow velocity m/s
return w0
def knife_valve_pressure_loss (h,D0, density, flow):
w0 = velocity(D0,flow)
if h/D0 < 0.9 and h/D0 >= 0.2:
a = np.array([7.661175,-72.63827,345.7625,-897.8331,1275.939,-938.8331,278.8193])
i = np.array([0,1,2,3,4,5,6])
dzeta = np.exp(2.3*sum(a*(h/D0)**i))
elif h/D0 >= 0.9:
dzeta = 0.6 - 0.6 * (h/D0)
elif h/D0 < 0.2 and h/D0 >= 0.0:
dzeta = 13114 * (h/D0)**2 - 5216.1 * h/D0 + 553.17
else:
print ('formula cannot be used for this h/D0')
return 0
pressure_loss = dzeta * density * 0.5 * w0**2
if pressure_loss < 0:
pressure_loss = 0
return pressure_loss/100000
def pipe_losses(D0,flow,density,length,mu= 1E-3):
w0 = velocity(D0,flow)
Re = fluids.Reynolds(V=w0, D=D0/1000, rho=1000, mu=mu)
fd = fluids.friction_factor(Re, eD=1E-5/0.05)
K = fluids.K_from_f(fd=fd, L=length, D=D0/1000)
K += fluids.exit_normal()
K += 4*fluids.bend_rounded(Di=D0/1000, angle=90, fd=fd)
K += 3.6
pressure_loss_pipe_fittings = fluids.dP_from_K(K, rho=density, V=w0)/100000
liftheight_loss = 2/10.2
return pressure_loss_pipe_fittings + liftheight_loss
def pressure_hill(flow, h = 149, length = 17, D0 = 139, density = 999):
p_valve = knife_valve_pressure_loss (h,D0, density, flow)
p_system = pipe_losses(D0,flow,density,length)
return p_valve + p_system
def pressure_returnline(flow, h = 149, length = 1, D0 = 139, density = 999):
p_valve = knife_valve_pressure_loss (h,D0, density, flow)
p_system = pipe_losses((76/1000),flow,density,0.5)
return p_valve + p_system
def calculateflow(Q):
from sympy import symbols, solve, Eq, Symbol
x,y = symbols('x,y')
sol = solve(pressure_hill(x) - pressure_returnline(y) ,(x,y))
sol
calculateflow(0.1)
Traceback (most recent call last):
File "<ipython-input-59-42d41eef9605>", line 1, in <module>
calculateflow(0.1)
File "system_losses.py", line 122, in calculateflow
sol = solve(pressure_hill(x) - pressure_returnline(y) ,(x,y))
File "system_losses.py", line 109, in pressure_hill
p_valve = knife_valve_pressure_loss (h,D0, density, flow)
File "system_losses.py", line 90, in knife_valve_pressure_loss
if pressure_loss < 0:
File "relational.py", line 398, in __bool__
raise TypeError("cannot determine truth value of Relational")
TypeError: cannot determine truth value of Relational

GMRES residual plotting

I'm trying to do a plot of convergence of this GMRES alghorithm. I managed to create a class that make me print the residual at each iteration but I can't find a way to extract this data into an array so that i can plot it with matplotlib.
Here is my code:
matrixSize = 25
A = Atridiag(2, -1, -1, matrixSize)
A = scipy.sparse.csc_matrix (A)
b = np.matrix(np.ones((matrixSize, 1)))
x1 = np.matrix(np.ones((matrixSize, 1)))
M_i=scipy.sparse.linalg.spilu(A)
M2=scipy.sparse.linalg.LinearOperator((matrixSize,matrixSize),M_i.solve)
nmax_iter = 1
rstart = 1
tol = 1e-12
e = np.zeros((nmax_iter + 1, 1))
rr = 1
class gmres_counter(object):
def __init__(self, disp=True):
self._disp = disp
self.niter = 0
self.callbacks = []
def __call__(self, rk=None):
self.callbacks.append(str(rk))
self.niter += 1
if self._disp:
print('%s' %(str(rk)))
counter = gmres_counter()
x, info = scipy.sparse.linalg.gmres(A, b, x0=x1, tol=tol, restart=rstart,
M=M2, callback=counter)

FloatingPointError: overflow encountered in double_scalars

I've set up numpy.seterr as follows:
np.seterr(invalid='raise', over ='raise', under='raise')
And I'm getting the following error:
c = beta[j,i] + oneminusbeta[j,i]
FloatingPointError: overflow encountered in double_scalars
I've checked what beta[j,i] and oneminusbeta[j,i] are at the point of crash, and these are their values:
beta: -131.340389182
oneminusbeta: 0.0
Please note, this line of addition (beta[j,i] + oneminusbeta[j,i]) has run for thousands of lines in a loop (that performs image classification) before crashing here at this point.
How can I deal with this? Is it necessary to change the type of the numpy arrays?
This is how I've initialized them:
beta = np.empty([m,n])
oneminusbeta = np.empty([m,n])
Is it possible to cast the individual values before adding them up? Rather than changing the entire array declarations? Or is this even a serious issue? Would it be safe to simply turn off the numpy.seterr configuration and let the calculations go ahead without raising the error?
Edit
Someone suggested below, and I suspected as well, that the values being added shouldn't cause an overflow. Then how can I find out where the overflow is really happening?
This is my code:
epthreshold = 709
enthreshold = -708
f.write("weights["+str(i)+", " + str(j)+"] = math.exp(beta: " +str(beta[j,i])+ " + oneminusbeta: " + str(oneminusbeta[j,i])+")\n" )
c = beta[j,i] + oneminusbeta[j,i]
weights[i,j] = math.exp(np.clip(c, enthreshold, epthreshold))
And when I check my log file, this is the line I get:
weights[5550, 13] = math.exp(beta: -131.340389182 + oneminusbeta: 0.0)
Edit 2
Here's the rest of my code, where variables n,m and H have already been initialized to integer values:
import numba
import numpy as np
import statsmodels.api as sm
weights = np.empty([n,m])
for curr_n in range(n):
for curr_m in range(m):
weights[curr_n,curr_m] = 1.0/(n)
beta = np.empty([m,n])
oneminusbeta = np.empty([m,n])
for curr_class in range(m):
for curr_sample in range(n):
beta[curr_class,curr_sample] = 1./m
epthreshold = 709 # positive exponential threshold
enthreshold = -708
for h in range(H):
print "Boosting round %d ... " % h
z = np.empty([n,m])
for j in range(m): # computing working responses and weights, Step 2(a)(i)
for i in range(no_samples):
i_class = y[i] #get the correct class for the current sample
if h == 0:
z[i,j] = (int(j==i_class) - beta[j,i])/((beta[j,i])*(1. - beta[j,i]))
weights[i,j] = beta[j,i]*(1. - beta[j,i])
else:
if j == i_class:
z[i,j] = math.exp(np.clip(-beta[j,i],enthreshold, epthreshold))
else:
z[i,j] = -math.exp(np.clip(oneminusbeta[j,i], enthreshold, epthreshold))
f.write("weights["+str(i)+", " + str(j)+"] = math.exp(beta: " +str(beta[j,i])+ " + oneminusbeta: " + str(oneminusbeta[j,i])+")\n" )
c = beta[j,i] + oneminusbeta[j,i]
weights[i,j] = math.exp(np.clip(c, enthreshold, epthreshold))
g_h = np.zeros([1,1])
j = 0
# Calculating regression coefficients per class
# building the parameters per j class
for y1_w in zip(z.T, weights.T):
y1, w = y1_w
temp_g = sm.WLS(y1, X, w).fit() # Step 2(a)(ii)
if np.allclose(g_h,0):
g_h = temp_g.params
else:
g_h = np.c_[g_h, temp_g.params]
j = j + 1
if np.allclose(g,0):
g = g_h
else:
g = g + g_h # Step(2)(a)(iii)
# now set g(x), function coefficients according to new formula, step (2)(b)
sum_g = g.sum(axis=1)
for j in range(m):
diff = (g[:,j] - ((1./m) * sum_g))
g[:,j] = ((m-1.)/m) * diff
g_per_round[h,:,j] = g[:,j]
#Now computing beta, Step 2(c)...."
Q = 0.
e = 0.
for j in range(m):
# Calculating beta and oneminusbeta for class j
aj = 0.0
for i in range(no_samples):
i_class = y[i]
X1 = X[i].reshape(1, no_features)
g1 = g[:,j].reshape(no_features, 1)
gc = g[:,i_class].reshape(no_features, 1)
dot = 1. + float(np.dot(X1, g1)) - float(np.dot(X1,gc))
aj = dot
sum_e = 0.
a_q = []
a_q.append(0.)
for j2 in range(m): # calculating sum of e's except for all j except where j=i_class
if j2 != i_class: # g based on j2, not necessarily g1?
g2 = g[:,j2].reshape(no_features, 1)
dot1 = 1. + float(np.dot(X1, g2)) - float(np.dot(X1,gc))
e2 = math.exp(np.clip(dot1,enthreshold, epthreshold))
sum_e = sum_e + e2
a_q.append(dot1)
if (int(j==i_class) == 1):
a_q_arr = np.array(a_q)
alpha = np.array(a_q_arr[1:])
Q = mylogsumexp(f,a_q_arr, 1, 0)
sumalpha = mylogsumexp(f,alpha, 1, 0)
beta[j,i] = -Q
oneminusbeta[j,i] = sumalpha - Q
else:
alpha = a_q
alpha = np.array(alpha[1:])
a_q_arr = np.array(a_q)
Q = mylogsumexp(f,a_q_arr, 0, aj)
sumalpha = log(math.exp(np.clip(Q, enthreshold, epthreshold)) - math.exp(np.clip(aj, enthreshold, epthreshold)))
beta[j,i] = aj - Q
oneminusbeta[j,i] = sumalpha - Q
and the function mylogsumexp is:
def mylogsumexp(f, a, is_class, maxaj, axis=None, b=None):
np.seterr(over="raise", under="raise", invalid="raise")
threshold = -sys.float_info.max
maxthreshold = sys.float_info.max
epthreshold = 709 # positive exponential threshold
enthreshold = -708
a = asarray(a)
if axis is None:
a = a.ravel()
else:
a = rollaxis(a, axis)
if is_class == 1:
a_max = a.max(axis=0)
else:
a_max = maxaj
#bnone = " none "
if b is not None:
a_max = maxaj
b = asarray(b)
if axis is None:
b = b.ravel()
else:
b = rollaxis(b, axis)
a = np.clip(a - a_max, enthreshold, epthreshold)
midout = np.sum(np.exp(a), axis=0)
midout = 1.0 + np.clip(midout - math.exp(a_max), threshold, maxthreshold)
out = np.log(midout)
else:
a = np.clip(a - a_max, enthreshold, epthreshold)
out = np.log(np.sum(np.exp(a)))
out += a_max
if out == float("inf"):
out = maxthreshold
if out == float("-inf"):
out = threshold
return out

Categories