How can I optimise the weights of a CNN using PSO? - python

I want to optimize the weights of a CNN using Particle Swarm Optimization. Specifically, it is the weights at the penultimate layer and the filters that are to be optimised; PSO replaces the gradient-based optimiser, and the rest of the pipeline works the same way. Is this possible with Keras or TensorFlow? I have written a PSO implementation, attached below.
import random

w = 0.729844   # Inertia weight to prevent velocities becoming too large
c1 = 1.496180  # Scaling coefficient on the social component
c2 = 1.496180  # Scaling coefficient on the cognitive component
dimension = 20 # Size of the problem
iterations = 3000
swarmSize = 30

# This class contains the code of the Particles in the swarm
class Particle:
    def __init__(self):
        # These must be instance attributes; class-level lists would be
        # shared by every particle in the swarm
        self.velocity = []
        self.pos = []
        self.pBest = []
        for i in range(dimension):
            self.pos.append(random.random())
            self.velocity.append(0.01 * random.random())
            self.pBest.append(self.pos[i])

    def updatePositions(self):
        for i in range(dimension):
            self.pos[i] = self.pos[i] + self.velocity[i]

    def updateVelocities(self, gBest):
        for i in range(dimension):
            r1 = random.random()
            r2 = random.random()
            social = c1 * r1 * (gBest[i] - self.pos[i])
            cognitive = c2 * r2 * (self.pBest[i] - self.pos[i])
            self.velocity[i] = (w * self.velocity[i]) + social + cognitive

    def satisfyConstraints(self):
        # This is where constraints are satisfied
        pass

# This class contains the particle swarm optimization algorithm
class ParticleSwarmOptimizer:
    def __init__(self):
        self.swarm = [Particle() for _ in range(swarmSize)]

    def optimize(self):
        solution = None
        for i in range(iterations):
            print("iteration", i)
            # Get the global best position
            gBest = self.swarm[0].pBest
            for j in range(swarmSize):
                pBest = self.swarm[j].pBest
                if self.f(pBest) > self.f(gBest):
                    gBest = pBest
            solution = gBest
            # Update velocity and position of each particle
            for k in range(swarmSize):
                self.swarm[k].updateVelocities(gBest)
                self.swarm[k].updatePositions()
                self.swarm[k].satisfyConstraints()
            # Update the personal best positions
            for l in range(swarmSize):
                if self.f(self.swarm[l].pos) > self.f(self.swarm[l].pBest):
                    self.swarm[l].pBest = list(self.swarm[l].pos)
        return solution

    def f(self, solution):
        # This is where the fitness function is defined
        return random.random()

def main():
    pso = ParticleSwarmOptimizer()
    pso.optimize()

if __name__ == "__main__":
    main()

Well, you need to define a good cost function that measures the error between the optimal layers and filters and the current ones.
But what goal do you want to achieve: minimizing the time cost, or maximizing the accuracy?
By the way, if it's accuracy, it might be too costly, because for each particle of the swarm, every fitness evaluation would train (or at least evaluate) your CNN model once.
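For illustration, here is a minimal sketch (not a full solution) of what such a fitness function could look like in Keras, assuming a compiled model `model` with metrics=['accuracy'] and hypothetical validation arrays `x_val`, `y_val`. Each particle's position (the `pos` list from the code above) is decoded into the kernel and bias of the network's final Dense layer, and the validation accuracy is returned as the value that `f` would compute:

import numpy as np

def particle_fitness(model, particle, x_val, y_val):
    # Decode the particle's position into the final Dense layer's weights
    dense = model.layers[-1]
    kernel_shape = dense.get_weights()[0].shape
    bias_shape = dense.get_weights()[1].shape
    n_kernel = int(np.prod(kernel_shape))
    # Flat position vector: kernel entries first, then the bias entries
    kernel = np.asarray(particle.pos[:n_kernel]).reshape(kernel_shape)
    bias = np.asarray(particle.pos[n_kernel:n_kernel + int(np.prod(bias_shape))]).reshape(bias_shape)
    dense.set_weights([kernel, bias])
    # Fitness = validation accuracy (PSO then maximizes this)
    loss, acc = model.evaluate(x_val, y_val, verbose=0)
    return acc

For this to work, `dimension` in the PSO code would have to equal the total number of parameters in that layer, and the rest of the network would stay frozen.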


Custom DQN agent can't output correct action

I customized a DQN agent to solve a circuit problem. The state is a 1D input representing five nodes' values (node_0 to node_4), shape=(5,), and an action chooses one of six components (whose values are [0,1,2,3,4,5]) to place in the circuit, producing a new state named state_. So the action space is (6,). My goal is to make all five values in a state reach a fixed target, namely higher than 0.95. For example, the initial state is [0.8,0.7,0.9,0.98,0.9]; if I place a component of value 3 at node_0 and it goes from 0.8 to 0.95, it meets the requirement, and node_3 doesn't need a component because it is already 0.98. I also set a limit: the sum of the placed components' values can't exceed 10.
Here are some hyperparameters:
gamma = 0.9
TARGET_REPLACE_ITER = 500
nodes = 5
memory_capability = 1000
batch_size = 30
epsilon_start =1
epsilon_end = 0.0001
epsilon_decay = 0.06
learning_rate = 0.001
epsilon = 1
n_state = 5
n_action = 6
I use two neural networks: one is eval_net and the other is target_net. The code is below:
import math
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class NN(nn.Module):
    def __init__(self):
        super(NN, self).__init__()
        self.fc1 = nn.Linear(n_state, 16)
        self.fc1.weight.data.normal_(0, 0.1)
        self.fc2 = nn.Linear(16, 32)
        self.out = nn.Linear(32, n_action)
        self.out.weight.data.normal_(0, 0.1)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        action_value = self.out(x)
        return action_value
Here is the agent:
class Ckt_Opt(object):
    def __init__(self):
        self.learn_step_counter = 0
        self.memory = np.zeros((memory_capability, n_state * 2 + 2))
        self.memory_cntr = 0
        self.eval_net, self.target_net = NN(), NN()
        self.loss_func = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=learning_rate)

    def choose_action(self, state):
        state = torch.unsqueeze(torch.FloatTensor(state), 0)
        if random.random() < epsilon:
            # choose a random component value; com_data is a list of the components' values, from 0 to 5
            action = random.randint(0, len(com_data) - 1)
        else:
            action_value = self.eval_net.forward(state)
            # I copied this code from others; I guess it chooses the action with the max Q value
            action = torch.max(action_value, 1)[1].numpy()[0]
        return action

    def step(self, action):
        # x == 1 means the node's state value is lower than 0.95; ran_node means I randomly select a node to place a component
        self.ran_node = random.choice([a for a, x in enumerate(vio_node) if x == 1])
        str1 = '.param cap_0_%d_val=%e\n' % (self.ran_node, com_data[action])
        # these two lines change the placed component; the details don't matter here
        self.decap_param[self.ran_node] = com_data[action]

    def learn(self):
        # target net update
        if self.learn_step_counter % TARGET_REPLACE_ITER == 0:
            self.target_net.load_state_dict(self.eval_net.state_dict())
        self.learn_step_counter += 1
        # sample from memory
        sample_index = np.random.choice(memory_capability, batch_size)
        b_memory = self.memory[sample_index, :]
        b_s = torch.FloatTensor(b_memory[:, :n_state])
        b_a = torch.LongTensor(b_memory[:, n_state:n_state + 1].astype(int))
        b_r = torch.FloatTensor(b_memory[:, n_state + 1:n_state + 2])
        b_s_ = torch.FloatTensor(b_memory[:, -n_state:])
        q_eval = self.eval_net(b_s).gather(1, b_a)  # shape (batch, 1)
        q_next = self.target_net(b_s_).detach()
        q_target = b_r + gamma * q_next.max(1)[0]  # shape (batch, 1)
        loss = self.loss_func(q_eval, q_target)
        # calculate gradients and update eval_net
        self.optimizer.zero_grad()
        loss.backward()  # I don't understand these three lines
        self.optimizer.step()
The main function is below:
ckt = Ckt_Opt()
for i in range(0, 50):
    ckt.reset()            # no component is placed; get the initial state
    state = read_result()  # a function that reads the state after taking an action; here it reads the initial state
    for j in range(500):
        action = ckt.choose_action(state)
        # Cof indicates whether the sum of component values exceeds the limit (1 = over the limit, 0 = not);
        # vio_node indicates whether to place a component
        state_, Cof, vio_node = ckt.step(action)
        reward = sum(-((state_ - 0.95) * vio_node)**2 * 500) + (nodes - sum(vio_node)) if Cof == 0 else \
                 sum(-((state_ - 0.95) * vio_node)**2 * 5000)
        # The equations give priority to placing components at nodes with low state values to improve the
        # reward. For example, if node_0 is 0.6 and node_1 is 0.9, the penalty (negative reward) of node_0
        # is -(0.6-0.95)^2 * 500 = -61.25, and node_1's penalty is -(0.9-0.95)^2 * 500 = -1.25
        ckt.store_transition(state, action, reward, state_)  # just store it in the experience memory
        state = state_
    epsilon = epsilon_end + (epsilon_start - epsilon_end) * math.exp(-1. * epsilon_decay * i)
My goal is to find a solution with the best reward. For example, the initial state is [0.6,0.7,0.8,0.9,0.97], the placed component values are [5,4,0,1,0], and the best state obtained is [0.85,0.9,0.91,0.93,0.97]; it can't get every state value over 0.95 for some reason.
But!!! I ran this many times and always get a weird solution like [1,1,1,1,0] or [2,2,2,2,0], which doesn't make sense. I think something must be wrong with the choose_action function or the learn function, but I can't find it because I am new to DQN.
Could anyone help me? Thanks a lot.

MCMC method 1D Ferromagnetic Ising Model

My question is related to the Python coding of a 1-dimensional Ising model using a Markov chain Monte Carlo (MCMC) method.
I have the following Hamiltonian
$$H = - \sum_{i=1}^{L-1}\sigma_{i}\sigma_{i+1} - B\sum_{i=1}^{L}\sigma_{i}$$
I want to write a Python function that generates a Markov chain and, at each step, calculates and saves the magnetization (per site) and the energy.
The energy is the Hamiltonian itself, and I will define the magnetization as:
$$\frac{1}{L}\sum_{i}\sigma_{i}$$
My probability distribution would be:
$$p(\sigma) = e^{-\beta H(\sigma)}$$ where $\beta = T^{-1}$
For the Markov chain I will implement a Metropolis-Hastings algorithm, based on the acceptance ratio
$$\frac{P(\sigma')}{P(\sigma)} = e^{(H(\sigma)-H(\sigma'))\beta}$$
My idea would be to always accept transitions when
$$H(\sigma') < H(\sigma)$$
and to accept transitions with
$$H(\sigma') > H(\sigma)$$
only with the probability
$$P = e^{(H(\sigma)-H(\sigma'))\beta}$$
So let me set a few parameters such as:
$L=20$ - Lattice Size
$T=2$ - Temperature
$B=0$ - Magnetic Field
I will need to plot a histogram of the magnetization and energy vs step size after the calculations. I have no issue with this part.
My Python knowledge isn't great, but I have included my rough (incomplete) draft below. I don't think I am making much progress. Any help would be great.
#Coding attempt MCMC 1-Dimensional Ising Model
import numpy as np
import matplotlib.pyplot as plt

#Shape of Lattice L
L = 20
Shape = (20,20)
#Spin Configuration
spins = np.random.choice([-1,1],Shape)
#Magnetic moment
moment = 1
#External magnetic field
field = np.full(Shape, 0)
#Temperature
Temperature = 2
Beta = Temperature**(-1)
#Interaction (ferromagnetic if positive, antiferromagnetic if negative)
interaction = 1

#Using Probability Distribution given
def get_probability(Energy1, Energy2, Temperature):
    return np.exp((Energy1 - Energy2) / Temperature)

def get_energy(spins):
    return -np.sum(
        interaction * spins * np.roll(spins, 1, axis=0) +
        interaction * spins * np.roll(spins, -1, axis=0) +
        interaction * spins * np.roll(spins, 1, axis=1) +
        interaction * spins * np.roll(spins, -1, axis=1)
    )/2 - moment * np.sum(field * spins)

#Introducing Metropolis Hastings Algorithm
x_now = np.random.uniform(-1, 1)  #initial value
d = 10**(-1)  #delta
y = []
for i in range(L-1):
    #generating next value
    x_proposed = np.random.uniform(x_now - d, x_now + d)
    #accepting or rejecting the value
    if np.random.rand() < np.exp(-np.abs(x_proposed))/(np.exp(-np.abs(x_now))):
        x_now = x_proposed
    if i % 100 == 0:
        y.append(x_proposed)
Here I changed your code to solve the problem the way I always do.
Please check the code and the formulas very carefully.
#Coding attempt MCMC 1-Dimensional Ising Model
import numpy as np
import matplotlib.pyplot as plt

#Shape of Lattice L
L = 20
#Number of Monte Carlo samples
MC_samples = 1000
#Spin Configuration
spins = np.random.choice([-1,1],L)
print(spins)
#Magnetic moment
moment = 1
#External magnetic field
field = 0
#Temperature
Temperature = 2
Beta = Temperature**(-1)
#Interaction (ferromagnetic if positive, antiferromagnetic if negative)
interaction = 1

#Using Probability Distribution given
def get_probability(delta_energy, Temperature):
    return np.exp(-delta_energy / Temperature)

def get_energy(spins):
    energy = 0
    for i in range(L):
        energy = energy + interaction*spins[i-1]*spins[i]
    energy = energy - field*sum(spins)
    return energy

def delta_energy(spins, random_spin):
    #If you flip one random spin, the change in energy is:
    #(by using a reduced formula that only involves the spin
    # and its neighbours)
    if random_spin == L:
        PBC = 0
    else:
        PBC = random_spin + 1
    return -2*interaction*(spins[random_spin-1]*spins[random_spin] +
                           spins[random_spin]*spins[PBC] + field*spins[random_spin])

#Introducing Metropolis Hastings Algorithm
magnetization = []
energy = []
for i in range(MC_samples):
    #Each Monte Carlo step consists of L random spin moves
    for j in range(L):
        #Choosing a random spin
        random_spin = np.random.randint(L-1, size=(1))
        #Computing the change in energy of this spin flip
        delta = delta_energy(spins, random_spin)
        #Metropolis accept-rejection:
        if delta < 0:
            #Accept the move if the change is negative
            spins[random_spin] = -spins[random_spin]
        else:
            #If it's positive, we compute the probability
            probability = get_probability(delta, Temperature)
            random = np.random.rand()
            if random <= probability:
                #Accept the move
                spins[random_spin] = -spins[random_spin]
    #After the MC step, we measure the system
    magnetization.append(sum(spins)/L)
    energy.append(get_energy(spins))
print(magnetization, energy)
#Do histograms and plots
At the end of the simulation, the variables magnetization and energy are arrays that contain the measured values at each MC step.
You can directly use these arrays to compute the histograms and plots.
Note: the energy array contains the total energy of the system, not the energy per site (energy/L).
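For instance, a minimal plotting sketch using those arrays (one possible layout, assuming the code above has been run):

import matplotlib.pyplot as plt

fig, axes = plt.subplots(2, 2, figsize=(10, 6))
# Histograms of the measured observables
axes[0, 0].hist(magnetization, bins=30)
axes[0, 0].set_xlabel('Magnetization per site')
axes[0, 1].hist(energy, bins=30)
axes[0, 1].set_xlabel('Total energy')
# Traces: observable vs MC step
axes[1, 0].plot(magnetization)
axes[1, 0].set_xlabel('MC step')
axes[1, 0].set_ylabel('Magnetization per site')
axes[1, 1].plot(energy)
axes[1, 1].set_xlabel('MC step')
axes[1, 1].set_ylabel('Total energy')
plt.tight_layout()
plt.show()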
I was looking for a simple implementation of a 1D Ising model and came across this post. While I am no expert in the field, I did write my master's thesis on a related topic. I implemented the code in Oriol Cabanas Tirapu's answer and found a few bugs (I think).
Below is my adapted version of their code. Hopefully it is useful for someone.
#Coding attempt MCMC 1-Dimensional Ising Model
import numpy as np
import matplotlib.pyplot as plt

#Using Probability Distribution given
def get_probability(delta_energy, Temperature):
    return np.exp(-delta_energy / Temperature)

def get_energy(spins):
    energy = 0
    for i in range(len(spins)):
        energy = energy + interaction*spins[i-1]*spins[i]
    energy = energy - field*sum(spins)
    return energy

def delta_energy(spins, random_spin):
    #If you flip one random spin, the change in energy is:
    #(by using a reduced formula that only involves the spin
    # and its neighbours)
    if random_spin == L-1:
        PBC = 0
    else:
        PBC = random_spin + 1
    old = -interaction*(spins[random_spin-1]*spins[random_spin] + spins[random_spin]*spins[PBC]) - field*spins[random_spin]
    new = interaction*(spins[random_spin-1]*spins[random_spin] + spins[random_spin]*spins[PBC]) + field*spins[random_spin]
    return new - old

def metropolis(L=100, MC_samples=1000, Temperature=1, interaction=1, field=0):
    #Initializing the spin configuration
    spins = np.random.choice([-1,1],L)
    Beta = Temperature**(-1)
    #Introducing Metropolis Hastings Algorithm
    data = []
    magnetization = []
    energy = []
    for i in range(MC_samples):
        #Each Monte Carlo step consists of L random spin moves
        for j in range(L):
            #Choosing a random spin
            random_spin = np.random.randint(0, L, size=(1))
            #Computing the change in energy of this spin flip
            delta = delta_energy(spins, random_spin)
            #Metropolis accept-rejection:
            if delta < 0:
                #Accept the move if the change is negative
                spins[random_spin] = -spins[random_spin]
            else:
                #If it's positive, we compute the probability
                probability = get_probability(delta, Temperature)
                random = np.random.rand()
                if random <= probability:
                    #Accept the move
                    spins[random_spin] = -spins[random_spin]
        data.append(list(spins))
        #After the MC step, we measure the system
        magnetization.append(sum(spins)/L)
        energy.append(get_energy(spins))
    return data, magnetization, energy

def record_state_statistics(data, n=4):
    sub_sample = [[d[i] for i in range(n)] for d in data]
    #Get the state number
    state_nums = [int(sum([((j+1)/2)*2**i for j,i in zip(reversed(d),range(len(d)))])) for d in sub_sample]
    return state_nums

# setting up problem
L = 200            # size of system
MC_samples = 1000  # number of samples
Temperature = 1    # "temperature" parameter
interaction = 1    # strength of interaction between nearest neighbours
field = 0          # external field

# running MCMC
data = metropolis(L=L, MC_samples=MC_samples, Temperature=Temperature, interaction=interaction, field=field)
results = record_state_statistics(data[0], n=4)  # I was also interested in the probability of each micro-state in a sub-section of the system

# Plotting
plt.figure(figsize=(20,10))
plt.subplot(2,1,1)
plt.imshow(np.transpose(data[0]))
plt.xticks([])
plt.yticks([])
plt.axis('tight')
plt.ylabel('Space', fontdict={'size':20})
plt.title('Critical dynamics in a 1-D Ising model', fontdict={'size':20})

plt.subplot(2,1,2)
plt.plot(data[2], 'r')
plt.xlim((0, MC_samples))
plt.xticks([])
plt.yticks([])
plt.ylabel('Energy', fontdict={'size':20})
plt.xlabel('Time', fontdict={'size':20})
plt.show()

Why won't a simple AND gate neural network work without BIAS?

A simple neural net with 2 inputs and one output, and no bias, like this, doesn't seem to work:
Z = weight1*input1 + weight2*input2
output = sigmoid(Z)
Whereas it works perfectly when a BIAS is added. Why does it work, and what is the math behind it?
Z = weight1*input1 + weight2*input2
output = sigmoid(Z - BIAS)
Here's the code for the working version with BIAS:
import numpy as np
import random as r

def sigmoid(ip, derivate=False):
    if derivate:
        return ip*(1-ip)
    return 1.0/(1+np.exp(-1*ip))

class NeuralNet:
    def __init__(self):
        self.inputLayers = 2
        self.outputLayer = 1
        self.bias = r.random()

    def setup(self):
        self.i = np.array([r.random(), r.random()], dtype=float).reshape(2,)
        self.w = np.array([r.random(), r.random()], dtype=float).reshape(2,)

    def forward_propogate(self):
        self.z = self.w*self.i
        self.o = sigmoid(sum(self.z)-self.bias)

    def optimize_cost(self, desired):
        i = 0
        current_cost = pow(desired - self.o, 2)
        for weight in self.w:
            dpdw = -1*(desired-self.o) * (sigmoid(self.o, derivate=True)) * self.i[i]
            self.w[i] = self.w[i] - 2*dpdw
            i += 1
        #calculate dp/dB
        dpdB = -1*(desired-self.o) * (sigmoid(self.o, derivate=True)) * -1
        self.bias = self.bias - 2*dpdB
        self.forward_propogate()

    def train(self, ip, op):
        self.i = np.array(ip).reshape(2,)
        self.forward_propogate()
        self.optimize_cost(op[0])

n = NeuralNet()
n.setup()
success_rate = 0
trial = 0
done = False
while not done:
    a = [0.1, 1, 0.1, 1]
    b = [0.1, 0.1, 1, 1]
    c = [0, 0, 0, 1]
    for i in range(len(a)):
        trial += 1
        n.train([a[i], b[i]], [c[i]])
        if c[i] - n.o < 0.01:
            success_rate += 1
        print(100*success_rate/trial, "%")
        if 100*success_rate/trial > 99 and trial > 4:
            print(100*success_rate/trial, "%")
            print("Network trained, took: {} trials".format(trial))
            print("Network weights:{}, bias:{}".format(n.w, n.bias))
            done = True
            break
A bias is just a shift of the intercept. The NN you have set up in this example appears to be a single-layer neural network with no hidden layers, which is effectively logistic regression, i.e. a linear model.
When you don't learn an intercept value, the intercept defaults to 0, so the decision boundary always passes through the origin and you are only learning the slope of the line. To correctly classify the AND of your data, i.e. the top right corner at (1,1) but none of the other points, you need a non-zero intercept, because there is no line through the origin that has the top right corner on one side and the other three points on the other side.
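To make that concrete with the inputs used in the code above (0.1 standing in for 0): without a bias, Z(0.1, 0.1) = 0.1*weight1 + 0.1*weight2 = 0.1 * Z(1, 1), so the two Z values always have the same sign, and sigmoid puts both outputs on the same side of 0.5 no matter what the weights are. A small sketch (my own check, not part of the original code) demonstrating this:

import numpy as np

def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))

# For any weights, the no-bias outputs for (0.1, 0.1) and (1, 1) always
# land on the same side of 0.5, so AND can never be represented.
for _ in range(5):
    w = np.random.uniform(-5, 5, size=2)
    z_low = w @ np.array([0.1, 0.1])   # should classify as 0
    z_high = w @ np.array([1.0, 1.0])  # should classify as 1
    print(w, sigmoid(z_low) > 0.5, sigmoid(z_high) > 0.5)  # the two flags always agree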

SGD implementation Python

I am aware that SGD has been asked about before on SO, but I wanted to get an opinion on my code below:
import numpy as np
import matplotlib.pyplot as plt

# Generating data
m, n = 10000, 4
x = np.random.normal(loc=0, scale=1, size=(m, 4))
theta_0 = 2
theta = np.append([], [1, 0.5, 0.25, 0.125]).reshape(n, 1)
y = np.matmul(x, theta) + theta_0*np.ones(m).reshape((m, 1)) + np.random.normal(loc=0, scale=0.25, size=(m, 1))

# input features
x0 = np.ones([m, 1])
X = np.append(x0, x, axis=1)

# defining the cost function
def compute_cost(X, y, theta_GD):
    return np.sum(np.power(y - np.matmul(np.transpose(theta_GD), X), 2))/2

# initializations
theta_GD = np.append([theta_0], [theta]).reshape(n+1, 1)
alp = 1e-5
num_iterations = 10000

# Batch Sum
def batch(i, j, theta_GD):
    batch_sum = 0
    for k in range(i, i+9):
        batch_sum += float((y[k] - np.transpose(theta_GD).dot(X[k]))*X[k][j])
    return batch_sum

# Gradient Step
def gradient_step(theta_current, X, y, alp, i):
    for j in range(0, n):
        theta_current[j] -= alp*batch(i, j, theta_current)/10
    theta_updated = theta_current
    return theta_updated

# gradient descent
cost_vec = []
for i in range(num_iterations):
    cost_vec.append(compute_cost(X[i], y[i], theta_GD))
    theta_GD = gradient_step(theta_GD, X, y, alp, i)

plt.plot(cost_vec)
plt.xlabel('iterations')
plt.ylabel('cost')
plt.show()
I was trying mini-batch GD with a batch size of 10, and I am getting extremely oscillatory behavior for the MSE. Where's the issue? Thanks.
P.S. I was following Ng's lecture on mini-batch gradient descent: https://www.coursera.org/learn/machine-learning/lecture/9zJUs/mini-batch-gradient-descent
This is a description of the underlying mathematical principle, not a code-based solution...
The cost function is highly nonlinear (np.power()) and recursive, and recursive nonlinear systems can oscillate (self-oscillation, https://en.wikipedia.org/wiki/Self-oscillation). In mathematics this is the subject of chaos theory / the theory of nonlinear dynamical systems (https://pdfs.semanticscholar.org/8e0d/ee3c433b1806bfa0d98286836096f8c2681d.pdf); cf. the logistic map (https://en.wikipedia.org/wiki/Logistic_map). The logistic map oscillates if the growth factor r exceeds a threshold; the growth factor is a measure of how much energy is in the system.
In your code the critical parts are the cost function, the cost vector (that is, the history of the system), and the time steps:
def compute_cost(X, y, theta_GD):
    return np.sum(np.power(y - np.matmul(np.transpose(theta_GD), X), 2))/2

cost_vec = []
for i in range(num_iterations):
    cost_vec.append(compute_cost(X[i], y[i], theta_GD))
    theta_GD = gradient_step(theta_GD, X, y, alp, i)

# Gradient Step
def gradient_step(theta_current, X, y, alp, i):
    for j in range(0, n):
        theta_current[j] -= alp*batch(i, j, theta_current)/10
    theta_updated = theta_current
    return theta_updated
If you compare this to an implementation of the logistic map, you can see the similarities:
from pylab import show, scatter, xlim, ylim
from random import randint

iter = 1000      # Number of iterations per point
seed = 0.5       # Seed value for x in (0, 1)
spacing = .0001  # Spacing between points on domain (r-axis)
res = 8          # Largest n-cycle visible

# Initialize r and x lists
rlist = []
xlist = []

def logisticmap(x, r):  # <-- the nonlinear function
    return x * r * (1 - x)

# Return nth iteration of logisticmap(x, r)
def iterate(n, x, r):
    for i in range(1, n):
        x = logisticmap(x, r)
    return x

# Generate list values -- iterate for each value of r
for r in [i * spacing for i in range(int(1/spacing), int(4/spacing))]:
    rlist.append(r)
    # <-- similar to cost_vec, the history of the system
    xlist.append(iterate(randint(iter - res//2, iter + res//2), seed, r))

scatter(rlist, xlist, s=.01)
xlim(0.9, 4.1)
ylim(-0.1, 1.1)
show()
Source of the code: https://www.reddit.com/r/learnpython/comments/zzh28/a_simple_python_implementation_of_the_logistic_map/
Based on this, you can try to modify your cost function or your gradient step by introducing a factor, similar to the growth factor in the logistic map, that keeps the system under the oscillation threshold and reduces the intensity of the oscillations:
def gradient_step(theta_current, X, y, alp, i):
    for j in range(0, n):
        # <-- introduce a factor here to keep the system under the oscillation threshold
        theta_current[j] -= alp*batch(i, j, theta_current)/10
    theta_updated = theta_current
    return theta_updated
or
def compute_cost(X, y, theta_GD):
    # <-- introduce a factor here to keep the system under the oscillation threshold
    return np.sum(np.power(y - np.matmul(np.transpose(theta_GD), X), 2))/2
If this is not working, maybe follow the suggestions in https://www.reddit.com/r/MachineLearning/comments/3y9gkj/how_can_i_avoid_oscillations_in_gradient_descent/ (time steps, ...).
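One hedged way to read that advice in code (my own sketch, not a prescription from the links): damp the learning rate over time, so the update can no longer inject enough "energy" to sustain the oscillation. Here decay_rate is a hypothetical knob, and batch and n are the function and variable from the question's code:

def gradient_step_damped(theta_current, X, y, alp, i, decay_rate=1e-4):
    # The learning rate shrinks as the iteration counter i grows,
    # playing the role of keeping the growth factor below threshold
    damped_alp = alp / (1.0 + decay_rate * i)
    for j in range(0, n):
        theta_current[j] -= damped_alp * batch(i, j, theta_current) / 10
    return theta_current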

CAR model from pymc2 to PyMC3

I'm still a noob in PyMC3, so the question might be naive, but I don't know how to translate this pymc2 code into pymc3. In particular, it's not clear to me how to translate the R function.
beta = pymc.Normal('beta', mu=0, tau=1.0e-4)
s = pymc.Uniform('s', lower=0, upper=1.0e+4)
tau = pymc.Lambda('tau', lambda s=s: s**(-2))

### Intrinsic CAR
@pymc.stochastic
def R(tau=tau, value=np.zeros(N)):
    # Calculate mu based on average of neighbors
    mu = np.array([sum(W[i]*value[A[i]])/Wplus[i] for i in xrange(N)])
    # Scale precision to the number of neighbors
    taux = tau*Wplus
    return pymc.normal_like(value, mu, taux)

@pymc.deterministic
def M(beta=beta, R=R):
    return [np.exp(beta + R[i]) for i in xrange(N)]

obsvd = pymc.Poisson("obsvd", mu=M, value=Y, observed=True)
model = pymc.Model([s, beta, obsvd])
Code from https://github.com/Youki/statistical-modeling-for-data-analysis-with-python/blob/945c13549a872d869e33bc48082c42efc022a07b/Chapter11/Chapter11.rst, and http://glau.ca/?p=340
Can you help me? Thanks
In PyMC3, you can implement the CAR model using Theano's scan function. There is sample code in their documentation, which gives two implementations of CAR. Here is the first one [Source]:
from theano import scan
import pymc3 as pm
import theano.tensor as tt

floatX = "float32"

from pymc3.distributions import continuous
from pymc3.distributions import distribution

class CAR(distribution.Continuous):
    """
    Conditional Autoregressive (CAR) distribution

    Parameters
    ----------
    a : list of adjacency information
    w : list of weight information
    tau : precision at each location
    """
    def __init__(self, w, a, tau, *args, **kwargs):
        super(CAR, self).__init__(*args, **kwargs)
        self.a = a = tt.as_tensor_variable(a)
        self.w = w = tt.as_tensor_variable(w)
        self.tau = tau*tt.sum(w, axis=1)
        self.mode = 0.

    def get_mu(self, x):
        def weigth_mu(w, a):
            a1 = tt.cast(a, 'int32')
            return tt.sum(w*x[a1])/tt.sum(w)

        mu_w, _ = scan(fn=weigth_mu,
                       sequences=[self.w, self.a])
        return mu_w

    def logp(self, x):
        mu_w = self.get_mu(x)
        tau = self.tau
        return tt.sum(continuous.Normal.dist(mu=mu_w, tau=tau).logp(x))

with pm.Model() as model1:
    # Vague prior on intercept
    beta0 = pm.Normal('beta0', mu=0.0, tau=1.0e-5)
    # Vague prior on covariate effect
    beta1 = pm.Normal('beta1', mu=0.0, tau=1.0e-5)
    # Random effects (hierarchical) prior
    tau_h = pm.Gamma('tau_h', alpha=3.2761, beta=1.81)
    # Spatial clustering prior
    tau_c = pm.Gamma('tau_c', alpha=1.0, beta=1.0)
    # Regional random effects
    theta = pm.Normal('theta', mu=0.0, tau=tau_h, shape=N)
    mu_phi = CAR('mu_phi', w=wmat, a=amat, tau=tau_c, shape=N)
    # Zero-centre phi
    phi = pm.Deterministic('phi', mu_phi - tt.mean(mu_phi))
    # Mean model
    mu = pm.Deterministic('mu', tt.exp(logE + beta0 + beta1*aff + theta + phi))
    # Likelihood
    Yi = pm.Poisson('Yi', mu=mu, observed=O)
    # Marginal SD of heterogeneity effects
    sd_h = pm.Deterministic('sd_h', tt.std(theta))
    # Marginal SD of clustering (spatial) effects
    sd_c = pm.Deterministic('sd_c', tt.std(phi))
    # Proportion of spatial variance
    alpha = pm.Deterministic('alpha', sd_c/(sd_h + sd_c))

    trace1 = pm.sample(1000, tune=500, cores=4,
                       init='advi',
                       nuts_kwargs={"target_accept": 0.9,
                                    "max_treedepth": 15})
The M function from the pymc2 code is written here as:
mu = pm.Deterministic('mu', tt.exp(logE + beta0 + beta1*aff + theta + phi))
