Code slowing down when using prioritised replay buffer with Ray - python

I have code that trains a DQN in a distributed setting. When using a standard replay buffer that all the workers push their experiences to, the code runs perfectly fine. However, when I switch to a prioritised replay buffer, things slow down massively. The code for the two buffers is given below:
import random
import time

import numpy as np
import ray

@ray.remote
class PrioritizedReplayBuffer:
    def __init__(self, capacity, alpha=0.6, beta=0.4,
                 beta_increment_per_sampling=0.001, batch_size=128):
        self.capacity = capacity
        self.alpha = alpha
        self.beta = beta
        self.beta_increment_per_sampling = beta_increment_per_sampling
        self.buffer = []
        self.pos = 0
        self.priorities = []
        self.batch_size = batch_size

    def push(self, data):
        for experience in data:
            max_priority = max(self.priorities) if self.buffer else 1.0
            if len(self.buffer) < self.capacity:
                self.buffer.append(experience)
                self.priorities.append(max_priority)
            else:
                self.buffer[self.pos] = experience
                self.priorities[self.pos] = max_priority
            self.pos = (self.pos + 1) % self.capacity

    def sample(self):
        start = time.time()
        N = len(self.buffer)
        if N == self.capacity:
            priorities = np.array(self.priorities)
        else:
            priorities = np.array(self.priorities[:self.pos])
        self.beta = min(1.0, self.beta + self.beta_increment_per_sampling)
        sampling_probabilities = priorities ** self.alpha
        sampling_probabilities = sampling_probabilities / sampling_probabilities.sum()
        indices = random.choices(range(N), k=self.batch_size, weights=sampling_probabilities)
        experiences = [self.buffer[idx] for idx in indices]
        weights = np.array([(self.capacity * priorities[i]) ** -self.beta for i in indices])
        weights = weights / weights.max()
        end = time.time()
        print(f"sampling took {(end - start) / 60} minutes")
        return experiences, np.array(indices), weights

    def update_priorities(self, indices, priorities):
        for idx, priority in zip(indices, priorities):
            self.priorities[idx] = priority

    def __len__(self):
        return len(self.buffer)
@ray.remote
class ReplayBuffer:
    def __init__(self, capacity, batch_size=128):
        self.capacity = capacity
        self.buffer = []
        self.batch_size = batch_size

    def push(self, data):
        for experience in data:
            self.buffer.append(experience)

    def sample(self):
        return random.sample(self.buffer, self.batch_size)

    def __len__(self):
        return len(self.buffer)
The code for my workers looks like this:
@ray.remote
class Actor(object):
    def __init__(self, state_dim, action_dim, exploration_decay, exploration_min,
                 worker_id=None, replay_buffer=None, param_server=None,
                 push_size=20, num_grad_steps=1e6):
        self.worker_id = worker_id
        self.env = gym.make('LunarLander-v2')
        self.net = Net(state_dim, action_dim)
        # get ray_remote objects; centralized buffer and parameter server
        self.replay_buffer = replay_buffer
        self.param_server = param_server
        self.push_size = push_size  # this is how much data we need until we push to the centralised buffer
        self.num_grad_steps = num_grad_steps
        self.epsilon = 1
        self.exploration_decay = exploration_decay
        self.exploration_min = exploration_min
        self.action_dim = action_dim

    def act(self, state):
        if np.random.uniform() < self.epsilon:
            self.epsilon = max(self.epsilon * self.exploration_decay, self.exploration_min)
            return np.random.randint(0, self.action_dim)
        else:
            state = torch.FloatTensor(state)
            with torch.no_grad():
                values = self.net(state)
            action = torch.argmax(values)
            return int(action)

    def sync_with_param_server(self):
        new_actor_params = ray.get(self.param_server.return_params.remote())
        for param in new_actor_params:
            new_actor_params[param] = torch.from_numpy(new_actor_params[param]).float()
        self.net.load_state_dict(new_actor_params)

    def run(self):
        state = self.env.reset()
        episode_reward = 0
        episode = 0
        ep_length = 0
        grad_steps = 0
        intermediate_memory = []  # this is what we will push to the buffer at once
        while grad_steps < self.num_grad_steps:
            ep_length += 1
            action = self.act(state)
            next_state, reward, done, _ = self.env.step(action)
            intermediate_memory.append((state, action, reward, next_state, done))
            if len(intermediate_memory) >= self.push_size:
                self.replay_buffer.push.remote(intermediate_memory)
                intermediate_memory = []
                self.sync_with_param_server()
                grad_steps = ray.get(self.param_server.return_grad_steps.remote())
                # time.sleep(60 * 5)
            episode_reward += reward
            if done:
                # print results locally
                # print(f"Episode {episode}: {episode_reward}")
                # print_status(self.env, time_step)
                # prepare new rollout
                episode += 1
                episode_reward = 0
                ep_length = 0
                next_state = self.env.reset()
            state = next_state
I've narrowed the problem down somewhat: when I uncomment the sleep command in the actor, the speed of the code goes back to normal while the sleep is in effect, i.e. while the actors aren't pushing any data to the buffer. What I don't understand is why actors pushing to the replay buffer would slow down the learning steps here when it makes no difference for the vanilla replay buffer.
Any help pinpointing what is causing the problem and how to fix it would be greatly appreciated.
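For what it's worth, here is one plausible diagnosis (a sketch, not a verified fix). A Ray actor executes one method call at a time, so every push() from a worker queues ahead of the learner's sample(). In the prioritised buffer, push() calls max(self.priorities) once per experience, which is O(N) on a Python list, and sample() pays another O(N) inside random.choices, which rebuilds the cumulative weights on every call; the vanilla buffer does none of this, which would explain why only the prioritised version degrades as the buffer fills. Below is a minimal sketch of the hot path with priorities kept in a preallocated numpy array; the class and method names are illustrative, and np.random.choice stands in for random.choices:

import numpy as np

class FastPriorities:
    # Sketch: priority bookkeeping in a preallocated numpy array.
    def __init__(self, capacity):
        self.capacity = capacity
        self.priorities = np.zeros(capacity, dtype=np.float64)
        self.size = 0
        self.pos = 0

    def push_batch(self, num_new):
        # one vectorised max over the valid slice per batch of experiences,
        # instead of max(list) once per experience
        max_priority = self.priorities[:self.size].max() if self.size > 0 else 1.0
        for _ in range(num_new):
            self.priorities[self.pos] = max_priority
            self.pos = (self.pos + 1) % self.capacity
            self.size = min(self.size + 1, self.capacity)

    def sample_indices(self, batch_size, alpha):
        probs = self.priorities[:self.size] ** alpha
        probs = probs / probs.sum()
        # np.random.choice performs the weighted draw in C
        return np.random.choice(self.size, size=batch_size, p=probs)

The textbook fix for prioritised replay is a sum-tree, which makes both priority updates and weighted sampling O(log N), but even the array version above removes the per-call O(N) Python-loop work that grows as the buffer fills.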

Related

NeuralNetwork on Python from RapidMiner

Hi, right now I have to port a neural network from RapidMiner into a Python script to predict the category from a value, but I can't figure out what is wrong with my program. Please help. The input is from my dataset and the prediction should be 'high', but it prints both 'high' and 'medium'.
import pandas as pd
import math

def sig(x):
    return 1 / (1 + math.exp(-x))

class Perceptron:
    # Constructor
    def __init__(self, weights, bias):
        self.weights = weights
        self.bias = bias
        self.output = 0

    def setOutput(self, value):
        self.output = value

    def getBias(self):
        return self.bias

    def getOutput(self):
        return self.output

    def guess(self, input):
        sum = 0
        for i in range(10):
            sum = sum + (input[i] * self.weights[i])
        sum = sum + self.bias
        self.output = sig(sum)
        return self.output

class Output:
    def __init__(self, nodes, threshold, name):
        self.nodes = nodes
        self.threshold = threshold
        self.name = name

    def guess(self, input):
        sum = 0
        for i in range(len(self.nodes)):
            sum = sum + (input[i] * self.nodes[i])
        sigmoid = sig(sum)
        if sigmoid > self.threshold:
            print(self.name)

    def getT(self):
        return self.nodes

nodeDic = {
    'weight': [[3.291, -4.360, 1.080, -0.692, 0.424, 0.081, -0.118, -0.832, -0.414, 0.430],
               [3.393, -4.429, 1.120, -0.642, 0.490, 0.061, -0.111, -0.803, -0.432, 0.382],
               [-6.534, 2.866, -3.405, -0.216, -1.573, 0.609, 0.291, -0.103, 0.233, 0.020],
               [3.032, -3.974, 0.991, -1.656, 1.245, -1.006, 1.471, 0.844, 0.177, 1.565],
               [-3.665, 1.610, -1.432, -0.118, -0.734, 0.364, 0.182, 0.090, 0.098, 0.028],
               [-2.242, 1.857, -0.375, 0.093, -0.100, -0.297, 0.263, 0.389, 0.014, 0.529],
               [-0.969, 0.386, 0.264, -0.641, 0.787, -0.322, 0.098, -0.144, 0.111, 0.599],
               [-3.390, 1.358, -1.745, -0.206, -0.640, 0.593, 0.332, 0.130, 0.237, -0.422]],
    'name': ['node1', 'node2', 'node3', 'node4', 'node5', 'node6', 'node7', 'node8'],
    'bias': [1.228, 1.271, -0.392, -0.500, -0.428, -0.570, 0.749, -0.381]}

outputDic = {
    'class': ['medium', 'high', 'low'],
    'threshold': [-7.403, 0.729, 0.241],
    'nodeWeight': [[2.163, 2.278, 5.438, 3.309, 1.041, -1.606, -3.127, 1.394],
                   [2.244, 2.320, -4.653, 1.370, -3.436, -2.759, -0.751, -2.838],
                   [-3.989, -4.047, 1.250, -3.119, 1.501, 1.710, 0.453, 0.844]]}

input = [0.6, 0.807, 10.4, 0, 10, 0.999, 0.0012, 0.01175573463375, 7, 15]

hidden1 = []
for i in range(0, 8):
    (nodeDic['name'])[i] = Perceptron((nodeDic['weight'])[i], (nodeDic['bias'])[i])
    hidden1.append((nodeDic['name'])[i])

outputLayer = []
for i in range(0, 3):
    (outputDic['class'])[i] = Output((outputDic['nodeWeight'])[i], (outputDic['threshold'])[i], (outputDic['class'])[i])
    outputLayer.append((outputDic['class'])[i])

nodeOutput = []
for i in range(0, 8):
    nodeOutput.append(hidden1[i].guess(input))
# print(nodeOutput)

for i in range(0, 3):
    outputLayer[i].guess(nodeOutput)
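One observation that may explain the symptom: sig() always returns a value strictly between 0 and 1, so the 'medium' output, whose threshold is -7.403, fires on every input; that is why 'medium' prints alongside 'high'. Below is a sketch of one alternative reading (an assumption about the RapidMiner export, not confirmed): treat each class's threshold as a bias added before the sigmoid and report only the highest-scoring class. The helper name class_score and the hard-coded class order are illustrative:

def class_score(node_weights, threshold, hidden_outputs):
    total = sum(w * h for w, h in zip(node_weights, hidden_outputs))
    return sig(total + threshold)  # threshold used as a bias term, by assumption

scores = {name: class_score(outputDic['nodeWeight'][i],
                            outputDic['threshold'][i],
                            nodeOutput)
          for i, name in enumerate(['medium', 'high', 'low'])}
print(max(scores, key=scores.get))  # print only the winning class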

I built a neural network from scratch, but the results I get when passing a batch of inputs are incorrect; I think the error is in the back pass. Can anyone help?

The code below generates a custom neural network and trains it.
It has three parts:
1) Activation functions > Activations
2) Custom layer generation > Layer
3) Gradient descent and backpropagation > Back_Pass
When I pass a single input, the network adapts well, but when I pass a batch of inputs the answers are totally wrong.
I suspect the error is in the cost evaluation of the back pass, but I don't know what it is.
import numpy as np

class Activations:
    def Relu(self, input):
        self.output = np.maximum(0, input)
        self.deri = (self.output > 0).astype(int)

    def Softmax(self, input):
        input = input - np.max(input, axis=1, keepdims=True)
        self.output = np.exp(input) / np.sum(np.exp(input), axis=1, keepdims=True)
        self.deri = self.output * (1 - self.output)

class Layer(Activations):
    def __init__(self, input_neurons, next_neurons, bias_req=0):
        self.weights = np.random.randn(input_neurons, next_neurons)
        self.bias_req = bias_req
        if bias_req == 1:
            self.bais = np.random.randn(1, 1)
        else:
            self.bais = [[0]]

    def forward(self, inputs, activation):
        self.inputs = np.array(inputs)
        x = np.dot(self.inputs, self.weights) + self.bais
        self.activation = activation
        if activation == 'Relu':
            self.Relu(x)
        elif activation == "Softmax":
            self.Softmax(x)
        else:
            self.output = x
            self.deri = (self.output > self.output - 1).astype(int)

class Back_Pass:
    def loss(self, expected, predicted):
        self.cost = np.sum(0.5 * (predicted - expected) ** 2, axis=0) / len(predicted)
        self.error = np.sum((predicted - expected), axis=0) / len(predicted)

    def back(self, this_layer):
        self.error = (this_layer.deri) * self.error
        weights_buffer = this_layer.weights
        if this_layer.bias_req == 1:
            this_layer.bais -= l_rate * np.sum(self.error)
        # Input may be single or in batches
        if len(self.error) == 1:
            this_layer.weights -= l_rate * np.dot(this_layer.inputs.T, self.error)
        else:
            for i in range(len(self.error)):
                this_layer.weights -= l_rate * np.dot(this_layer.inputs[i].T, self.error[i])
        self.error = np.dot(self.error, weights_buffer.T)
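For what it's worth, a common way to handle batches (a sketch under the assumption that the intent is a mean-squared-error update averaged over the batch, not the author's exact design): keep the per-sample error matrix instead of collapsing it with np.sum(..., axis=0) in loss(), then a single matrix product inputs.T @ error updates the weights for the whole batch. The names batched_loss and batched_back are illustrative:

import numpy as np

l_rate = 0.01  # illustrative learning rate

def batched_loss(expected, predicted):
    # scalar cost; error keeps its (batch, units) shape
    cost = np.mean(0.5 * (predicted - expected) ** 2)
    error = (predicted - expected) / len(predicted)
    return cost, error

def batched_back(layer, error):
    error = layer.deri * error                    # elementwise, still (batch, units)
    weights_buffer = layer.weights.copy()         # propagate through the pre-update weights
    layer.weights -= l_rate * np.dot(layer.inputs.T, error)  # (in, batch) @ (batch, units)
    if layer.bias_req == 1:
        layer.bais -= l_rate * np.sum(error)
    return np.dot(error, weights_buffer.T)        # error for the previous layer

Summing the batch away in loss() loses the pairing between each sample and its input row, which is consistent with the single-input case working while batches do not.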

Update the class attribute value

I have a problem calling a value from one class inside a new iterative loop outside of that class. The code is shown below (data and newdata are vectors):
import numpy as np
import pyaudio

class A:
    def __init__(self, k, tol=0.0001, max_iter=300):
        self.k = k
        self.tol = tol
        self.max_iter = max_iter

    def fit(self, data):
        self.centroids = {}
        for i in range(self.k):
            self.centroids[i] = data[i + 50]
        for i in range(self.max_iter):
            self.classifications = {}
            for i in range(self.k):
                self.classifications[i] = []
            for featureset in data:
                distances = [np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids]
                classification = distances.index(min(distances))
                self.classifications[classification].append(featureset)
            prev_centroids = dict(self.centroids)
            for classification in self.classifications:
                self.centroids[classification] = np.average(self.classifications[classification], axis=0)
            optimized = True
            for c in self.centroids:
                original_centroid = prev_centroids[c]
                current_centroid = self.centroids[c]
                if np.sum((current_centroid - original_centroid) / original_centroid * 100.0) > self.tol:
                    # print(np.sum((current_centroid - original_centroid) / original_centroid * 100.0))
                    optimized = False
            if optimized:
                break

    def cluster_labels(self, data):
        cluster_labels = []
        for featureset in data:
            distances = [np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids]
            cluster_labels.append(distances.index(min(distances)))
        return cluster_labels

    def predict(self, data):
        distances = [np.linalg.norm(data - self.centroids[centroid]) for centroid in self.centroids]
        classification = distances.index(min(distances))
        return classification

    def update(self, new_data, delta):
        for featureset in new_data:
            distances = [np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids]
            if min(distances) < delta:
                classification = distances.index(min(distances))
                self.classifications[classification].append(featureset)
                self.centroids[classification] = np.average(self.classifications[classification], axis=0)
            else:
                self.centroids[self.k] = featureset
                self.classifications[self.k] = []
                self.classifications[self.k].append(featureset)
                self.k = self.k + 1
                k = self.k
        print(k)
        return k

class Recorder:
    def __init__(rec):
        rec.p = pyaudio.PyAudio()
        rec.stream = rec.p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True,
                                input_device_index=2, frames_per_buffer=chunk)

    def write():
        a = A(k=3)
        a.fit(data)
        k = a.update(newdata, 20)

for num in range(1, 100):
    rec.Recorder()
    rec.write()
Initially, I want to set k = 3. Then the value of k should be updated with k = a.update(newdata, 20). However, on every run the value of k stays at 3. And if I set k = 3 outside of the classes, it always shows the error:
UnboundLocalError: local variable 'k' referenced before assignment
How can I solve this problem?
The issue is in this function:
def update(self, new_data, delta):
    for featureset in new_data:
        distances = [np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids]
        if min(distances) < delta:
            classification = distances.index(min(distances))
            self.classifications[classification].append(featureset)
            self.centroids[classification] = np.average(self.classifications[classification], axis=0)
        else:
            self.centroids[self.k] = featureset
            self.classifications[self.k] = []
            self.classifications[self.k].append(featureset)
            self.k = self.k + 1
            k = self.k
You are only setting the "k" value inside the "else" block. Leaving out anything unrelated, it looks like this:
def update(self, new_data, delta):
    for featureset in new_data:
        ...
        if min(distances) < delta:
            ...
        else:
            ...
            k = self.k
    print(k)  # <-- error here
    return k  # <-- error here
If the else-branch never runs (i.e. min(distances) < delta for every featureset), k is never set and you will get the error you report.
You have two options:
1) Add a k = ... line into the if-block where min(distances) < delta.
2) Add a k = ... line just above the if-block (still inside the for-block) to set a "default" value for k.
On review it is also possible that you just need to return self.k instead of just k.
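A minimal sketch of that last variant, returning self.k so the result is bound on every path (the body otherwise mirrors the question's update()):

def update(self, new_data, delta):
    for featureset in new_data:
        distances = [np.linalg.norm(featureset - self.centroids[c]) for c in self.centroids]
        if min(distances) < delta:
            classification = distances.index(min(distances))
            self.classifications[classification].append(featureset)
            self.centroids[classification] = np.average(self.classifications[classification], axis=0)
        else:
            self.centroids[self.k] = featureset
            self.classifications[self.k] = [featureset]
            self.k = self.k + 1
    print(self.k)
    return self.k  # always defined, even when the else-branch never runs

Note also that write() constructs a fresh A(k=3) and refits on every pass through the loop, so any updated k is discarded between iterations; keeping one A instance alive across the loop is necessary for k to actually grow.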

multidimensional Array out of bounds

I am doing something wrong and cannot figure out what. I have a multidimensional matrix, total, that stores the simulated paths. I am messing up the indexing somewhere but cannot understand how or why. The error is: IndexError: index 253 is out of bounds for axis 0 with size 253
import numpy as np
import matplotlib.pyplot as plt

class pricing_floatinglookback:
    def __init__(self, spot, rate, sigma, time, sims, steps):
        self.spot = spot
        self.rate = rate
        self.sigma = sigma
        self.time = time
        self.sims = sims
        self.steps = steps + 1
        self.dt = self.time / self.steps

    def call_floatingstrike(self):
        SimPriceMin = np.array([])
        SimPriceAtMaturity = np.array([])
        call2 = np.array([])
        pathwiseS = np.zeros((self.steps,), float)
        total = np.zeros((self.sims, self.steps), float)
        for j in range(self.sims):
            pathwiseS[0] = self.spot  # one-dimensional array, indices 0 to 253
            total[j, 0] = self.spot   # multidimensional array, rows 0 to 800 and columns 0 to 253
            for i in range(self.steps):
                phi = np.random.normal()
                pathwiseS[i + 1] = pathwiseS[i] * (1 + self.rate * self.dt + self.sigma * phi * np.sqrt(self.dt))  # --> this is where I am going wrong
                total[j, i + 1] = pathwiseS[i + 1]  # --> this is where I am going wrong
            SimPriceAtMaturity = np.append(SimPriceAtMaturity, pathwiseS[self.steps - 1])
            call2 = np.append(call2, max((pathwiseS[self.steps - 1]) - self.spot, 0))
            SimPriceMin = np.append(SimPriceMin, min(pathwiseS))
        callbsm = np.average(call2)
        call = max(np.average(SimPriceAtMaturity) - np.average(SimPriceMin), 0)
        return call, total.reshape(self.sims, self.steps), np.average(SimPriceMin), callbsm * np.exp(-self.rate * self.time)

pricelookback = pricing_floatinglookback(100, 0.05, 0.2, 1, 800, 252)
clookback, callmatrix, calmin, callbsm = pricelookback.call_floatingstrike()
print(callbsm)
plt.plot(callmatrix.T)
So I fixed it. I don't know why or how, but it's fixed:
class pricing_floatinglookback:
    def __init__(self, spot, rate, sigma, time, sims, steps):
        self.spot = spot
        self.rate = rate
        self.sigma = sigma
        self.time = time
        self.sims = sims
        self.steps = steps + 1
        self.dt = self.time / self.steps

    def call_floatingstrike(self):
        SimPriceMin = np.array([])
        SimPriceAtMaturity = np.array([])
        call2 = np.array([])
        pathwiseS = np.zeros((self.steps,), float)
        total = np.zeros((self.sims, self.steps), float)
        for j in range(self.sims):
            pathwiseS[0] = self.spot
            total[j, 0] = self.spot
            for i in range(self.steps - 1):  # ---> this was the main reason; don't know why, but it was!
                phi = np.random.normal()
                pathwiseS[i + 1] = pathwiseS[i] * (1 + self.rate * self.dt + self.sigma * phi * np.sqrt(self.dt))
                total[j, i] = pathwiseS[i + 1]  # ---> this, as suggested in the comments
            SimPriceAtMaturity = np.append(SimPriceAtMaturity, pathwiseS[self.steps - 1])
            call2 = np.append(call2, max((pathwiseS[self.steps - 1]) - self.spot, 0))
            SimPriceMin = np.append(SimPriceMin, min(pathwiseS))
        callbsm = np.average(call2)
        call = max(np.average(SimPriceAtMaturity) - np.average(SimPriceMin), 0)
        return call, total.reshape(self.sims, self.steps), np.average(SimPriceMin), callbsm * np.exp(-self.rate * self.time)
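The reason for the original IndexError is visible from the sizes (a reconstruction from the code above, offered as an explanation): pathwiseS has self.steps == 253 slots, so its valid indices are 0 to 252. With for i in range(self.steps), i reaches 252 on the last iteration and pathwiseS[i + 1] asks for index 253, which is exactly "index 253 is out of bounds for axis 0 with size 253". Stopping the loop at self.steps - 1 keeps i + 1 within bounds, as a minimal demo shows:

import numpy as np

steps = 253
pathwiseS = np.zeros(steps)

# original loop: i reaches 252, so pathwiseS[i + 1] asks for index 253
# for i in range(steps):
#     pathwiseS[i + 1] = 1.0   # IndexError on the last iteration

# fixed loop: i stops at 251, so i + 1 <= 252 stays in bounds
for i in range(steps - 1):
    pathwiseS[i + 1] = 1.0

One side effect worth noting: with the shortened loop, writing total[j, i] instead of total[j, i + 1] shifts each simulated path one column to the left and leaves the last column of total at zero; total[j, i + 1] would also have been in bounds and would keep the columns aligned with pathwiseS.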

Cannot get next batch's embedding indexes

The first module is minibatch:
import numpy as np
import tensorflow as tf
import Utils.neighbor_samplers as samplers

class Minibatch:
    def __init__(self, embedding, batch_size):
        self.embedding = embedding
        self.batch_idx = [i for i in range(self.embedding.shape[0])]
        self.batch_size = batch_size
        self.iters = 0
        self.batch_permutation = np.random.permutation(self.batch_idx)  # list
        self.next_batch = []
        self.max_iters = embedding.shape[0] // self.batch_size

    def next_batch(self):
        self.start_idx = self.iters * self.batch_size
        self.iters += 1
        self.end_idx = self.start_idx + self.batch_size
        # next_batch = tf.nn.embedding_lookup(embedding, [i for i in range(start_idx, end_idx)])
        self.next_idx = self.batch_permutation[self.start_idx : self.end_idx]
        # next_neighbors = tf.nn.embedding_lookup(self.embedding, next_idx)

    def shuffle(self):
        self.batch_permutation = np.random.permutation(self.batch_idx)
        self.batch_num = 0
And the second module, model:
def train(self):
    batch = minibatch.Minibatch(self.normal_embedding, self.batch_size)
    for epoch in range(self.epoch):
        batch.shuffle()
        print('Epoch : %04d' % (epoch + 1))
        for iter in range(batch.max_iters):
            if iter % 100 == 0 and iter != 0:
                print('%d iters done' % (iter))
            next_idx = batch.next_batch
With the last line, next_idx = batch.next_batch, I want to get the next batch's indexes to look up in the embeddings, but it keeps returning an empty list.
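A likely cause, inferred from the code above (a sketch, not a confirmed fix): __init__ sets self.next_batch = [], and that instance attribute shadows the next_batch method, so batch.next_batch evaluates to the empty list. Even calling the method would not help, because it stores its result in self.next_idx and returns nothing. Renaming the attribute away and returning the indices resolves both problems:

import numpy as np

class Minibatch:
    def __init__(self, embedding, batch_size):
        self.embedding = embedding
        self.batch_size = batch_size
        self.iters = 0
        self.batch_permutation = np.random.permutation(embedding.shape[0])
        self.max_iters = embedding.shape[0] // batch_size
        # no self.next_batch attribute, so the method stays reachable

    def next_batch(self):
        start_idx = self.iters * self.batch_size
        self.iters += 1
        return self.batch_permutation[start_idx : start_idx + self.batch_size]

    def shuffle(self):
        self.batch_permutation = np.random.permutation(self.embedding.shape[0])
        self.iters = 0  # the original reset batch_num, which nothing reads, while iters kept growing

The training loop would then call the method: next_idx = batch.next_batch().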
