I ran into a programming issue with calling a class as a function; it seems I cannot call it correctly. Can you please point out the issue? Thank you!
class NTXentLoss(nn.Module):
def __init__(self, temp=0.5):
super(NTXentLoss, self).__init__()
self.temp = temp
def forward(self, zi, zj):
batch_size = zi.shape[0]
z_proj = torch.cat((zi, zj), dim=0)
cos_sim = torch.nn.CosineSimilarity(dim=-1)
sim_mat = cos_sim(z_proj.unsqueeze(1), z_proj.unsqueeze(0))
sim_mat_scaled = torch.exp(sim_mat/self.temp)
r_diag = torch.diag(sim_mat_scaled, batch_size)
l_diag = torch.diag(sim_mat_scaled, -batch_size)
pos = torch.cat([r_diag, l_diag])
diag_mat = torch.exp(torch.ones(batch_size * 2)/self.temp).cuda()
logit = -torch.log(pos/(sim_mat_scaled.sum(1) - diag_mat))
loss = logit.mean()
return loss
sent_A = l2norm(recov_A, dim=1)
sent_emb_A = l2norm(imgs_A, dim=1)
sent_B = l2norm(recov_B, dim=1)
sent_emb_B = l2norm(imgs_B, dim=1)
G_cons = NTXentLoss(sent_A,sent_emb_A) + NTXentLoss(sent_B,sent_emb_B)
What's wrong with this? I only passed two positional arguments. Or should it be:
G_cons = NTXentLoss.forward(sent_A,sent_emb_A) + NTXentLoss.forward(sent_B,sent_emb_B)
You need to instantiate an NTXentLoss object before you can call it. For instance:
ntx = NTXentLoss()
G_cons = ntx(sent_A,sent_emb_A) + ntx(sent_B,sent_emb_B)
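To spell out why the original call fails: NTXentLoss(sent_A, sent_emb_A) passes both tensors to __init__ (which only accepts temp), whereas calling the instance, ntx(...), dispatches to forward through nn.Module.__call__. Below is a minimal sketch of the corrected pattern; the random tensors stand in for the question's recov_A/imgs_A, torch.nn.functional.normalize stands in for l2norm, and a GPU is assumed because the loss above calls .cuda() internally.
import torch
import torch.nn.functional as F

ntx = NTXentLoss(temp=0.5)                 # instantiate once

# Stand-ins for the question's recov_A / imgs_A tensors.
recov_A = torch.randn(8, 128).cuda()
imgs_A = torch.randn(8, 128).cuda()

sent_A = F.normalize(recov_A, dim=1)       # equivalent of l2norm(..., dim=1)
sent_emb_A = F.normalize(imgs_A, dim=1)

loss_A = ntx(sent_A, sent_emb_A)           # __call__ runs forward(zi, zj)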
This is a fragment of my code:
def train(self, features, targets):
for X, y in zip(features, targets):
X = X.reshape(1, X.shape[0])
outputs = self.feed_forward(X)
When I try to use the method with this data:
train(np.array([gameDataList[n].ball_position, gameDataList[n].wall_position]), np.array(gameDataList[n].upOrDown))
where gameDataList[n].upOrDown is an array, e.g. [0.1, 0.9], and gameDataList[n].ball_position and gameDataList[n].wall_position are floats, I get this error.
Full code:
#### Imports ####
import numpy as np
#### Neural Network Class ####
class MLP:
    #### Constructor ####
def __init__(self, n_input_nodes, hidden_nodes, n_output_nodes, lr):
## Network ##
self.n_input_nodes = n_input_nodes
self.n_output_nodes = n_output_nodes
self.nodes = hidden_nodes
self.nodes.insert(0, n_input_nodes)
self.nodes.append(n_output_nodes)
        ## Weights and Biases ##
self.weights = []
self.biases = []
for i in range(1, len(self.nodes)):
self.weights.append(np.random.uniform(-1.0, 1.0, (self.nodes[i - 1], self.nodes[i])))
self.biases.append(np.random.uniform(-1.0, 1.0, (1, self.nodes[i])))
## Learning Rate ##
self.lr = lr
## Activation Functions ##
# Linear Activation
self.linear = lambda x: x
self.d_linear = lambda x: np.ones(x.shape)
# Relu Activation
def relu(x):
x[x < 0] = 0
return x
def d_relu(out):
            out[out > 0] = 1  # derivative of ReLU: 1 where the output is positive, 0 elsewhere
return out
self.relu = relu
self.d_relu = d_relu
# Sigmoid Activation
self.sigmoid = lambda x: 1 / (1 + np.exp(-x))
        self.d_sigmoid = lambda out: out * (1 - out)  # assumes out is sigmoid(x)
# Hyperbolic Tangent Activation
self.tanh = lambda x: np.tanh(x)
self.d_tanh = lambda out: 1 - out ** 2 # assumes out is tanh(x)
def getWeights(self):
return self.weights.copy()
def getBiases(self):
return self.biases.copy()
def setWeights(self, weights):
self.weights = weights.copy()
def setBiases(self, biases):
self.biases = biases.copy()
#### Feed Forward ####
def feed_forward(self, X):
outputs = [X]
logits = np.dot(X, self.weights[0]) + self.biases[0]
for i in range(1, len(self.nodes) - 1):
out = self.sigmoid(logits)
outputs.append(out)
logits = np.dot(out, self.weights[i]) + self.biases[i]
out = self.sigmoid(logits)
outputs.append(out)
return outputs
#### Backpropagation ####
def backpropagation(self, X, y, outputs):
weights_gradients = []
biases_gradients = []
d1 = y - outputs[-1]
d2 = self.d_sigmoid(outputs[-1])
error = d1 * d2
grad = outputs[-2].T * error
weights_gradients.append(grad)
biases_gradients.append(error)
for i in range(len(self.weights) - 2, 1, -1):
d = self.d_sigmoid(outputs[i])
error = np.dot(error, self.weights[i + 1].T) * d
grad = outputs[i - 1].T * error
weights_gradients.append(grad)
biases_gradients.append(error)
return weights_gradients, biases_gradients
#### Training ####
def train(self, features, targets):
# Batch Size for weight update step
batch_size = features.shape[0]
# Delta Weights Variables
delta_weights = [np.zeros(weight.shape) for weight in self.weights]
delta_biases = [np.zeros(bias.shape) for bias in self.biases]
        # For every data point: forward pass, backpropagation, accumulate weight changes
for X, y in zip(features, targets):
# Forward pass
X = X.reshape(1, X.shape[0])
outputs = self.feed_forward(X)
            # Backpropagation
weights_gradients, biases_gradients = self.backpropagation(X, y, outputs)
for i in range(len(weights_gradients)):
delta_weights[-(i + 1)] += weights_gradients[i]
delta_biases[-(i + 1)] += biases_gradients[i]
for i in range(len(delta_weights)):
self.weights[i] += (self.lr * delta_weights[i]) / batch_size
self.biases[i] += (self.lr * delta_biases[i]) / batch_size
#### Testing Methods ####
def predict(self, X):
# Gives prediction
return self.feed_forward(X)[-1]
def test(self, features, targets):
predictions = self.predict(features)
n_correct = 0
for i in range(len(predictions)):
prediction = np.argmax(predictions[i])
correct = np.argmax(targets[i])
if prediction == correct:
n_correct += 1
return n_correct / len(targets)
class GameData:
def __init__(self, ball_position, wall_position, upOrDown):
self.wall_position = wall_position
self.ball_position = ball_position
self.upOrDown = upOrDown
I collect data and train my network in this way:
gameDataList.append(GameData(ball.trt.ycor(), b.trt.ycor(), [0.1, 0.9]))
mlp = MLP(2, [32, 32], 2, 0.0001)
n = random.randint(0, 999)
mlp.train(np.array([gameDataList[n].ball_position, gameDataList[n].wall_position]), np.array(gameDataList[n].upOrDown))
Problem solved. I needed to write two square brackets instead of one.
wrong example:
np.array([gameDataList[n].ball_position, gameDataList[n].wall_position])
correct example:
np.array([[gameDataList[n].ball_position, gameDataList[n].wall_position]])
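To make the shape difference concrete, here is a small sketch with made-up numbers. train() uses features.shape[0] as the batch size and zip(features, targets) to iterate over rows, so it expects a 2-D array of shape (batch, n_features):
import numpy as np

single = np.array([0.5, 120.0])       # shape (2,): zip() would iterate over scalars
batch = np.array([[0.5, 120.0]])      # shape (1, 2): one sample with 2 features
print(single.shape, batch.shape)      # (2,) (1, 2)
for X in batch:
    print(X.reshape(1, X.shape[0]).shape)   # (1, 2), the shape feed_forward expects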
I'm implementing deep deterministic policy gradient (DDPG) to solve my problem by following this tutorial (https://www.youtube.com/watch?v=GJJc1t0rtSU) and using this python source code (https://github.com/philtabor/Youtube-Code-Repository/blob/master/ReinforcementLearning/PolicyGradient/DDPG/pendulum/tensorflow/ddpg_orig_tf.py).
The number of actions is 3 (alpha, beta, gamma) and the state has 2 dimensions. I want the 3 action values to lie in [0,1], so I changed the output layer (third layer) of the Actor class from the "tanh" function to the "sigmoid" function in the file "ddpg_orig_tf.py". However, when I ran the algorithm on my problem, the 3 actions only took the values 0 or 1, and they did not vary over time within the interval [0,1]. I think the problem is not the activation function; I also tried tanh and it likewise produced only -1 or 1.
Here is my changed code in "ddpg_orig_tf.py"; I changed "tanh" to "sigmoid" in the output layer:
def build_network(self):
with tf.variable_scope(self.name):
self.input = tf.placeholder(tf.float32,
shape=[None, *self.input_dims],
name='inputs')
self.action_gradient = tf.placeholder(tf.float32,
shape=[None, self.n_actions],
name='gradients')
f1 = 1. / np.sqrt(self.fc1_dims)
dense1 = tf.layers.dense(self.input, units=self.fc1_dims,
kernel_initializer=random_uniform(-f1, f1),
bias_initializer=random_uniform(-f1, f1))
batch1 = tf.layers.batch_normalization(dense1)
layer1_activation = tf.nn.relu(batch1)
f2 = 1. / np.sqrt(self.fc2_dims)
dense2 = tf.layers.dense(layer1_activation, units=self.fc2_dims,
kernel_initializer=random_uniform(-f2, f2),
bias_initializer=random_uniform(-f2, f2))
batch2 = tf.layers.batch_normalization(dense2)
layer2_activation = tf.nn.relu(batch2)
f3 = 0.003
mu = tf.layers.dense(layer2_activation, units=self.n_actions,
activation='sigmoid',
kernel_initializer= random_uniform(-f3, f3),
bias_initializer=random_uniform(-f3, f3))
self.mu = tf.multiply(mu, self.action_bound)
Here is my Environment:
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path
class P_NOMAEnv():
def __init__(self, distance1, distance2, power, B=15000, N0=10**-20, path_loss=2, g=1):
self.B = B #bandwidth
self.N0 = N0
self.path_loss = path_loss
self.g = g
self.alpha_low = 0.
self.alpha_high = 1.
self.beta_low = 0.
self.beta_high = 1.
self.gamma_low = 0.
self.gamma_high = 1.
self.distance1 = np.random.randint(30,500)
self.distance2 = 2*distance1
self.power = power
self.max_iteration = 1000
self.high = np.array([self.B, self.power])
self.action_space = spaces.Box(low=0., high=1., shape=(3,), dtype=np.float32)
self.observation_space = spaces.Box(low=np.array([0.1, 0.0001]), high=np.array([self.B, self.power]), dtype=np.float32)
self.seed()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def cal_SINR_near(self, alpha, beta, gamma, g, distance1, path_loss, power, B, N0):
h_near = g*(distance1**-path_loss)
channel_noise = B*N0 # 1 subchannel
non_overlap = (np.square(np.absolute(h_near))*power*0.5*(1-beta))/channel_noise
overlap = (np.square(np.absolute(h_near))*power*gamma*(alpha+beta)*0.5)/(channel_noise + (np.square(np.absolute(h_near))*power*(1-gamma)*(alpha+beta)*0.5))
SINR_near = non_overlap + overlap
return SINR_near
def cal_SINR_far(self, alpha, beta, gamma, g, distance2, path_loss, power, B, N0):
h_far = g*(distance2**-path_loss)
channel_noise = B*N0 # 1 subchannel
non_overlap = (np.square(np.absolute(h_far))*power*0.5*(1-alpha))/channel_noise
overlap = (np.square(np.absolute(h_far))*power*(1-gamma)*(alpha+beta)*0.5)/(channel_noise
+ (np.square(np.absolute(h_far))*power*gamma*(alpha+beta)*0.5))
SINR_far = non_overlap + overlap
return SINR_far
def cal_sum_rate(self, SINR_near, SINR_far, B, alpha, beta):
R_near = (1+alpha)*0.5*B*np.log2(1+SINR_near)
R_far = (1+beta)*0.5*B*np.log2(1+SINR_far)
sum_rate = R_near + R_far # reward
return sum_rate
def normalize(self, x):
normalized = (x+1.2)/2.4
return normalized
def step(self, action):
self.steps_taken += 1
B,P = self.state
new_alpha = np.clip(action, self.alpha_low, self.alpha_high)[0]
new_beta = np.clip(action, self.beta_low, self.beta_high)[1]
new_gamma = np.clip(action, self.gamma_low, self.gamma_high)[2]
SINR_near = self.cal_SINR_near(new_alpha, new_beta, new_gamma, self.g, self.distance1, self.path_loss, self.power, self.B, self.N0)
SINR_far = self.cal_SINR_far(new_alpha, new_beta, new_gamma, self.g, self.distance2, self.path_loss, self.power, self.B, self.N0)
reward = self.cal_sum_rate(SINR_near, SINR_far, self.B, new_alpha, new_beta)
done = self.steps_taken >= self.max_iteration
B_new=(1-new_beta)*0.5*self.B + (new_alpha+new_beta)*0.5*self.B
P_new=(1-new_beta)*0.5*self.power + (new_alpha+new_beta)*0.5*new_gamma*self.power
self.state = np.array([B_new, P_new])
return self._get_obs(action), reward, done, {}, new_alpha, new_beta, new_gamma
def _get_obs(self, action):
new_alpha = np.clip(action, self.alpha_low, self.alpha_high)[0]
new_beta = np.clip(action, self.beta_low, self.beta_high)[1]
new_gamma = np.clip(action, self.gamma_low, self.gamma_high)[2]
B_new=(1-new_beta)*0.5*self.B + (new_alpha+new_beta)*0.5*self.B
P_new=(1-new_beta)*0.5*self.power + (new_alpha+new_beta)*0.5*new_gamma*self.power
return np.array([B_new, P_new])
def reset(self):
self.steps_taken = 0
a = np.random.random_sample((3,))
self.state = self._get_obs(a)
return self._get_obs(a)
Here is my main file:
import os
import gym
import numpy as np
from ddpg import Agent
from Environment import P_NOMAEnv
from utils import plotLearning
if __name__ == '__main__':
a = np.random.randint(30,250)
env = P_NOMAEnv(distance1=100, distance2=200, power=2, B=15000, N0=10**-20, path_loss=2, g=1)
agent = Agent(alpha=0.00005, beta=0.0005, input_dims=[2], tau=0.001,
env=env, batch_size=64, layer1_size=400, layer2_size=300,
n_actions=3)
np.random.seed(0)
score_history = []
score_history2 = []
for i in range(4000):
obs = env.reset()
done = False
score = 0
maxi = np.zeros(4,)
while not done:
act = agent.choose_action(obs)
new_state, reward, done, info, alpha, beta, gamma = env.step(act)
agent.remember(obs, act, reward, new_state, int(done))
agent.learn()
score += reward
obs = new_state
if reward>maxi[0]: maxi = [reward, alpha, beta, gamma]
#env.render()
score_history.append(maxi[0])
score_history2.append(score/1000)
print('episode ', i+1, ', reward ', np.around(score/1000, decimals=4), ', max reward: ', np.around(maxi[0], decimals=4), ', with alpha: ', np.around(alpha, decimals=4), ', beta: ', np.around(beta, decimals=4), ', gamma: ', np.around(gamma, decimals=4), 'trailing 100 episodes avg ', np.mean(score_history2[-100:]))
I tried printing out the noise and mu (the output of the actor):
def choose_action(self, state):
state = state[np.newaxis, :]
mu = self.actor.predict(state) # returns list of list
noise = self.noise()
mu_prime = mu + noise
print("Noise: ", noise, "mu: ", mu, "mu_prime: ", mu_prime[0])
return mu_prime[0]
When I run main, the results are as follows:
Noise: [-0.26362168 -0.01389367 -0.39754398] mu: [[1. 0. 0.]] mu_prime: [ 0.73637832 -0.01389367 -0.39754398]
Noise: [-0.29287953 -0.03729832 -0.39651476] mu: [[1. 0. 0.]] mu_prime: [ 0.70712047 -0.03729832 -0.39651476]
.........
As you can see, mu always takes the value 0 or 1, never a value strictly inside the interval [0,1]. I ran more than 1000 episodes, but it did not change over time.
The issue is probably in my environment, but I do not know how to fix it. If you have any idea about it, please help me solve it; I would appreciate it.
You can't use sigmoid for multi-label output. You'll need to use softmax.
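If you follow that suggestion, a minimal sketch of the output layer with softmax, written in the same tf.layers style as the build_network code above (everything else unchanged), might look like the fragment below. Note that a softmax makes the three action components sum to 1, which may or may not be what alpha, beta and gamma require.
f3 = 0.003
mu = tf.layers.dense(layer2_activation, units=self.n_actions,
                     activation=tf.nn.softmax,        # softmax instead of 'sigmoid'
                     kernel_initializer=random_uniform(-f3, f3),
                     bias_initializer=random_uniform(-f3, f3))
self.mu = tf.multiply(mu, self.action_bound)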
I want to write code that performs the matrix multiplication in a neural network without TensorFlow, np.dot, or np.matmul.
The following is the piece of code that I am interested in:
class Affine:
def __init__(self, W, b):
self.W = W
self.b = b
self.x = None
self.original_x_shape = None
self.dW = None
self.db = None
def forward(self, x):
self.original_x_shape = x.shape
x = x.reshape(x.shape[0], -1)
self.x = x
out = np.dot(self.x, self.W) + self.b
return out
This code is part of the forward calculation of a neural net (X*W + b), and it works well.
I want to modify the line out = np.dot(self.x, self.W) + self.b. It should work in the same way without np.dot or np.matmul.
The following is my code:
class Affine2:
def __init__(self, W, b):
self.W = W
self.b = b
self.x = None
self.original_x_shape = None
self.dW = None
self.db = None
def forward(self, x):
self.original_x_shape = x.shape
x = x.reshape(x.shape[0], -1)
self.x = x
rows_A = len(self.x)
cols_A = len(self.x[0])
rows_B = len(self.W)
cols_B = len(self.W[0])
if cols_A != rows_B:
print("Cannot multiply the two matrices. Incorrect dimensions.")
return
# Create the result matrix
start_time = time.time()
out = np.zeros((rows_A, cols_B))
def matmult(i):
time.sleep(1)
# for i in range(rows_A):
for j in range(cols_B):
for k in range(cols_A):
out[i][j] += self.x[i][k] * self.W[k][j]
if __name__ == '__main__':
pool = Pool(process_num)
start_time = int(time.time())
pool.map(matmult, range(0, rows_A))
print("Seconds: %s" % (time.time()-start_time))
return out
The modified part is just a parallel matrix multiplication. However, the following error occurred: AttributeError: Can't pickle local object 'Affine2.forward.<locals>.matmult'
How can I solve the problem?
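One common way around this, shown as a hedged sketch with made-up names (row_matmult, parallel_matmult): multiprocessing pickles the worker function, which only works for module-level functions, and each worker process gets its own copy of out, so the parent never sees in-place writes anyway. Moving the loop into a top-level function that returns its row avoids both problems:
import numpy as np
from multiprocessing import Pool

def row_matmult(args):
    # Compute one row of x @ W with plain Python loops (no np.dot / np.matmul).
    x_row, W = args
    out_row = [0.0] * len(W[0])
    for j in range(len(W[0])):
        for k in range(len(x_row)):
            out_row[j] += x_row[k] * W[k][j]
    return out_row

def parallel_matmult(x, W, processes=4):
    # Each worker returns its row; the parent assembles the result.
    with Pool(processes) as pool:
        rows = pool.map(row_matmult, [(x[i], W) for i in range(len(x))])
    return np.array(rows)

if __name__ == '__main__':
    x = np.random.rand(4, 3)
    W = np.random.rand(3, 2)
    print(np.allclose(parallel_matmult(x, W), x @ W))   # sanity check: True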
I am studying machine learning with Python, and this code is from Stanford University classes.
I was trying to understand the code but failed.
The problem is loss_W = lambda W: self.loss(x,t).
Isn't it true that loss_W(1), loss_W(2), or any other argument cannot change the result?
I can't understand why the results of these two lines are different.
grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
def numerical_gradient(f, x):
h = 1e-4 # 0.0001
grad = np.zeros_like(x)
for idx in range(x.size):
tmp_val = x[idx]
# f(x+h)
x[idx] = float(tmp_val) + h
fxh1 = f(x)
# f(x-h)
x[idx] = tmp_val - h
fxh2 = f(x)
grad[idx] = (fxh1 - fxh2) / (2*h)
x[idx] = tmp_val
return grad
class TwoLayerNet:
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
self.params = {}
self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
self.params['b1'] = np.zeros(hidden_size)
self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
self.params['b2'] = np.zeros(output_size)
def predict(self, x):
W1, W2 = self.params['W1'], self.params['W2']
b1, b2 = self.params['b1'], self.params['b2']
a1 = np.dot(x, W1)
z1 = sigmoid(a1)
a2 = np.dot(z1,W2)
y = softmax(a2)
return y
def loss(self, x, t):
y = self.predict(x)
return cross_entropy_error(y,t)
def accuracy(self, x,t):
y = self.predict(x)
y = np.argmax(y, axis=0)
t = np.argmax(t, axis=0)
data_len = len(x)
accuracy = np.sum(y==t)/float(data_len)
return accuracy
def numerical_gradient(self, x, t):
loss_W = lambda W: self.loss(x,t)
grads = {}
grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
return grads
The lambda loss_W = lambda W: self.loss(x,t) on its own is independent of the value of W. It can be simplified like this:
x = 1 # just some fixed value
t = 5 # just some fixed value
def simplified_lambda_function(W):
    return self.loss(x, t)  # the argument W is never used
The code snippet you posted suggests that there is a class definition somewhere outside, since these lines reference self:
grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
self is not defined in the snippet you posted, so we can't be sure whether the two results are truly identical, but most likely they are.
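One mechanism worth noting from the numerical_gradient code above: although the lambda ignores its argument W, numerical_gradient modifies x[idx] in place, and the x that gets passed in is the very array stored in self.params, which predict reads. So fxh1 and fxh2 still differ, and each of the two calls perturbs a different parameter array. Here is a small stand-alone sketch of that effect, with a made-up loss that reads a shared params dict:
import numpy as np

params = {'W1': np.array([1.0, 2.0, 3.0])}

def loss(x, t):
    # Stand-in for self.loss(x, t): it reads the shared params, not the lambda's argument.
    return np.sum(params['W1'] ** 2)

loss_W = lambda W: loss(1, 5)          # ignores W, just like in the question

def numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)
    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = tmp_val + h           # in-place change is visible to loss() through params
        fxh1 = f(x)
        x[idx] = tmp_val - h
        fxh2 = f(x)
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val
    return grad

print(numerical_gradient(loss_W, params['W1']))   # ≈ [2. 4. 6.], not zeros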