Related
I was running DSSM_N and DSSM on a dataset with batch size 512 on 2060.
However,
DSSM_N costs ~35ms per batch
DSSM. costs ~400ms per batch.
What makes this huge performance difference? I have checked profiling which said
that DSSM costs ~350ms on All Others Time. How can I fix the DSSM implementation?
Many thanks in advance.
Edited as suggested by Micheal:
The main difference is DSSM makes a hash-table-like lookup (notice tf.nn.embedding_lookup and IntegerLookup) which makes the dataset preprocess a little bit simpler while in DSSM_N this lookup was done in dataset preprocess in advance. However, I don't believe this simple hash table like makes such a big difference. What was I doing wrong?
import pickle
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_hub as hub
import tensorflow_text as text # required for BERT hub model
from keras.layers import Layer, Embedding, Dense, Concatenate, BatchNormalization, Dropout, Dot, Hashing, TextVectorization, GRU, IntegerLookup
from keras import Model
import random
from ..config import *
from ..util import *
def embedding_sequence_reduce_mean(x, mask):
# float[B,L,E], bool[B,L] -> float[B,E]
x = tf.ragged.boolean_mask(x, mask) # (B, Lr, E) remove masked data
x = tf.reduce_mean(x, axis=1) # (B, E)
x = tf.where(tf.math.is_nan(x), 0.0, x) # nan to 0
return x
def embedding_masked_to_zero(x, mask):
mask = tf.expand_dims( # B -> B 1 align for broadcasting
tf.cast(mask, dtype=tf.float32), axis=1)
return x * mask
USER_ID_DIM = 128
MEDIA_ID_DIM = 64
GENRE_DIM = 32
ORIGIN_DIM = 32
LATENT_DIM = latent_dim
N_HASH = 8
N_BIN = 1024
print('N_HASH', N_HASH)
print('N_BIN', N_BIN)
class HashEmbedding(Layer):
# TODO: with_importance is not supported
def __init__(
self, n_hash, n_bin, output_dim,
embeddings_initializer='uniform', embeddings_regularizer=None,
activity_regularizer=None, embeddings_constraint=None,
mask_zero=False, input_length=None, **kwargs
):
super(HashEmbedding, self).__init__()
self.mask_zero = mask_zero
self.n_hash = n_hash
self.n_bin = n_bin
# salts no duplication
self.salts = random.sample(range(self.n_hash * 32), self.n_hash)
self.hashs = [Hashing(
num_bins=self.n_bin,
# if mask_zero then hash 0 to 0
mask_value=(0 if self.mask_zero else None),
salt=self.salts[i])
for i in range(self.n_hash)]
self.embedding = Embedding(
self.n_bin, output_dim,
embeddings_initializer=embeddings_initializer,
embeddings_regularizer=embeddings_regularizer,
activity_regularizer=activity_regularizer,
embeddings_constraint=embeddings_constraint,
mask_zero=mask_zero, input_length=input_length
)
def compute_mask(self, inputs, mask=None):
if not self.mask_zero:
return None
return tf.not_equal(inputs, 0)
def call(self, inputs):
shape = inputs.shape
hash = tf.stack([hash(inputs) # [I], n_hash
for hash in self.hashs], axis=len(shape))
x = self.embedding(hash) # [I], n_hash, emb_dim
x = tf.reduce_sum(x, axis=len(shape)) # [I], emb_dim
return x
class StringVectorization(Layer):
def __init__(self, vocab, embedding_dim=32, output_dim=16):
super(StringVectorization, self).__init__()
self.text_vectorization = TextVectorization(
vocabulary=vocab, split='character')
self.embedding = Embedding(
self.text_vectorization.vocabulary_size(), embedding_dim, mask_zero=True)
self.gru = GRU(output_dim)
def call(self, inputs): # B, S
x = self.text_vectorization(inputs)
x = self.embedding(x)
return self.gru(x)
class TfBertZh(Layer): # 128 - 2 input length limit
def __init__(self): # output_dim 768
super(TfBertZh, self).__init__()
self.preprocess = hub.KerasLayer(
zh_preprocessor_model_file, trainable=False)
self.encoder = hub.KerasLayer(zh_encoder_model_file, trainable=False)
def call(self, inputs):
x = self.preprocess(inputs)
x = self.encoder(x)['pooled_output']
return x
class DNN(Layer):
def __init__(self):
super(DNN, self).__init__()
self.concat = Concatenate(axis=1)
self.dense1 = Dense(64)
self.bn = BatchNormalization()
self.drop = Dropout(0.1)
self.dense2 = Dense(32)
def call(self, inputs: list):
from keras.activations import tanh
x = self.concat(inputs)
x = self.drop(tanh(self.bn(self.dense1(x))))
x = tanh(self.dense2(x))
return x
with open(stats_file_pkl, 'rb') as f:
sinfo = pickle.load(f)
with open(vocab_file_pkl, 'rb') as f:
vocab = pickle.load(f)
class DSSM_N(Model):
def __init__(self):
super(DSSM_N, self).__init__()
self.user_id = HashEmbedding(
N_HASH, N_BIN, USER_ID_DIM, mask_zero=True)
self.item_id = Embedding(
sinfo['media_id']['unique'], MEDIA_ID_DIM, mask_zero=True)
self.genre = Embedding(
sinfo['genre_id']['unique'], GENRE_DIM, mask_zero=True)
self.origin = Embedding(
sinfo['origin_id']['unique'], ORIGIN_DIM, mask_zero=True)
self.user_dnn = DNN()
self.item_dnn = DNN()
self.dot = Dot(axes=1, normalize=False)
def call(self, inputs):
u = self.compute_user_latent({'id': inputs['user']})
n_pos = inputs['pos'].shape[1]
n_neg = inputs['neg'].shape[1]
ui_pos = []
ui_neg = []
def signal(u, i):
return tf.exp(self.dot([u, i]))
for j in range(n_pos):
i = self.compute_item_latent({
'id': inputs['pos'][:, j],
'genre': inputs['pos_genre'][:, j, :], # B N 4
'origin': inputs['pos_origin'][:, j, :] # B N 2
})
ui_pos.append(signal(u, i))
ui_pos = tf.add_n(ui_pos)
for j in range(n_neg):
i = self.compute_item_latent({
'id': inputs['neg'][:, j],
'genre': inputs['neg_genre'][:, j, :],
'origin': inputs['neg_origin'][:, j, :]
})
ui_neg.append(signal(u, i))
ui_neg = tf.add_n(ui_neg)
return tf.squeeze(ui_pos / (ui_pos + ui_neg))
def compute_user_latent(self, inputs):
id = self.user_id(inputs['id'])
latent = self.user_dnn([id])
return latent
def compute_item_latent(self, inputs):
id = self.item_id(inputs['id'])
genre = self.genre(inputs['genre']) # B 4 -> B 4 E
genre = embedding_sequence_reduce_mean(genre, genre._keras_mask)
origin = self.origin(inputs['origin']) # B 2 -> B 2 E
origin = embedding_sequence_reduce_mean(origin, origin._keras_mask)
latent = self.item_dnn([id, genre, origin])
return latent
user_df = pd.read_pickle(preprocessed_user_file_pkl)
media_df = pd.read_pickle(preprocessed_media_file_pkl)
genre_df = pd.read_pickle(clean_genre_file_pkl)
origin_df = pd.read_pickle(clean_origin_file_pkl)
class MediaPreprocess(Layer):
def __init__(self):
super(MediaPreprocess, self).__init__()
self.lookup = IntegerLookup(vocabulary=list(media_df['id']))
self.genre_table = tf.Variable(
[[0] * 4] + list(media_df['genre']), dtype=tf.int32, trainable=False)
self.origin_table = tf.Variable(
[[0] * 2] + list(media_df['origin']), dtype=tf.int32, trainable=False)
self.id_embedding = Embedding(
self.lookup.vocabulary_size() + 1, MEDIA_ID_DIM, mask_zero=True)
self.genre_embedding =\
Embedding(genre_df['id'].max() + 1, GENRE_DIM, mask_zero=True)
self.origin_embedding =\
Embedding(origin_df['id'].max() + 1, ORIGIN_DIM, mask_zero=True)
def __call__(self, inputs):
index = self.lookup(inputs) # B -> B
vector = self.id_embedding(index) # B -> B E
vector = embedding_masked_to_zero(vector, vector._keras_mask)
genre = tf.nn.embedding_lookup(self.genre_table, index)
genre = self.genre_embedding(genre)
genre = embedding_sequence_reduce_mean(genre, genre._keras_mask)
origin = tf.nn.embedding_lookup(self.origin_table, index)
origin = self.origin_embedding(origin)
origin = embedding_sequence_reduce_mean(origin, origin._keras_mask)
return {
'id': vector,
'genre': genre,
'origin': origin}
class UserPreprocess(Layer):
def __init__(self):
super(UserPreprocess, self).__init__()
self.lookup = IntegerLookup(vocabulary=list(user_df['id']))
self.embedding = HashEmbedding(
N_HASH, N_BIN, USER_ID_DIM, mask_zero=True)
def __call__(self, inputs):
vector = self.embedding(inputs)
vector = embedding_masked_to_zero(vector, vector._keras_mask)
return {'id': vector}
class DSSM(Model):
def __init__(self, *args, **kwargs):
super(DSSM, self).__init__()
self.user_pp = UserPreprocess()
self.item_pp = MediaPreprocess()
self.user_nn = DNN()
self.item_nn = DNN()
dot = Dot(axes=1, normalize=False)
self.signal = lambda u, i: tf.exp(dot([u, i]))
def call(self, inputs):
user = inputs['user'] # B
pos_s = inputs['pos'] # B N_POS=1
neg_s = inputs['neg'] # B N_NEG=7
n_pos = pos_s.shape[1]
n_neg = neg_s.shape[1]
u = self.user_pp(user)['id'] # B E(uid)
u = self.user_nn([u]) # B L
def compute_ui(i_s, count):
ui = []
for j in range(count):
i = self.item_pp(i_s[:, j])
i = self.item_nn([i['id'], i['genre'], i['origin']])
ui.append(self.signal(u, i))
return tf.add_n(ui) # C B 1 -> B 1
pos_ui = compute_ui(pos_s, n_pos) # B 1
neg_ui = compute_ui(neg_s, n_neg) # B 1
return tf.squeeze(pos_ui / (neg_ui + pos_ui)) # B
this is fragment of my code
def train(self, features, targets):
for X, y in zip(features, targets):
X = X.reshape(1, X.shape[0])
outputs = self.feed_forward(X)
when I try to use the method with data:
train(np.array([gameDataList[n].ball_position, gameDataList[n].wall_position]), np.array(gameDataList[n].upOrDown))
where gameDataList[n].upOrDown is an array e.g. [0.1, 0.9], and gameDataList[n].ball_position and gameDataList[n].wall_position are floats, I get this error.
Full code:
#### Imports ####
import numpy as np
#### Neural Network Class ####
class MLP:
##### Constructor ####
def __init__(self, n_input_nodes, hidden_nodes, n_output_nodes, lr):
## Network ##
self.n_input_nodes = n_input_nodes
self.n_output_nodes = n_output_nodes
self.nodes = hidden_nodes
self.nodes.insert(0, n_input_nodes)
self.nodes.append(n_output_nodes)
## Weights and Biases##
self.weights = []
self.biases = []
for i in range(1, len(self.nodes)):
self.weights.append(np.random.uniform(-1.0, 1.0, (self.nodes[i - 1], self.nodes[i])))
self.biases.append(np.random.uniform(-1.0, 1.0, (1, self.nodes[i])))
## Learning Rate ##
self.lr = lr
## Activation Functions ##
# Linear Activation
self.linear = lambda x: x
self.d_linear = lambda x: np.ones(x.shape)
# Relu Activation
def relu(x):
x[x < 0] = 0
return x
def d_relu(out):
out: x[x > 0] = 1
return out
self.relu = relu
self.d_relu = d_relu
# Sigmoid Activation
self.sigmoid = lambda x: 1 / (1 + np.exp(-x))
self.d_sigmoid = lambda out: out * (1 - out) # assumes out is tanh(x)
# Hyperbolic Tangent Activation
self.tanh = lambda x: np.tanh(x)
self.d_tanh = lambda out: 1 - out ** 2 # assumes out is tanh(x)
def getWeights(self):
return self.weights.copy()
def getBiases(self):
return self.biases.copy()
def setWeights(self, weights):
self.weights = weights.copy()
def setBiases(self, biases):
self.biases = biases.copy()
#### Feed Forward ####
def feed_forward(self, X):
outputs = [X]
logits = np.dot(X, self.weights[0]) + self.biases[0]
for i in range(1, len(self.nodes) - 1):
out = self.sigmoid(logits)
outputs.append(out)
logits = np.dot(out, self.weights[i]) + self.biases[i]
out = self.sigmoid(logits)
outputs.append(out)
return outputs
#### Backpropagation ####
def backpropagation(self, X, y, outputs):
weights_gradients = []
biases_gradients = []
d1 = y - outputs[-1]
d2 = self.d_sigmoid(outputs[-1])
error = d1 * d2
grad = outputs[-2].T * error
weights_gradients.append(grad)
biases_gradients.append(error)
for i in range(len(self.weights) - 2, 1, -1):
d = self.d_sigmoid(outputs[i])
error = np.dot(error, self.weights[i + 1].T) * d
grad = outputs[i - 1].T * error
weights_gradients.append(grad)
biases_gradients.append(error)
return weights_gradients, biases_gradients
#### Training ####
def train(self, features, targets):
# Batch Size for weight update step
batch_size = features.shape[0]
# Delta Weights Variables
delta_weights = [np.zeros(weight.shape) for weight in self.weights]
delta_biases = [np.zeros(bias.shape) for bias in self.biases]
# For every data point, forward pass, backpropogation, store weights change
for X, y in zip(features, targets):
# Forward pass
X = X.reshape(1, X.shape[0])
outputs = self.feed_forward(X)
# Back propogation
weights_gradients, biases_gradients = self.backpropagation(X, y, outputs)
for i in range(len(weights_gradients)):
delta_weights[-(i + 1)] += weights_gradients[i]
delta_biases[-(i + 1)] += biases_gradients[i]
for i in range(len(delta_weights)):
self.weights[i] += (self.lr * delta_weights[i]) / batch_size
self.biases[i] += (self.lr * delta_biases[i]) / batch_size
#### Testing Methods ####
def predict(self, X):
# Gives prediction
return self.feed_forward(X)[-1]
def test(self, features, targets):
predictions = self.predict(features)
n_correct = 0
for i in range(len(predictions)):
prediction = np.argmax(predictions[i])
correct = np.argmax(targets[i])
if prediction == correct:
n_correct += 1
return n_correct / len(targets)
class GameData:
def __init__(self, ball_position, wall_position, upOrDown):
self.wall_position = wall_position
self.ball_position = ball_position
self.upOrDown = upOrDown
I collect data, and train my network, in this way:
gameDataList.append(GameData(ball.trt.ycor(), b.trt.ycor(), [0.1, 0.9]))
mlp = MLP(2, [32, 32], 2, 0.0001)
n = random.randint(0, 999)
mlp.train(np.array([gameDataList[n].ball_position, gameDataList[n].wall_position]), np.array(gameDataList[n].upOrDown))
Problem solved. It was needed to write two square brackets instead of one.
wrong example:
np.array([gameDataList[n].ball_position, gameDataList[n].wall_position])
correct example:
np.array([[gameDataList[n].ball_position, gameDataList[n].wall_position]])
I am trying to implement from scratch the multiclass logistic regression but my implementation returns bad results. I believe the definition of the gradient function and the cost function is fine. Maybe there is a problem with how these functions are interacting with the minimize function. I have tried it but I could not find out what is wrong. Could you please cast some light?
You can add the estimator 'myLR': myLR(**par_dict), with paramters
par_dict= {'alpha': 0.1, 'maxit': 2000, 'opt_method': 'bfgs', 'positive': False, 'penalty': None, 'verbose': True, 'seed': 3}
in this example or in any of these examples to test it.
import numpy as np
from scipy.optimize import minimize
from sklearn import preprocessing
class myLR():
def __init__(self, alpha=0.1, reltol=1e-8, maxit=1000, opt_method=None, verbose=True, seed=0):
self.alpha = alpha
self.maxit = maxit
self.reltol = reltol
self.seed = seed
self.verbose = verbose
self.opt_method = opt_method
self.lbin = preprocessing.LabelBinarizer()
def w_2d(self, w, n_classes):
return np.reshape(w, (-1, n_classes), order='F')
def softmax(self, W, X):
a = np.exp(X # W)
o = a / np.sum(a, axis=1, keepdims=True)
return o
def cost_wraper(self, W):
return self.cost(W, self.X, self.T, self.n_samples, self.n_classes)
def cost(self, W, X, T, n_samples, n_classes):
W = self.w_2d(W, n_classes)
log_O = np.log(self.softmax(W, X))
reg = self.apha * np.linalg.norm(W, ord='fro')
c = -np.sum([np.vdot(T[[i]], log_O[[i]]) for i in range(n_samples)]) / n_samples + reg
return c
def gradient_wraper(self, W):
return self.gradient(W, self.X, self.T, self.n_samples, self.n_classes)
def gradient(self, W, X, T, n_samples, n_classes):
W = self.w_2d(W, n_classes)
O = self.softmax(W, X)
reg = self.alpha * W
grad = -X.T.dot(T - O) / n_samples + reg
return grad.flatten()
def fit(self, X, y=None):
self.n_classes = len(np.unique(y))
self.n_samples, n_features = X.shape
if self.n_classes == 2:
self.T = np.zeros((self.n_samples, self.n_classes), dtype=np.float64)
for i, cls in enumerate(range(self.n_classes)):
self.T[y == cls, i] = 1
else:
self.T = self.lbin.fit_transform(y)
self.X = X
np.random.seed(self.seed)
W_0 = np.random.random(n_features * self.n_classes)
options = {'disp': self.verbose, 'maxiter': self.maxit}
f_min = minimize(fun=self.cost_wraper, x0=W_0,
method=self.opt_method,
jac=self.gradient_wraper,
options=options)
self.coef_ = self.w_2d(f_min.x, self.n_classes)
self.W_ = self.coef_
return self
def predict_proba(self, X):
O = self.softmax(self.coef_, X)
return O
def predict(self, X):
sigma = self.predict_proba(X)
y_pred = np.argmax(sigma, axis=1)
return y_pred
Edit: Regularization term is included.
I think it is now working with the following code.
import numpy as np
from scipy.optimize import minimize
from sklearn import preprocessing
class myLR():
def __init__(self, reltol=1e-8, maxit=1000, opt_method=None, verbose=True, seed=0):
self.maxit = maxit
self.reltol = reltol
self.seed = seed
self.verbose = verbose
self.opt_method = opt_method
self.lbin = preprocessing.LabelBinarizer()
def w_2d(self, w, n_classes):
return np.reshape(w, (n_classes, -1))
def softmax(self, W, X):
a = np.exp(X # W.T)
o = a / np.sum(a, axis=1, keepdims=True)
return o
def squared_norm(self, x):
x = np.ravel(x, order='K')
return np.dot(x, x)
def cost(self, W, X, T, n_samples, n_classes):
W = self.w_2d(W, n_classes)
log_O = np.log(self.softmax(W, X))
c = -(T * log_O).sum()
return c / n_samples
def gradient(self, W, X, T, n_samples, n_classes):
W = self.w_2d(W, n_classes)
O = self.softmax(W, X)
grad = -(T - O).T.dot(X)
return grad.ravel() / n_samples
def fit(self, X, y=None):
n_classes = len(np.unique(y))
n_samples, n_features = X.shape
if n_classes == 2:
T = np.zeros((n_samples, n_classes), dtype=np.float64)
for i, cls in enumerate(np.unique(y)):
T[y == cls, i] = 1
else:
T = self.lbin.fit_transform(y)
np.random.seed(self.seed)
W_0 = np.random.random((self.n_classes, self.n_features))
options = {'disp': self.verbose, 'maxiter': self.maxit}
f_min = minimize(fun=self.cost, x0=W_0,
args=(X, T, n_samples, n_classes),
method=self.opt_method,
jac=self.gradient,
options=options)
self.coef_ = self.w_2d(f_min.x, n_classes)
self.W_ = self.coef_
return self
def predict_proba(self, X):
O = self.softmax(self.W_, X)
return O
def predict(self, X):
sigma = self.predict_proba(X)
y_pred = np.argmax(sigma, axis=1)
return y_pred
I want to build a code that can calculate matrix multiplication in a neural network without tensorflow or np.dot or np.matmul.
The following is the piece of the code that I interested in:
class Affine:
def __init__(self, W, b):
self.W = W
self.b = b
self.x = None
self.original_x_shape = None
self.dW = None
self.db = None
def forward(self, x):
self.original_x_shape = x.shape
x = x.reshape(x.shape[0], -1)
self.x = x
out = np.dot(self.x, self.W) + self.b
return out
The code is a part of forward calculation of a neural net (X*W+b). And it works well.
I want to modify the line out = np.dot(self.x, self.W) + self.b. It should work in the same way without np.dot or np.matmul.
The following is my code:
class Affine2:
def __init__(self, W, b):
self.W = W
self.b = b
self.x = None
self.original_x_shape = None
self.dW = None
self.db = None
def forward(self, x):
self.original_x_shape = x.shape
x = x.reshape(x.shape[0], -1)
self.x = x
rows_A = len(self.x)
cols_A = len(self.x[0])
rows_B = len(self.W)
cols_B = len(self.W[0])
if cols_A != rows_B:
print("Cannot multiply the two matrices. Incorrect dimensions.")
return
# Create the result matrix
start_time = time.time()
out = np.zeros((rows_A, cols_B))
def matmult(i):
time.sleep(1)
# for i in range(rows_A):
for j in range(cols_B):
for k in range(cols_A):
out[i][j] += self.x[i][k] * self.W[k][j]
if __name__ == '__main__':
pool = Pool(process_num)
start_time = int(time.time())
pool.map(matmult, range(0, rows_A))
print("Seconds: %s" % (time.time()-start_time))
return out
The modified part is just parallel matrix multiplication. However, the following error occured: AttributeError: Can't pickle local object 'Affine2.forward.<locals>.matmult'
How can I solve the problem?
i am studying machine learning with python.
and this code is from Standford Uiv classes.
i was trying to grasp these codes but failed.
The problem is loss_W = lambda W: self.loss(x,t).
isn't it True that loss_W(1) or loss_W(2) or anything cannot change the result?
i can't understand that the results of these two codes are different.
grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
def numerical_gradient(f, x):
h = 1e-4 # 0.0001
grad = np.zeros_like(x)
for idx in range(x.size):
tmp_val = x[idx]
# f(x+h)
x[idx] = float(tmp_val) + h
fxh1 = f(x)
# f(x-h)
x[idx] = tmp_val - h
fxh2 = f(x)
grad[idx] = (fxh1 - fxh2) / (2*h)
x[idx] = tmp_val
return grad
class TwoLayerNet:
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
self.params = {}
self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
self.params['b1'] = np.zeros(hidden_size)
self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
self.params['b2'] = np.zeros(output_size)
def predict(self, x):
W1, W2 = self.params['W1'], self.params['W2']
b1, b2 = self.params['b1'], self.params['b2']
a1 = np.dot(x, W1)
z1 = sigmoid(a1)
a2 = np.dot(z1,W2)
y = softmax(a2)
return y
def loss(self, x, t):
y = self.predict(x)
return cross_entropy_error(y,t)
def accuracy(self, x,t):
y = self.predict(x)
y = np.argmax(y, axis=0)
t = np.argmax(t, axis=0)
data_len = len(x)
accuracy = np.sum(y==t)/float(data_len)
return accuracy
def numerical_gradient(self, x, t):
loss_W = lambda W: self.loss(x,t)
grads = {}
grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
return grads
This lambda alone loss_W = lambda W: self.loss(x,t) is indifferent from the value of W. This function can be simplified like this:
x = 1 # Just some random value
t = 5 # Just some random value
def simplified_lambda_function(W):
return (x,t)
The code snippet you posted indicates that there is a class definition outside somewhere as
grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
grads['b1'] = numerical_gradient(loss_W, self.params['b1']
self is undefined here. Because of this we cant be sure if they are truly identical, but most likely they are.