Issue with Tensorflow method - python

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

Nclass = 500
D = 2
M = 3
K = 3

X1 = np.random.randn(Nclass, D) + np.array([0, -2])
X2 = np.random.randn(Nclass, D) + np.array([2, 2])
X3 = np.random.randn(Nclass, D) + np.array([-2, 2])
X = np.vstack([X1, X2, X3]).astype(np.float32)
Y = np.array([0]*Nclass + [1]*Nclass + [2]*Nclass)

plt.scatter(X[:,0], X[:,1], c=Y, s=100, alpha=0.5)
plt.show()

N = len(Y)
T = np.zeros((N, K))
for i in range(N):
    T[i, Y[i]] = 1

def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def forward(X, W1, b1, W2, b2):
    Z = tf.nn.sigmoid(tf.matmul(X, W1) + b1)
    return tf.matmul(Z, W2) + b2

tfX = tf.placeholder(tf.float32, [None, D])
tfY = tf.placeholder(tf.float32, [None, K])

W1 = init_weights([D, M])
b1 = init_weights([M])
W2 = init_weights([M, K])
b2 = init_weights([K])

py_x = forward(tfX, W1, b1, W2, b2)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(py_x, T))
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)
predict_op = tf.argmax(py_x, 1)
sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)

for i in range(1000):
    sess.run(train_op, feed_dict={tfX: X, tfY: T})
    pred = sess.run(predict_op, feed_dict={tfX: X, tfY: T})
    if i % 10 == 0:
        print(np.mean(Y == pred))
I have a little issue with the line cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(py_x, T)). It says:
Traceback (most recent call last):
File "test.py", line 43, in <module>
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(py_x, T))
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_ops.py", line 1607, in softmax_cross_entropy_with_logits
labels, logits)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_ops.py", line 1562, in _ensure_xent_args
"named arguments (labels=..., logits=..., ...)" % name)
ValueError: Only call `softmax_cross_entropy_with_logits` with named arguments (labels=..., logits=..., ...)
I am not a Tensorflow expert so far. Does anyone have an idea how I could fix this? I guess it is not an error of logic, but rather of structure.

As per the error message, you need to name the arguments to the softmax_cross_entropy_with_logits function. In your positional call the logits come first (py_x is the network output) and the labels second (T), so the line should become:
tf.nn.softmax_cross_entropy_with_logits(labels=T, logits=py_x)
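
For completeness, a sketch of the corrected graph-construction lines. Feeding the tfY placeholder instead of baking the NumPy array T into the graph is an extra suggestion on my part, since the feed_dict already supplies it:

cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tfY, logits=py_x))
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)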

Related

How to minimize the value of a function by changing its parameters?

import numpy as np

x = np.array([1, 2, 3, 4, 5, 6, 7])
f1 = np.array([1, 2, 3, 4, 5, 6, 7])
f2 = np.array([1, 2, 3, 4, 5, 6, 7])

def func(w1, w2, x, f1, f2):
    w1 = 1 - w2
    return np.std(x / (w1*f1 + w2*f2))
I need my code to minimize func(w1, w2, x, f1, f2) by changing w1 and w2, then give me the resulting w1 and w2 values. w1 + w2 should equal 1.
Something like this might be what you need:

import numpy as np
import scipy.optimize

x = np.random.randint(1, 10, 7)
f1 = np.random.randint(1, 10, 7)
f2 = np.random.randint(1, 10, 7)

def func(w, x, f1, f2):  # no need to pass w1 and w2 separately
    return np.std(x / (w[0] * f1 + (1 - w[0]) * f2))

res = scipy.optimize.minimize(func, x0=[0.5], args=(x, f1, f2), bounds=[(0, 1)])
w1 = res.x[0]
w2 = 1 - w1
print("Optimal weights are", w1, w2)

Why is the Hessian determinant 0 when I expand the vector calculation?

Loss function:
L = (XW - t)**2
t: the target value
W: the weights, [w1, w2]
X: the input, [x1, x2]
Therefore, L = (-t + w1*x1 + w2*x2)**2
I am trying to compute its Hessian, which others have proved should be positive. But I get the following result:
(-t + w1*x1 + w2*x2)**2
first grad:
(2*x1*(-t + w1*x1 + w2*x2),)
(2*x2*(-t + w1*x1 + w2*x2),)
Hessian:
[2*x1**2, 2*x1*x2]
[2*x1*x2, 2*x2**2]
Value: 0
Does anyone know why the value is 0?
Here is the code:
import numpy as np
from sympy import *

x1, x2, w1, w2, t = symbols("x1 x2 w1 w2 t")
# inp = Matrix(1, 2, [u, i])
# w = Matrix(2, 1, [w1, w2])
e1 = x1*w1 + x2*w2
# e2 = u*w3 + i*w4
# e3 = e1*w5 + e2*w6
L = (e1 - t)**2
print("loss function: ", L)

all_symbols = [w1, w2]
first_diff = []
second_diff = []
for symbol in all_symbols:
    first_diff.append(diff(L, symbol))
    temp_diff = []
    for symbol in all_symbols:
        temp_diff.append(expand(diff(first_diff[-1], symbol)))
    second_diff.append(temp_diff)

print("\nfirst grad:")
for function in zip(first_diff):
    print(function)

print("\nHessian: ")
for elem in second_diff:
    print(elem)

Hessian = Matrix(second_diff)
# print(Hessian)
print("\nValue: ", Hessian.det())

How to Implement Mixup in Keras Sequence?

I have a multi-class target, and I'm trying to implement the mixup technique in a Keras Sequence.
Without it I get above 70% accuracy, but I get below 5% if I include the code below.
Could someone let me know what I am doing wrong?
Thanks!
def __getitem__(self, index):
    ...
    x_batch, y_batch = self.mixup(x_batch, y_batch)
    ...
    return x_batch, y_batch

def mixup(self, x, y):
    n = x.shape[0]
    l = np.random.beta(self.alpha, self.alpha, n)
    x_l = l.reshape(n, 1, 1, 1)
    y_l = l.reshape(n, 1)

    x1 = x
    x2 = np.flip(x, axis=0)
    x = x1 * x_l + x2 * (1 - x_l)

    y1 = y
    y2 = np.flip(y, axis=0)
    y = y1 * y_l + y2 * (1 - y_l)
    return x, y
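
For anyone debugging this, here is a self-contained sketch of the same mixing logic on a dummy batch (the shapes, class count, and alpha value are assumptions on my part). One thing it makes visible is that the mixed labels are soft: each row is still a valid probability distribution but is no longer one-hot, which matters for how the loss and any training metrics are computed:

import numpy as np

alpha = 0.2
n, h, w, c, k = 4, 8, 8, 3, 5  # dummy batch: 4 images of 8x8x3, 5 classes
x = np.random.rand(n, h, w, c)
y = np.eye(k)[np.random.randint(0, k, n)]  # one-hot labels

l = np.random.beta(alpha, alpha, n)  # one mixing coefficient per sample
x_mixed = x * l.reshape(n, 1, 1, 1) + np.flip(x, axis=0) * (1 - l).reshape(n, 1, 1, 1)
y_mixed = y * l.reshape(n, 1) + np.flip(y, axis=0) * (1 - l).reshape(n, 1)

print(y_mixed)              # soft labels, no longer one-hot
print(y_mixed.sum(axis=1))  # each row still sums to 1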

Python program is super slow, and I can't find out why

I'm writing a multi-layer perceptron from scratch, and I think it's way slower than it should be. The culprit seems to be my compute_gradients function, which according to my investigation accounts for most of the execution time. It looks like this:
def compute_gradients(X, Y, S1, H, P, W1, W2, lamb):
    # Y must be one-hot
    # Y and P must be (10, n)
    # X and H must be (3072, n)
    # P is the softmax layer
    if not (Y.shape[0] == 10 and P.shape[0] == 10 and Y.shape == P.shape):
        raise ValueError("Y and P must have shape (10, k). Y: {}, P: {}".format(Y.shape, P.shape))
    if not X.shape[0] == n_input:
        raise ValueError("X must have shape ({}, k), has {}".format(n_input, X.shape))
    if not H.shape[0] == n_hidden:
        raise ValueError("H must have shape ({}, k)".format(n_hidden))

    grad_W1 = np.zeros([n_hidden, n_input])
    grad_W2 = np.zeros([10, n_hidden])
    grad_b1 = np.zeros([n_hidden, 1])
    grad_b2 = np.zeros([10, 1])

    X, Y, H, P = X.T, Y.T, H.T, P.T
    for x, y, s1, h, p in zip(X, Y, S1, H, P):
        h = np.reshape(h, [1, n_hidden])
        y = np.reshape(y, [10, 1])
        p = np.reshape(p, [10, 1])

        # Second layer
        g = -(y-p).T
        grad_b2 += g.T
        grad_W2 += np.matmul(g.T, h)

        # First layer
        g = np.matmul(g, W2)
        ind = np.zeros(h.shape[1])
        for i, val in enumerate(s1):
            if val > 0:
                ind[i] = 1
        diag = np.diag(ind)
        g = np.matmul(g, diag)
        grad_b1 += g.T
        grad_W1 += np.matmul(g.T, np.reshape(x, [1, n_input]))

    # Divide by batch size
    grad_b1 /= X.shape[0]
    grad_b2 /= X.shape[0]
    grad_W1 /= X.shape[0]
    grad_W2 /= X.shape[0]

    # Add regularization term
    grad_W1 += 2*lamb*W1
    grad_W2 += 2*lamb*W2

    return grad_W1, grad_W2, grad_b1, grad_b2
If X, Y, H and P each hold 10 samples (n=10), the computation takes about 1 second. This is way too slow compared to my friends who are doing the same task, but I can't see any obvious inefficiencies in my code. What can I do to speed the computation up?
EDIT: Input data is the CIFAR dataset. Load it like this:
def one_hot(Y):
    # assume Y = [1, 4, 9, 0, ...]
    result = [None]*len(Y)
    for i, cls in enumerate(Y):
        onehot = {
            0: lambda: [1,0,0,0,0,0,0,0,0,0],
            1: lambda: [0,1,0,0,0,0,0,0,0,0],
            2: lambda: [0,0,1,0,0,0,0,0,0,0],
            3: lambda: [0,0,0,1,0,0,0,0,0,0],
            4: lambda: [0,0,0,0,1,0,0,0,0,0],
            5: lambda: [0,0,0,0,0,1,0,0,0,0],
            6: lambda: [0,0,0,0,0,0,1,0,0,0],
            7: lambda: [0,0,0,0,0,0,0,1,0,0],
            8: lambda: [0,0,0,0,0,0,0,0,1,0],
            9: lambda: [0,0,0,0,0,0,0,0,0,1],
        }[cls]()
        result[i] = onehot
    result = np.array(result).T
    return result

def unpickle(file):
    import pickle
    with open(file, "rb") as fo:
        d = pickle.load(fo, encoding="bytes")
    return d

names = ["data_batch_1",
         "data_batch_2",
         "data_batch_3",
         "data_batch_4",
         "data_batch_5",
         ]

# All data sets
dataset_large = {"data": np.zeros([0, 3072]), "labels": np.array([])}
validation_large = {}

## All data batches
for name in names[0:4]:
    raw = unpickle(os.path.join(path, name))
    dataset_large["data"] = np.append(dataset_large["data"], raw[b"data"], axis=0)
    dataset_large["labels"] = np.append(dataset_large["labels"], raw[b"labels"], axis=0)

raw = unpickle(os.path.join(path, names[4]))
dataset_large["data"] = np.append(dataset_large["data"], raw[b"data"][0:-1000], axis=0)
dataset_large["labels"] = np.append(dataset_large["labels"], raw[b"labels"][0:-1000], axis=0)
validation_large["data"] = raw[b"data"][-1000:]
validation_large["labels"] = raw[b"labels"][-1000:]

# Make one-hot
dataset_large["labels"] = one_hot(dataset_large["labels"]).T
validation_large["labels"] = one_hot(validation_large["labels"]).T

# Normalize
dataset_large["data"] = dataset_large["data"]/255
validation_large["data"] = validation_large["data"]/255
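
As an aside on the one_hot helper above, the dict of lambdas can be replaced by indexing an identity matrix; a sketch that should produce the same (10, n) output:

def one_hot(Y):
    # row i of np.eye(10) is the one-hot vector for class i
    return np.eye(10, dtype=int)[np.asarray(Y, dtype=int)].T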
The dataset can be found at https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz . Then run it like this:
def evaluate_classifier(X, W1, W2, b1, b2):
    if not X.shape[0] == n_input:
        raise ValueError("Wrong shape of X: {}".format(X.shape))
    if not len(X.shape) == 2:
        raise ValueError("Wrong shape of X: {}".format(X.shape))
    if not W1.shape == (n_hidden, n_input):
        raise ValueError("Wrong shape of W1: {}".format(W1.shape))
    if not b1.shape == (n_hidden, 1):
        raise ValueError("Wrong shape of b1: {}".format(b1.shape))
    if not W2.shape == (10, n_hidden):
        raise ValueError("Wrong shape of W2: {}".format(W2.shape))
    if not b2.shape == (10, 1):
        raise ValueError("Wrong shape of b2: {}".format(b2.shape))
    s1, h = layer_1(X, W1, b1)
    p = layer_2(h, W2, b2)
    return s1, h, p

W1 = np.random.normal(0, 0.01, [n_hidden, n_input])
W2 = np.random.normal(0, 0.01, [10, n_hidden])
b1 = np.random.normal(0, 0.1, [n_hidden, 1])
b2 = np.random.normal(0, 0.1, [10, 1])

X = dataset_large["data"][0:10]
Y = dataset_large["labels"][0:10]

S1, H, P = evaluate_classifier(X, W1, W2, b1, b2)
lamb = 0
compute_gradients(X, Y, S1, H, P, W1, W2, lamb)
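
The per-sample Python loop is the likely bottleneck: for every sample it builds an n_hidden x n_hidden diagonal matrix and does a full matrix multiply with it just to zero out some entries. The whole batch can instead be computed with a handful of matrix products. A minimal vectorized sketch, assuming the shape conventions from the docstring comments (X of shape (3072, n), Y and P of shape (10, n), H of shape (n_hidden, n), and S1 of shape (n, n_hidden), as the original loop implies):

def compute_gradients_vectorized(X, Y, S1, H, P, W1, W2, lamb):
    n = X.shape[1]

    # Second layer: g = p - y for all samples at once
    G2 = P - Y                                 # (10, n)
    grad_b2 = G2.sum(axis=1, keepdims=True) / n
    grad_W2 = np.matmul(G2, H.T) / n           # (10, n_hidden)

    # First layer: the boolean ReLU mask (s1 > 0) replaces the per-sample diag
    G1 = np.matmul(W2.T, G2) * (S1.T > 0)      # (n_hidden, n)
    grad_b1 = G1.sum(axis=1, keepdims=True) / n
    grad_W1 = np.matmul(G1, X.T) / n           # (n_hidden, 3072)

    # Regularization term, as in the original
    grad_W1 += 2 * lamb * W1
    grad_W2 += 2 * lamb * W2
    return grad_W1, grad_W2, grad_b1, grad_b2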

Tensorflow self-adjoint eigen decomposition not successful, input might not be valid

I want to define a custom loss function in Keras with Tensorflow backend which uses only the predicted y values, regardless of the true ones. The graph compiles successfully, but at the start of the training it returns an exception: InvalidArgumentError (see above for traceback): Self-adjoint eigen decomposition was not successful. The input might not be valid. I have tried replacing my data with random dummy data, but it produces the same exception.
My full code of the loss definition can be found below. Why is the input to tf.self_adjoint_eig not valid?
def model_correlation_loss(representation_size, k_singular_values):
    global batch_size

    def keras_loss(y_true, y_pred):
        global batch_size
        regularization_constant_1 = regularization_constant_2 = 1e-4
        epsilon = 1e-12

        o1 = o2 = int(y_pred.shape[1] // 2)
        h_1 = y_pred[:, 0:o1]
        h_2 = y_pred[:, o1:o1+o2]
        h_1 = tf.transpose(h_1)
        h_2 = tf.transpose(h_2)
        m = tf.shape(h_1)[1]

        centered_h_1 = h_1 - tf.cast(tf.divide(1, m), tf.float32) * tf.matmul(h_1, tf.ones(shape=(m, m)))
        centered_h_2 = h_2 - tf.cast(tf.divide(1, m), tf.float32) * tf.matmul(h_2, tf.ones(shape=(m, m)))

        sigma_hat_12 = tf.cast(tf.divide(1, m - 1), tf.float32) * tf.matmul(centered_h_1, tf.transpose(centered_h_2))
        sigma_hat_11 = tf.cast(tf.divide(1, m - 1), tf.float32) * tf.matmul(centered_h_1, tf.transpose(centered_h_1)) + regularization_constant_1 * tf.eye(num_rows=o1)
        sigma_hat_22 = tf.cast(tf.divide(1, m - 1), tf.float32) * tf.matmul(centered_h_2, tf.transpose(centered_h_2)) + regularization_constant_2 * tf.eye(num_rows=o2)

        w_1, v_1 = tf.self_adjoint_eig(sigma_hat_11)
        w_2, v_2 = tf.self_adjoint_eig(sigma_hat_22)
        zero = tf.constant(False, dtype=tf.bool)

        idx_pos_entries_1 = tf.where(tf.equal(tf.greater(w_1, epsilon), True))
        idx_pos_entries_1 = tf.reshape(idx_pos_entries_1, [-1, tf.shape(idx_pos_entries_1)[0]])[0]
        w_1 = tf.gather(w_1, idx_pos_entries_1)
        v_1 = tf.gather(v_1, idx_pos_entries_1)

        idx_pos_entries_2 = tf.where(tf.equal(tf.greater(w_2, epsilon), True))
        idx_pos_entries_2 = tf.reshape(idx_pos_entries_2, [-1, tf.shape(idx_pos_entries_2)[0]])[0]
        w_2 = tf.gather(w_2, idx_pos_entries_2)
        v_2 = tf.gather(v_2, idx_pos_entries_2)

        sigma_hat_rootinvert_11 = tf.matmul(tf.matmul(v_1, tf.diag(tf.sqrt(w_1))), tf.transpose(v_1))
        sigma_hat_rootinvert_22 = tf.matmul(tf.matmul(v_2, tf.diag(tf.sqrt(w_2))), tf.transpose(v_2))

        t_matrix = tf.matmul(tf.matmul(sigma_hat_rootinvert_11, sigma_hat_12), sigma_hat_rootinvert_22)

        if k_singular_values == representation_size:  # use all
            correlation = tf.sqrt(tf.trace(tf.matmul(K.transpose(t_matrix), t_matrix)))
        return correlation

    return keras_loss
Here's the tf code provided by Wang on his website for computing the loss function:
def CCA_loss(H1, H2, N, d1, d2, dim, rcov1, rcov2):
    # Remove mean.
    m1 = tf.reduce_mean(H1, axis=0, keep_dims=True)
    H1 = tf.subtract(H1, m1)
    m2 = tf.reduce_mean(H2, axis=0, keep_dims=True)
    H2 = tf.subtract(H2, m2)

    S11 = tf.matmul(tf.transpose(H1), H1) / (N-1) + rcov1 * tf.eye(d1)
    S22 = tf.matmul(tf.transpose(H2), H2) / (N-1) + rcov2 * tf.eye(d2)
    S12 = tf.matmul(tf.transpose(H1), H2) / (N-1)

    E1, V1 = tf.self_adjoint_eig(S11)
    E2, V2 = tf.self_adjoint_eig(S22)

    # For numerical stability.
    idx1 = tf.where(E1 > eps_eig)[:, 0]
    E1 = tf.gather(E1, idx1)
    V1 = tf.gather(V1, idx1, axis=1)
    idx2 = tf.where(E2 > eps_eig)[:, 0]
    E2 = tf.gather(E2, idx2)
    V2 = tf.gather(V2, idx2, axis=1)

    K11 = tf.matmul(tf.matmul(V1, tf.diag(tf.reciprocal(tf.sqrt(E1)))), tf.transpose(V1))
    K22 = tf.matmul(tf.matmul(V2, tf.diag(tf.reciprocal(tf.sqrt(E2)))), tf.transpose(V2))
    T = tf.matmul(tf.matmul(K11, S12), K22)

    # Eigenvalues are sorted in increasing order.
    E2, U = tf.self_adjoint_eig(tf.matmul(T, tf.transpose(T)))
    return tf.reduce_sum(tf.sqrt(E2[-dim:]))
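
Two differences stand out when comparing the Keras loss with this reference (an observation, not a verified diagnosis of the exception): the reference gathers eigenvector columns with axis=1 while the Keras version gathers rows, and the reference uses tf.reciprocal(tf.sqrt(E)) for the inverse matrix square root while the Keras version uses tf.sqrt(w_1) despite naming the result sigma_hat_rootinvert. If the intent is to follow the reference, those lines would presumably read:

v_1 = tf.gather(v_1, idx_pos_entries_1, axis=1)  # keep eigenvector columns, as in CCA_loss
v_2 = tf.gather(v_2, idx_pos_entries_2, axis=1)

sigma_hat_rootinvert_11 = tf.matmul(
    tf.matmul(v_1, tf.diag(tf.reciprocal(tf.sqrt(w_1)))), tf.transpose(v_1))
sigma_hat_rootinvert_22 = tf.matmul(
    tf.matmul(v_2, tf.diag(tf.reciprocal(tf.sqrt(w_2)))), tf.transpose(v_2))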
