python program is super slow, and I can't find out why - python

I'm writing a multi-layer perceptron from scratch and I think it's way slower than it should be. the culprit seems to be my compute_gradients-function, which according to my investigation answers for most of the execution time. It looks like this:
def compute_gradients(X, Y, S1, H, P, W1, W2, lamb):
# Y must be one-hot
# Y and P must be (10, n)
# X and H must be (3072, n)
# P is the softmax layer
if not (Y.shape[0] == 10 and P.shape[0] == 10 and Y.shape == P.shape):
raise ValueError("Y and P must have shape (10, k). Y: {}, P: {}".format(Y.shape, P.shape))
if not X.shape[0] == n_input:
raise ValueError("X must have shape ({}, k), has {}".format(n_input, X.shape))
if not H.shape[0] == n_hidden:
raise ValueError("H must have shape ({}, k)".format(n_hidden))
grad_W1 = np.zeros([n_hidden, n_input])
grad_W2 = np.zeros([10, n_hidden])
grad_b1 = np.zeros([n_hidden, 1])
grad_b2 = np.zeros([10, 1])
X, Y, H, P = X.T, Y.T, H.T, P.T
for x, y, s1, h, p in zip(X, Y, S1, H, P):
h = np.reshape(h, [1, n_hidden])
y = np.reshape(y, [10, 1])
p = np.reshape(p, [10, 1])
# Second layer
g = -(y-p).T
grad_b2 += g.T
grad_W2 += np.matmul(g.T, h)
# First layer
g = np.matmul(g, W2)
ind = np.zeros(h.shape[1])
for i, val in enumerate(s1):
if val > 0:
ind[i] = 1
diag = np.diag(ind)
g = np.matmul(g, diag)
grad_b1 += g.T
grad_W1 += np.matmul(g.T, np.reshape(x, [1, n_input]))
# Divide by batch size
grad_b1 /= X.shape[0]
grad_b2 /= X.shape[0]
grad_W1 /= X.shape[0]
grad_W2 /= X.shape[0]
# Add regularization term
grad_W1 += 2*lamb*W1
grad_W2 += 2*lamb*W2
return grad_W1, grad_W2, grad_b1, grad_b2
If X, Y, H, P are all 10 rows long (n=10), the computations take about 1 second. This is way too much compared to my friends who are doing the same task. But I can't see any obvious inefficiencies in my code. What can I do to speed the computations up?
EDIT: Input data is the CIFAR dataset. Load it like this:
def one_hot(Y):
# assume Y = [1, 4, 9, 0, ...]
result = [None]*len(Y)
for i, cls in enumerate(Y):
onehot = {
0: lambda: [1,0,0,0,0,0,0,0,0,0],
1: lambda: [0,1,0,0,0,0,0,0,0,0],
2: lambda: [0,0,1,0,0,0,0,0,0,0],
3: lambda: [0,0,0,1,0,0,0,0,0,0],
4: lambda: [0,0,0,0,1,0,0,0,0,0],
5: lambda: [0,0,0,0,0,1,0,0,0,0],
6: lambda: [0,0,0,0,0,0,1,0,0,0],
7: lambda: [0,0,0,0,0,0,0,1,0,0],
8: lambda: [0,0,0,0,0,0,0,0,1,0],
9: lambda: [0,0,0,0,0,0,0,0,0,1],
result[i] = onehot
result = np.array(result).T
return result
def unpickle(file):
import pickle
with open(file, "rb") as fo:
d = pickle.load(fo, encoding="bytes")
return d
names = ["data_batch_1",
# All data sets
dataset_large = {"data": np.zeros([0, 3072]), "labels": np.array([])}
validation_large = {}
## All data batches
for name in names[0:4]:
raw = unpickle(os.path.join(path, name))
dataset_large["data"] = np.append(dataset_large["data"], raw[b"data"], axis = 0)
dataset_large["labels"] = np.append(dataset_large["labels"], raw[b"labels"], axis = 0)
raw = unpickle(os.path.join(path, names[4]))
dataset_large["data"] = np.append(dataset_large["data"], raw[b"data"][0: -1000], axis = 0)
dataset_large["labels"] = np.append(dataset_large["labels"], raw[b"labels"][0: -1000], axis = 0)
validation_large["data"] = raw[b"data"][-1000: ]
validation_large["labels"] = raw[b"labels"][-1000: ]
# Make one-hot
dataset_large["labels"] = one_hot(dataset_large["labels"]).T
validation_large["labels"] = one_hot(validation_large["labels"]).T
# Normalize
dataset_large["data"] = dataset_large["data"]/255
validation_large["data"] = validation_large["data"]/255
the dataset can be found at . Then run like:
def evaluate_classifier(X, W1, W2, b1, b2):
if not X.shape[0] == n_input:
ValueError("Wrong shape of X: {}".format(X.shape))
if not len(X.shape) == 2:
ValueError("Wrong shape of X: {}".format(X.shape))
if not W1.shape == (n_hidden, n_input):
raise ValueError("Wrong shape of W1: {}".format(W1.shape))
if not b1.shape == (n_hidden, 1):
raise ValueError("Wrong shape of b1: {}".format(b1.shape))
if not W2.shape == (10, n_hidden):
raise ValueError("Wrong shape of W2: {}".format(W2.shape))
if not b2.shape == (10, 1):
raise ValueError("Wrong shape of b2: {}".format(b2.shape))
s1, h = layer_1(X, W1, b1)
p = layer_2(h, W2, b2)
return s1, h, p
W1 = np.random.normal(0, 0.01, [n_hidden, n_input])
W2 = np.random.normal(0, 0.01, [10, n_hidden])
b1 = np.random.normal(0, 0.1, [n_hidden, 1])
b2 = np.random.normal(0, 0.1, [10, 1])
X = dataset_large["data"][0:10]
Y = dataset_large["labels"][0:10]
S1, H, P = evaluate_classifier(X, W1, W2, b1, b2)
lamb = 0
compute_gradients(X, Y, S1, H, P, W1, W2, lamb)


NotImplementedError: Cannot convert a symbolic tf.Tensor (Log_2:0) to a numpy array

I have the following code which is based on The original code was based on TF v1 and I am in the process of migrating it to TF v2. However, I am facing some issues while trying to perform a numpy operation on a tensor. I am running the code on Google Colab.
The reproducible code is as below, sorry it is quite long:
import tensorflow as tf
import numpy as np
import scipy as sp
from datetime import datetime
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
HERMITE = [[1, 0, -3, 2], [0, 0, 3, -2], [0, 1, -2, 1], [0, 0, -1, 1]]
FORMAT = 'float32'
def real_hermite_interp(xi, x, m, p):
# Hermite polynomial coefficients
h = tf.Variable(np.array(HERMITE).astype(FORMAT), trainable=False)
xx = tf.stack([x[:, :-1], x[:, 1:]], axis=2)
# The concatenated coefficients are of shape (n_knots - 1, 2)
mm = tf.stack([m[:-1], m[1:]], axis=1)
pp = tf.stack([p[:-1], p[1:]], axis=1)
y = tf.concat([mm, pp], axis=1)
# Extract Hermite polynomial coefficients from y (n_knots - 1, 4)
yh = tf.matmul(y, h)
xi_ = tf.expand_dims(tf.expand_dims(xi, 0), 0)
x0_ = tf.expand_dims(xx[:, :, 0], 2)
x1_ = tf.expand_dims(xx[:, :, 1], 2)
xn = (xi_ - x0_) / (x1_ - x0_)
# Calculate powers of normalized interpolation vector
mask = tf.logical_and(tf.greater_equal(xn, 0.), tf.less(xn, 1.))
mask = tf.cast(mask, tf.float32)
xp = tf.pow(tf.expand_dims(xn, -1), [0, 1, 2, 3])
# Interpolate
return tf.einsum('rf,srtf->st', yh, xp * tf.expand_dims(mask, -1))
class Scattering:
"""Learnable scattering network layer."""
def __init__(self, x, j=None, q=None, k=None, pooling_type='average',
decimation=2, pooling=2, index=0, **filters_kw):
"""Scattering network layer.
Computes the convolution modulus and scattering coefficients of the
input signal.
x: :class:`~tensorflow.Tensor()`
Input data of shape ``(batch_size, channels, patch_shape).
# Filter bank properties
self.shape_input = x.get_shape().as_list()
self.j = j = j[index] if type(j) is list else j
self.q = q = q[index] if type(q) is list else q
self.k = k = k[index] if type(k) is list else k
filters = self.init_filters(j, q, k, **filters_kw)
n_filters, kernel_size = filters.get_shape().as_list()
filters_concat = tf.concat([tf.math.real(filters), tf.math.imag(filters)], 0)
filters_kernel = tf.expand_dims(tf.transpose(filters_concat), 1)
# Pad input in the time dimension before convolution with half the size
# of filters temporal dimension (kernel_size).
shape_fast = [[:-1]), 1, self.shape_input[-1]]
paddings = [0, 0], [0, 0], [kernel_size // 2 - 1, kernel_size // 2 + 1]
x_reshape = tf.reshape(x, shape_fast)
x_pad = tf.pad(x_reshape, paddings=paddings, mode='SYMMETRIC')
# Differentiate the case of one input channel or multiple
# which needs reshaping in order to treat them independently
# The "NCW" format stores data as batch_shape + [in_channels, in_width]
x_conv = tf.nn.conv1d(x_pad, filters_kernel, stride=decimation,
padding='VALID', data_format='NCW')
u = tf.sqrt(tf.square(x_conv[:, :n_filters]) +
tf.square(x_conv[:, n_filters:]))
self.u = tf.reshape(u, (*self.shape_input[:-1], n_filters, -1))
pool = tf.keras.layers.AveragePooling1D
# Pooling for the scattering coefficients
if pooling > 1:
pooled = pool(
pooling // (decimation ** (index + 1)),
pooling // (decimation ** (index + 1)),
padding='valid', data_format='channels_first')
pooled = pooled(u)
self.s = tf.reshape(pooled, self.shape_input[:-1] + [j * q] + [-1])
self.output = self.s
inverse = tf.gradients(x_conv, x, x_conv)[0]
self.reconstruction_loss = tf.nn.l2_loss(
inverse - tf.stop_gradient(x)) /
def init_filters(self, j, q, k, learn_scales=False, learn_knots=False,
learn_filters=True, hilbert=False):
extra_octave = 1 if learn_scales else 0
self.filter_samples = k * 2 ** (j + extra_octave)
time_max = np.float32(k * 2**(j - 1 + extra_octave))
time_grid = tf.linspace(-time_max, time_max, self.filter_samples)
scales_base = 2**(tf.range(j * q, dtype=tf.float32) / np.float32(q))
scales_delta = tf.Variable(
tf.zeros(j * q), trainable=learn_scales, name='scales')
scales = scales_base + scales_delta
nyquist_offset = scales + \
tf.stop_gradient(tf.one_hot(0, j * q) * tf.nn.relu(1 - scales[0]))
scales_correction = tf.concat(
tf.nn.relu(nyquist_offset[:-1] - nyquist_offset[1:])], 0)
self.scales = nyquist_offset + \
knots_base = tf.Variable(
tf.ones(k), trainable=learn_knots, name='knots')
knots_sum = tf.cumsum(
tf.expand_dims(knots_base, 0) * tf.expand_dims(self.scales, 1),
1, self.filter_samples - k), exclusive=True, axis=1)
self.knots = knots_sum - (k // 2) * tf.expand_dims(self.scales, 1)
if hilbert is True:
m = (np.cos(np.arange(k) * np.pi) * np.hamming(k)).astype(FORMAT)
p = (np.zeros(k)).astype(FORMAT)
self.m = tf.Variable(m, name='m', trainable=learn_filters)
self.p = tf.Variable(p, name='p', trainable=learn_filters)
# Boundary Conditions and centering
mask = np.ones(k, dtype=np.float32)
mask[0], mask[-1] = 0, 0
m_null = self.m - tf.reduce_mean(self.m[1:-1])
filters = real_hermite_interp(
time_grid, self.knots, m_null * mask, self.p * mask)
# Renorm and set filter-bank
filters_renorm = filters / tf.reduce_max(filters, 1, keepdims=True)
filters_fft = tf.signal.rfft(filters_renorm) # was spectral.rfft
filters = tf.signal.ifft(
tf.concat([filters_fft, tf.zeros_like(filters_fft)], 1))
# Define the parameters for saving
self.parameters = self.m, self.p, self.scales, self.knots
return filters
def renorm(self, parent, epsilon=1e-3):
# Extract all shapes.
if epsilon > 0:
s = self.s / (tf.expand_dims(parent.s, -2) + epsilon)
batch_size, *_, samples = s.get_shape().as_list()
return tf.reshape(s, [batch_size, -1, samples])
return tf.reshape(self.s, [batch_size, -1, samples])
# testing
data = tf.random.uniform((4,3,16800), dtype=tf.float32)
batch_size = 4
args = {'layers': {'j': [4, 6, 8], 'q': [8, 2, 1], 'k': 7, 'pooling_type': 'average', 'decimation': 4, 'pooling': 1024, 'learn_scales': False, 'learn_knots': False, 'learn_filters': True, 'hilbert': True}, 'eps_norm': 0.001, 'eps_log': 0.0001, 'learning': {'epochs': 3, 'rate': 0.001}, 'pca': {'n_components': 5}, 'gmm': {'gmm_type': 'natural', 'trainable': False}, 'gmm_init': {'n_components': 10, 'max_iter': 1000, 'covariance_type': 'full', 'warm_start': True}}
# Run over batches
epochs = args['learning']['epochs']
learning_rate = args['learning']['rate']
for epoch in range(epochs):
# Gradually decrease learning rate over epochs
if epoch == epochs // 2:
learning_rate /= 5
if epoch == 3 * epochs // 4:
learning_rate /= 5
# Calculate scattering coefficients for all batches
scat_all = list()
n_batches = data.shape[0] // batch_size
for b in range(n_batches):
layers = [Scattering(data, index=0, **args['layers'])]
for i in range(1, 3):
layer = Scattering(layers[-1].u, index=i, **args['layers'])
# Extract parameters.
net = [layer.parameters for layer in layers]
# Get reconstruction losses.
rl = tf.add_n([a.reconstruction_loss for a in layers])
# Renormalize coefficients.
r = list()
for i in range(1, 3):
r.append(layers[i].renorm(layers[i - 1], args['eps_norm']))
# Concatenate.
sx = tf.transpose(tf.concat(r, axis=1), [1, 0, 2])
sx = tf.reshape(sx, [sx.get_shape().as_list()[0], -1])
sx = tf.transpose(sx)
sx = tf.math.log(sx + args['eps_log'])
sx[np.isnan(sx)] = np.log(args['eps_log'])
sx[np.isinf(sx)] = np.log(args['eps_log'])
The issue is from the line 'sx[np.isnan(sx)] = np.log(args['eps_log'])'. The full error is shown below:
NotImplementedError Traceback (most recent call last)
Cell In [6], line 34
31 print("sx:", sx)
32 print("sx shape: ", sx.shape)
---> 34 sx[np.isnan(sx)] = np.log(args['eps_log'])
35 sx[np.isinf(sx)] = np.log(args['eps_log'])
36 scat_all.append(sx)
File c:\Python310\lib\site-packages\tensorflow\python\framework\, in Tensor.__array__(***failed resolving arguments***)
920 def __array__(self, dtype=None):
921 del dtype
--> 922 raise NotImplementedError(
923 f"Cannot convert a symbolic tf.Tensor ({}) to a numpy array."
924 f" This error may indicate that you're trying to pass a Tensor to"
925 f" a NumPy call, which is not supported.")
NotImplementedError: Cannot convert a symbolic tf.Tensor (Log_2:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported.
Based on solutions on previous stackoverflow posts, I have tried to upgrade my tensorflow (2.11.0) and numpy versions (1.23.5) but that did not solve the problem. I saw some suggestions on downgrading numpy but because of other dependencies that did not work. My Python version is 3.8.16. Any suggestions on how to proceed? Thanks in advance.

Python-coded neural network does not learn properly

My network is not trained to recognize inputs separately, it either outputs the averaged result or becomes biased to one particular output. What am I doing wrong?
import numpy as np
sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))
ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0
class NeuralNetwork:
def __init__(self, shape: tuple):
self.layers = len(shape) # The amount layers
self.shape = shape # The amount of neurons per each layer
self.weights = [
np.array([np.random.rand(shape[l - 1]) for _ in range(shape[l])])
for l in range(1, self.layers)
] # A list of matrices of weights connecting neighbouring layers
self.weighted_sums = [np.zeros(l) for l in shape]
self.activations = [np.zeros(l) for l in shape]
def inspect(self):
print(f"Shape: {self.shape}")
print(f"Weights: {self.weights}")
print(f"Activations: {self.activations}")
def forward_prop(self, X):
self.activations[0] = X
for l in range(1, self.layers):
self.weighted_sums[l] = self.weights[l - 1] # self.activations[l - 1]
self.activations[l] = sigmoid(self.weighted_sums[l])
def backprop(self, X, Y):
delta = [np.empty(self.shape[l]) for l in range(1, self.layers)] # Here errors get stored
delta[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1]) # The output error
for l in reversed(range(self.layers - 2)): # The errors get backpropagated
delta[l] = self.weights[l + 1].T # delta[l + 1] * sigmoid_der(self.weighted_sums[l])
for l in range(self.layers - 1): # The weights get updated online
for j in range(self.shape[l + 1]):
self.weights[l][j] -= 0.1 * self.activations[l + 1][j] * delta[l][j]
nn = NeuralNetwork((2, 2, 1))
X = np.array([
[1, 0],
[0, 1],
[1, 1],
[0, 0]
Y = np.array([
# I train my network by randomly picking an example from my training sets
for _ in range(1000):
i = np.random.randint(0, 4)
nn.backprop(X[i], Y[i])
for x in X:
The matrix math of backpropagation is quite tough. It is especially confusing that the length of the lists of weight matrices and deltas (actually the list of bias arrays too) should be one less than the amount of layers in a network which makes indexing confusing. Apparently, the problem was due to misindexing. Finally it works!
import numpy as np
sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))
ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0
class NeuralNetwork:
def __init__(self, shape: tuple):
self.layers = len(shape)
self.shape = shape
self.weights = [
np.array([2 * np.random.random(shape[l - 1]) - 1 for _ in range(shape[l])])
for l in range(1, self.layers)
self.biases = [np.zeros(l) for l in shape[1:]]
self.weighted_sums = [None for l in shape]
self.activations = [None for l in shape]
self.deltas = [None for l in shape[1:]]
def inspect(self):
print(f"Shape: {self.shape}")
print(f"Weights: {self.weights}")
print(f"Activations: {self.activations}")
def forward_prop(self, X):
self.activations[0] = X
for l in range(1, self.layers):
self.weighted_sums[l] = self.weights[l - 1] # self.activations[l - 1] + self.biases[l - 1]
self.activations[l] = sigmoid(self.weighted_sums[l])
def backprop(self, X, Y, lr):
self.deltas[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1])
for l in range(self.layers - 2, 0, -1):
self.deltas[l - 1] = self.weights[l].T # self.deltas[l] * sigmoid_der(self.weighted_sums[l])
for l in range(self.layers - 1):
for j in range(self.shape[l + 1]):
self.weights[l][j] += lr * self.activations[l] * self.deltas[l][j]
self.biases[l] += self.deltas[l]
def train(self, X, Y, lr, epochs):
for e in range(epochs):
if not e % 1000: self.test(X)
i = np.random.randint(len(X))
self.backprop(X[i], Y[i], lr)
def test(self, X):
for x in X:
print(x, self.activations[-1])
if __name__ == "__main__":
nn = NeuralNetwork((2, 3, 2, 1))
X = np.array([
[1, 0],
[0, 1],
[1, 1],
[0, 0]
Y = np.array([
nn.train(X, Y, 0.4, 20000)

Calculate covariance of torch tensor (2d feature map)

I have a torch tensor with shape (batch_size, number_maps, x_val, y_val). The tensor is normalized with a sigmoid function, so within range [0, 1]. I want to find the covariance for each map, so I want to have a tensor with shape (batch_size, number_maps, 2, 2). As far as I know, there is no torch.cov() function as in numpy. How can I efficiently calculate the covariance without converting it to numpy?
def get_covariance(tensor):
bn, nk, w, h = tensor.shape
tensor_reshape = tensor.reshape(bn, nk, 2, -1)
x = tensor_reshape[:, :, 0, :]
y = tensor_reshape[:, :, 1, :]
mean_x = torch.mean(x, dim=2).unsqueeze(-1)
mean_y = torch.mean(y, dim=2).unsqueeze(-1)
xx = torch.sum((x - mean_x) * (x - mean_x), dim=2).unsqueeze(-1) / (h*w - 1)
xy = torch.sum((x - mean_x) * (y - mean_y), dim=2).unsqueeze(-1) / (h*w - 1)
yx = xy
yy = torch.sum((y - mean_y) * (y - mean_y), dim=2).unsqueeze(-1) / (h*w - 1)
cov =, xy, yx, yy), dim=2)
cov = cov.reshape(bn, nk, 2, 2)
return cov
I tried the following now, but I m pretty sure it's not correct.
You could try the function suggested on Github:
def cov(x, rowvar=False, bias=False, ddof=None, aweights=None):
"""Estimates covariance matrix like numpy.cov"""
# ensure at least 2D
if x.dim() == 1:
x = x.view(-1, 1)
# treat each column as a data point, each row as a variable
if rowvar and x.shape[0] != 1:
x = x.t()
if ddof is None:
if bias == 0:
ddof = 1
ddof = 0
w = aweights
if w is not None:
if not torch.is_tensor(w):
w = torch.tensor(w, dtype=torch.float)
w_sum = torch.sum(w)
avg = torch.sum(x * (w/w_sum)[:,None], 0)
avg = torch.mean(x, 0)
# Determine the normalization
if w is None:
fact = x.shape[0] - ddof
elif ddof == 0:
fact = w_sum
elif aweights is None:
fact = w_sum - ddof
fact = w_sum - ddof * torch.sum(w * w) / w_sum
xm = x.sub(avg.expand_as(x))
if w is None:
X_T = xm.t()
X_T =, xm).t()
c =, xm)
c = c / fact
return c.squeeze()

CVXPY error: "NotImplementedError: Strict inequalities are not allowed"

def PPNM_model(a,E, beta):
p = E.shape[1]
x = E*a
x = sum(x,beta*cp.square(x))
return x
def PPNM_model_cvxpy(a,E,beta):
first = E*a
second = beta*cp.square(first)
third = sum(first,second)
return third
def construct_ppnm_model(x_in,A,E, x_LMM, a_lmm):
p = E.shape[1]
d = E.shape[0]
N = A.shape[0]
x = np.zeros(shape=(N,d), dtype=np.float64)
a_ppnm = np.zeros(shape=(N,p), dtype=np.float64)
beta_ppnm = np.zeros(shape=(N,1), dtype=np.float64)
current_lmm = cp.Variable(p)
current_beta = cp.Variable()
b_min = 0
b_max = 100
all_zeros = np.squeeze(np.zeros(shape=(N,1), dtype=np.double))
for i in np.arange(0, N):
x_in_temp = x_in[i,:].astype(np.double)
objective =
E, current_beta)) - x_in_temp))
constraints = [current_lmm >= 0,
current_lmm <= 1,
current_beta >= b_min,
current_beta <= b_max,
sum_to_one_vector*current_lmm == 1]
prob = cp.Problem(objective, constraints)
result = prob.solve()
beta_ppnm[i] = current_beta.value
current_vector = PPNM_model(a_ppnm[i,:], E, current_beta.value)
return x, a_ppnm, beta_ppnm
In this problem, matrix A is of shape (10000, 4) which is (total pixels, endmembers), E is of shape (198, 4) :(spectral bands, endmembers)
and x is of shape (10000, 198) :( pixels, spectral bands)
When I call the construct_ppnm_model like this:
x_ppnm, a_ppnm, beta_ppnm = construct_ppnm_model(hsi_2d, A, E, x_LMM, a_lmm)
I get the following error message:
NotImplementedError: Strict inequalities are not allowed.

Issue with Tensorflow method

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
Nclass = 500
D = 2
M = 3
K = 3
X1 = np.random.randn(Nclass, D) + np.array([0, -2])
X2 = np.random.randn(Nclass, D) + np.array([2, 2])
X3 = np.random.randn(Nclass, D) + np.array([-2, 2])
X = np.vstack ([X1, X2, X3]).astype(np.float32)
Y = np.array([0]*Nclass + [1]*Nclass + [2]*Nclass)
plt.scatter(X[:,0], X[:,1], c=Y, s=100, alpha=0.5)
N = len(Y)
T = np.zeros((N, K))
for i in range(N):
T[i, Y[i]] = 1
def init_weights(shape):
return tf.Variable(tf.random_normal(shape, stddev=0.01))
def forward(X, W1, b1, W2, b2):
Z = tf.nn.sigmoid(tf.matmul(X, W1) + b1)
return tf.matmul(Z, W2) + b2
tfX = tf.placeholder(tf.float32, [None, D])
tfY = tf.placeholder(tf.float32, [None, K])
W1 = init_weights([D, M])
b1 = init_weights([M])
W2 = init_weights([M, K])
b2 = init_weights([K])
py_x = forward(tfX, W1, b1, W2, b2)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(py_x, T))
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)
predict_op = tf.argmax(py_x, 1)
sess = tf.Session()
inti = tf.initizalize_all_variables()
for i in range(1000):, feed_dict={tfX: X, tfY: T})
pred =, feed_dict={tfX: X, tfY: T})
if i % 10 == 0:
print(np.mean(Y == pred))
I have a little issue on the line cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(py_x, T)). It is saying that
Traceback (most recent call last):
File "", line 43, in <module>
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(py_x, T))
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/", line 1607, in softmax_cross_entropy_with_logits
labels, logits)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/", line 1562, in _ensure_xent_args
"named arguments (labels=..., logits=..., ...)" % name)
ValueError: Only call `softmax_cross_entropy_with_logits` with named arguments (labels=..., logits=..., ...)
So far I am not an expert with Tensorflow. Could anyone have an idea how I could fix that. It is not an error of logic, but rather of structure I guess.
As per the error message, you need to name the arguments to the softmax... function.
So you should change the line to:
tf.nn.softmax_cross_entropy_with_logits(labels=py_x, logits=T)
