Python-coded neural network does not learn properly

My network does not learn to distinguish its inputs: it either outputs the averaged result or becomes biased toward one particular output. What am I doing wrong?
import numpy as np

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))
ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0

class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.layers = len(shape)  # The number of layers
        self.shape = shape  # The number of neurons in each layer
        self.weights = [
            np.array([np.random.rand(shape[l - 1]) for _ in range(shape[l])])
            for l in range(1, self.layers)
        ]  # A list of matrices of weights connecting neighbouring layers
        self.weighted_sums = [np.zeros(l) for l in shape]
        self.activations = [np.zeros(l) for l in shape]

    def inspect(self):
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y):
        delta = [np.empty(self.shape[l]) for l in range(1, self.layers)]  # Here the errors get stored
        delta[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1])  # The output error
        for l in reversed(range(self.layers - 2)):  # The errors get backpropagated
            delta[l] = self.weights[l + 1].T @ delta[l + 1] * sigmoid_der(self.weighted_sums[l])
        for l in range(self.layers - 1):  # The weights get updated online
            for j in range(self.shape[l + 1]):
                self.weights[l][j] -= 0.1 * self.activations[l + 1][j] * delta[l][j]

nn = NeuralNetwork((2, 2, 1))

X = np.array([
    [1, 0],
    [0, 1],
    [1, 1],
    [0, 0]
])
Y = np.array([
    [1],
    [1],
    [0],
    [0]
])

# I train my network by randomly picking examples from my training set
for _ in range(1000):
    i = np.random.randint(0, 4)
    nn.forward_prop(X[i])
    nn.backprop(X[i], Y[i])

for x in X:
    nn.forward_prop(x)
    print(nn.activations[-1])

The matrix math of backpropagation is quite tough. It is especially confusing that the lists of weight matrices and deltas (and of bias arrays too) hold one element fewer than the number of layers in the network, which makes indexing error-prone. And indeed, the problem was due to misindexing. Finally it works!
import numpy as np

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))
ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0

class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.layers = len(shape)
        self.shape = shape
        self.weights = [
            np.array([2 * np.random.random(shape[l - 1]) - 1 for _ in range(shape[l])])
            for l in range(1, self.layers)
        ]
        self.biases = [np.zeros(l) for l in shape[1:]]
        self.weighted_sums = [None for l in shape]
        self.activations = [None for l in shape]
        self.deltas = [None for l in shape[1:]]

    def inspect(self):
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1] + self.biases[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y, lr):
        self.deltas[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1])
        for l in range(self.layers - 2, 0, -1):
            self.deltas[l - 1] = self.weights[l].T @ self.deltas[l] * sigmoid_der(self.weighted_sums[l])
        for l in range(self.layers - 1):
            for j in range(self.shape[l + 1]):
                self.weights[l][j] += lr * self.activations[l] * self.deltas[l][j]
            self.biases[l] += self.deltas[l]

    def train(self, X, Y, lr, epochs):
        for e in range(epochs):
            if not e % 1000: self.test(X)
            i = np.random.randint(len(X))
            self.forward_prop(X[i])
            self.backprop(X[i], Y[i], lr)

    def test(self, X):
        print()
        for x in X:
            self.forward_prop(x)
            print(x, self.activations[-1])

if __name__ == "__main__":
    nn = NeuralNetwork((2, 3, 2, 1))
    X = np.array([
        [1, 0],
        [0, 1],
        [1, 1],
        [0, 0]
    ])
    Y = np.array([
        [1],
        [1],
        [0],
        [0]
    ])
    nn.train(X, Y, 0.4, 20000)
    nn.test(X)

Related

Python implementation of Francis double step QR iteration algorithm does not converge

I am implementing the Francis double-step QR iteration algorithm using the notes and pseudocode from these lecture notes: https://people.inf.ethz.ch/arbenz/ewp/Lnotes/chapter4.pdf (Algorithm 4.5). The pseudocode is written in MATLAB, I believe.
Below is my implementation:
import numpy as np
import scipy.linalg

# compute upper Hessenberg form of matrix
def hessenberg(A):
    m, n = A.shape
    H = A.astype(np.float64)
    for k in range(n - 2):
        x = H[k+1:, k]
        v = np.concatenate([np.array([np.sign(x[0]) * np.linalg.norm(x)]), x[1:]])
        v = v / np.linalg.norm(v)
        H[k+1:, k:] -= 2 * np.outer(v, np.dot(v, H[k+1:, k:]))
        H[:, k+1:] -= 2 * np.outer(np.dot(H[:, k+1:], v), v)
    return H

# compute first three elements of M
def first_three_M(T, s, t):
    x = T[0, 0]**2 + T[0, 1] * T[1, 0] - s * T[0, 0] + t
    y = T[1, 0] * (T[0, 0] + T[1, 1] - s)
    z = T[1, 0] * T[2, 1]
    return x, y, z

# householder reflection
def householder_reflection_step(x_1):
    v = x_1[0] + np.sign(x_1[0]) * np.linalg.norm(x_1)
    v = v / np.linalg.norm(v)
    P = np.eye(3) - 2 * np.outer(v, v)
    return P

# update elements of M
def update_M(T, k, p):
    x = T[k+1, k]
    y = T[k+2, k]
    if k < p - 3:
        z = T[k+3, k]
    else:
        z = 0
    return x, y, z

# givens rotation
def givens_step(T, x_2, x, y, p, q, n):
    # calculate c and s
    c = x / np.sqrt(x**2 + y**2)
    s = -y / np.sqrt(x**2 + y**2)
    P = np.array([[c, s], [-s, c]])
    T[q-1:p, p-3:n] = P.T @ T[q-1:p, p-3:n]
    T[0:p, p-2:p] = T[0:p, p-2:p] @ P
    return T

# deflation step
def deflation_step(T, p, q, epsilon):
    if abs(T[p-1, p-2]) < epsilon * (abs(T[p-2, p-2]) + abs(T[p-1, p-1])):
        T[p-1, p-2] = 0
        p = p - 1
        q = p - 1
    elif abs(T[p-2, p-3]) < epsilon * (abs(T[p-3, p-3]) + abs(T[p-2, p-2])):
        T[p-2, p-3] = 0
        p = p - 2
        q = p - 1
    return T, p, q

# francis qr step
def francis_step(H, epsilon=0.90):
    n = H.shape[0]
    T = H.copy().astype(np.float64)
    p = n - 1
    while p > 2:
        q = p - 1
        s = T[q, q] + T[p, p]
        t = T[q, q] * T[p, p] - T[q, p] * T[p, q]
        # Compute M
        x, y, z = first_three_M(T, s, t)
        x_1 = np.transpose([[x], [y], [z]])
        # Bulge chasing
        for k in range(p - 3):
            # Compute Householder reflector
            P = householder_reflection_step(x_1)
            r = max(1, k - 1)
            T[k:k+3, r:] = P.T @ T[k:k+3, r:]
            r = min(k + 3, p)
            T[0:r, k:k+3] = T[0:r, k:k+3] @ P
            # Update M
            x, y, z = update_M(T, k, p)
            x_2 = np.transpose([[x], [y]])
        # Compute Givens rotation
        T = givens_step(T, x_2, x, y, p, q, n)
        # Check for convergence
        T, p, q = deflation_step(T, p, q, epsilon)
    return T

# francis qr iteration
def francis_qr_iteration(A):
    m, n = A.shape
    H = hessenberg(A)
    eigvals = []
    iters = 0
    max_iters = 100
    while iters < max_iters:
        # Perform Francis step
        T = francis_step(H)
        eigvals.append(np.diag(T))
        iters += 1
    return eigvals

# for quick testing
A = np.array([[2, 2, 3, 4, 2],
              [1, 2, 4, 2, 3],
              [4, 1, 2, 1, 5],
              [5, 2, 5, 2, 1],
              [3, 6, 3, 1, 4]])

eigenvals = francis_qr_iteration(A)

# comparing our method to scipy - final eigvals obtained
print(len(eigenvals))
print(sorted(eigenvals[-1]))
print(sorted(scipy.linalg.eig(A)[0].real))
And this is the output I am getting.
100
[-4.421235127393854, -0.909209110641351, -0.8342390091346807, 3.7552499102751575, 8.215454029003958]
[-3.0411228516834217, -1.143605409373778, -1.143605409373778, 3.325396565009845, 14.002937105421134]
The matrix T is not changing, so the iteration does not converge to the Schur form from which I could read off the eigenvalues with np.diag(T). I believe the error is in either the Givens rotation step or the Householder reflection step. It could also be an indexing issue, since I translated 1-based MATLAB pseudocode into 0-based Python. Please let me know where I am going wrong so I can fix the code and make it converge.
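(Not part of the original post, but one way to narrow the search: every stage of the algorithm is a similarity transform, so the spectrum must already be preserved by the Hessenberg reduction alone. A quick NumPy check of that invariant, assuming the hessenberg function above, tells you whether the bug is upstream of the Francis step:)

import numpy as np

# Debugging sketch: a similarity transform preserves eigenvalues, so
# hessenberg(B) must have the same spectrum as B. If this check fails,
# the bug is in the reduction, not in the bulge chasing.
np.random.seed(0)
B = np.random.rand(5, 5)
H = hessenberg(B)  # the function defined above
print(np.sort_complex(np.linalg.eigvals(B)))
print(np.sort_complex(np.linalg.eigvals(H)))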

NotImplementedError: Cannot convert a symbolic tf.Tensor (Log_2:0) to a numpy array

I have the following code which is based on https://github.com/leonard-seydoux/scatnet. The original code was based on TF v1 and I am in the process of migrating it to TF v2. However, I am facing some issues while trying to perform a numpy operation on a tensor. I am running the code on Google Colab.
The reproducible code is below; sorry, it is quite long:
import tensorflow as tf
import numpy as np
import scipy as sp
from datetime import datetime
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture

HERMITE = [[1, 0, -3, 2], [0, 0, 3, -2], [0, 1, -2, 1], [0, 0, -1, 1]]
FORMAT = 'float32'

def real_hermite_interp(xi, x, m, p):
    # Hermite polynomial coefficients
    h = tf.Variable(np.array(HERMITE).astype(FORMAT), trainable=False)
    xx = tf.stack([x[:, :-1], x[:, 1:]], axis=2)
    # The concatenated coefficients are of shape (n_knots - 1, 2)
    mm = tf.stack([m[:-1], m[1:]], axis=1)
    pp = tf.stack([p[:-1], p[1:]], axis=1)
    y = tf.concat([mm, pp], axis=1)
    # Extract Hermite polynomial coefficients from y (n_knots - 1, 4)
    yh = tf.matmul(y, h)
    xi_ = tf.expand_dims(tf.expand_dims(xi, 0), 0)
    x0_ = tf.expand_dims(xx[:, :, 0], 2)
    x1_ = tf.expand_dims(xx[:, :, 1], 2)
    xn = (xi_ - x0_) / (x1_ - x0_)
    # Calculate powers of normalized interpolation vector
    mask = tf.logical_and(tf.greater_equal(xn, 0.), tf.less(xn, 1.))
    mask = tf.cast(mask, tf.float32)
    xp = tf.pow(tf.expand_dims(xn, -1), [0, 1, 2, 3])
    # Interpolate
    return tf.einsum('rf,srtf->st', yh, xp * tf.expand_dims(mask, -1))

class Scattering:
    """Learnable scattering network layer."""

    def __init__(self, x, j=None, q=None, k=None, pooling_type='average',
                 decimation=2, pooling=2, index=0, **filters_kw):
        """Scattering network layer.

        Computes the convolution modulus and scattering coefficients of the
        input signal.

        Arguments
        ---------
        x: :class:`~tensorflow.Tensor()`
            Input data of shape ``(batch_size, channels, patch_shape)``.
        """
        # Filter bank properties
        self.shape_input = x.get_shape().as_list()
        self.j = j = j[index] if type(j) is list else j
        self.q = q = q[index] if type(q) is list else q
        self.k = k = k[index] if type(k) is list else k

        filters = self.init_filters(j, q, k, **filters_kw)
        n_filters, kernel_size = filters.get_shape().as_list()
        filters_concat = tf.concat([tf.math.real(filters), tf.math.imag(filters)], 0)
        filters_kernel = tf.expand_dims(tf.transpose(filters_concat), 1)

        # Pad input in the time dimension before convolution with half the size
        # of filters temporal dimension (kernel_size).
        shape_fast = [np.prod(self.shape_input[:-1]), 1, self.shape_input[-1]]
        paddings = [0, 0], [0, 0], [kernel_size // 2 - 1, kernel_size // 2 + 1]
        x_reshape = tf.reshape(x, shape_fast)
        x_pad = tf.pad(x_reshape, paddings=paddings, mode='SYMMETRIC')

        # Differentiate the case of one input channel or multiple,
        # which needs reshaping in order to treat them independently.
        # The "NCW" format stores data as batch_shape + [in_channels, in_width]
        x_conv = tf.nn.conv1d(x_pad, filters_kernel, stride=decimation,
                              padding='VALID', data_format='NCW')
        u = tf.sqrt(tf.square(x_conv[:, :n_filters]) +
                    tf.square(x_conv[:, n_filters:]))
        self.u = tf.reshape(u, (*self.shape_input[:-1], n_filters, -1))

        pool = tf.keras.layers.AveragePooling1D

        # Pooling for the scattering coefficients
        if pooling > 1:
            pooled = pool(
                pooling // (decimation ** (index + 1)),
                pooling // (decimation ** (index + 1)),
                padding='valid', data_format='channels_first')
            pooled = pooled(u)
            self.s = tf.reshape(pooled, self.shape_input[:-1] + [j * q] + [-1])
            self.output = self.s

        tf.compat.v1.disable_eager_execution()
        inverse = tf.gradients(x_conv, x, x_conv)[0]
        self.reconstruction_loss = tf.nn.l2_loss(
            inverse - tf.stop_gradient(x)) / np.prod(self.shape_input)

    def init_filters(self, j, q, k, learn_scales=False, learn_knots=False,
                     learn_filters=True, hilbert=False):
        extra_octave = 1 if learn_scales else 0
        self.filter_samples = k * 2 ** (j + extra_octave)
        time_max = np.float32(k * 2 ** (j - 1 + extra_octave))
        time_grid = tf.linspace(-time_max, time_max, self.filter_samples)
        scales_base = 2 ** (tf.range(j * q, dtype=tf.float32) / np.float32(q))
        scales_delta = tf.Variable(
            tf.zeros(j * q), trainable=learn_scales, name='scales')
        scales = scales_base + scales_delta
        nyquist_offset = scales + \
            tf.stop_gradient(tf.one_hot(0, j * q) * tf.nn.relu(1 - scales[0]))
        scales_correction = tf.concat(
            [tf.zeros(1),
             tf.nn.relu(nyquist_offset[:-1] - nyquist_offset[1:])], 0)
        self.scales = nyquist_offset + \
            tf.stop_gradient(tf.cumsum(scales_correction))
        knots_base = tf.Variable(
            tf.ones(k), trainable=learn_knots, name='knots')
        knots_sum = tf.cumsum(
            tf.clip_by_value(
                tf.expand_dims(knots_base, 0) * tf.expand_dims(self.scales, 1),
                1, self.filter_samples - k), exclusive=True, axis=1)
        self.knots = knots_sum - (k // 2) * tf.expand_dims(self.scales, 1)
        if hilbert is True:
            m = (np.cos(np.arange(k) * np.pi) * np.hamming(k)).astype(FORMAT)
            p = (np.zeros(k)).astype(FORMAT)
            self.m = tf.Variable(m, name='m', trainable=learn_filters)
            self.p = tf.Variable(p, name='p', trainable=learn_filters)
            # Boundary conditions and centering
            mask = np.ones(k, dtype=np.float32)
            mask[0], mask[-1] = 0, 0
            m_null = self.m - tf.reduce_mean(self.m[1:-1])
            filters = real_hermite_interp(
                time_grid, self.knots, m_null * mask, self.p * mask)
            # Renorm and set filter-bank
            filters_renorm = filters / tf.reduce_max(filters, 1, keepdims=True)
            filters_fft = tf.signal.rfft(filters_renorm)  # was spectral.rfft
            filters = tf.signal.ifft(
                tf.concat([filters_fft, tf.zeros_like(filters_fft)], 1))
        # Define the parameters for saving
        self.parameters = self.m, self.p, self.scales, self.knots
        return filters

    def renorm(self, parent, epsilon=1e-3):
        # Extract all shapes.
        if epsilon > 0:
            s = self.s / (tf.expand_dims(parent.s, -2) + epsilon)
            batch_size, *_, samples = s.get_shape().as_list()
            return tf.reshape(s, [batch_size, -1, samples])
        else:
            return tf.reshape(self.s, [batch_size, -1, samples])

# testing
data = tf.random.uniform((4, 3, 16800), dtype=tf.float32)
batch_size = 4
args = {'layers': {'j': [4, 6, 8], 'q': [8, 2, 1], 'k': 7,
                   'pooling_type': 'average', 'decimation': 4, 'pooling': 1024,
                   'learn_scales': False, 'learn_knots': False,
                   'learn_filters': True, 'hilbert': True},
        'eps_norm': 0.001, 'eps_log': 0.0001,
        'learning': {'epochs': 3, 'rate': 0.001},
        'pca': {'n_components': 5},
        'gmm': {'gmm_type': 'natural', 'trainable': False},
        'gmm_init': {'n_components': 10, 'max_iter': 1000,
                     'covariance_type': 'full', 'warm_start': True}}

# Run over batches
epochs = args['learning']['epochs']
learning_rate = args['learning']['rate']
for epoch in range(epochs):
    # Gradually decrease learning rate over epochs
    if epoch == epochs // 2:
        learning_rate /= 5
    if epoch == 3 * epochs // 4:
        learning_rate /= 5
    # Calculate scattering coefficients for all batches
    scat_all = list()
    n_batches = data.shape[0] // batch_size
    for b in range(n_batches):
        layers = [Scattering(data, index=0, **args['layers'])]
        for i in range(1, 3):
            layer = Scattering(layers[-1].u, index=i, **args['layers'])
            layers.append(layer)
        # Extract parameters.
        net = [layer.parameters for layer in layers]
        # Get reconstruction losses.
        rl = tf.add_n([a.reconstruction_loss for a in layers])
        # Renormalize coefficients.
        r = list()
        for i in range(1, 3):
            r.append(layers[i].renorm(layers[i - 1], args['eps_norm']))
        # Concatenate.
        sx = tf.transpose(tf.concat(r, axis=1), [1, 0, 2])
        sx = tf.reshape(sx, [sx.get_shape().as_list()[0], -1])
        sx = tf.transpose(sx)
        sx = tf.math.log(sx + args['eps_log'])
        sx[np.isnan(sx)] = np.log(args['eps_log'])
        sx[np.isinf(sx)] = np.log(args['eps_log'])
        scat_all.append(sx)
The issue comes from the line sx[np.isnan(sx)] = np.log(args['eps_log']). The full error is shown below:
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
Cell In [6], line 34
31 print("sx:", sx)
32 print("sx shape: ", sx.shape)
---> 34 sx[np.isnan(sx)] = np.log(args['eps_log'])
35 sx[np.isinf(sx)] = np.log(args['eps_log'])
36 scat_all.append(sx)
File c:\Python310\lib\site-packages\tensorflow\python\framework\ops.py:922, in Tensor.__array__(***failed resolving arguments***)
920 def __array__(self, dtype=None):
921 del dtype
--> 922 raise NotImplementedError(
923 f"Cannot convert a symbolic tf.Tensor ({self.name}) to a numpy array."
924 f" This error may indicate that you're trying to pass a Tensor to"
925 f" a NumPy call, which is not supported.")
NotImplementedError: Cannot convert a symbolic tf.Tensor (Log_2:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported.
Based on solutions in previous Stack Overflow posts, I have tried upgrading my TensorFlow (2.11.0) and NumPy (1.23.5) versions, but that did not solve the problem. I saw some suggestions about downgrading NumPy, but because of other dependencies that did not work. My Python version is 3.8.16. Any suggestions on how to proceed? Thanks in advance.
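A symbolic tensor cannot be indexed or assigned through a NumPy boolean mask, because NumPy would first have to materialize it as a concrete array. A sketch of the usual TF2 replacement, assuming sx and args as defined above, is to do the masking with TensorFlow ops inside the graph:

import numpy as np
import tensorflow as tf

# Replace NaN/Inf entries with log(eps_log) using graph ops instead of
# NumPy boolean-mask assignment, which needs a concrete array.
fill = tf.constant(np.log(args['eps_log']), dtype=sx.dtype)
sx = tf.where(tf.math.is_nan(sx), fill, sx)
sx = tf.where(tf.math.is_inf(sx), fill, sx)
scat_all.append(sx)

tf.where broadcasts the scalar fill value against sx, so no explicit tiling is needed.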

Neural network only learns the last pattern when given several patterns sequentially

This post is about the same issue, but no proper answer was given there. And since this problem seems to be widespread, I'll keep my code behind the scenes.
Following this source, I've written a network which does well when I give it one training example with a target vector. Using gradient descent I minimize the cost function so that the network produces the target vector when given the corresponding input vector. But this only works for one example!
The main goal of a neural network is to react differently to different inputs, and we should be able to train it to do so. I tried changing the network weights by the average of the delta-weights computed for each example, which failed: training gets stuck with the output vector holding the average of all the target vectors in the training set. I have no ideas left and have found no sources that explain this.
How do I train a neural network on a set of examples, not just on one input vector?
Update
For those wondering, I'll attach my code below. Try running it and you will see that, instead of outputting 0 1, it produces 0.5 0.5, which is the result of subtracting averaged delta-weights.
import numpy as np
from sympy import symbols, lambdify
from sympy.functions.elementary.exponential import exp
from time import sleep

x = symbols('x')
sigmoid = exp(x) / (1 + exp(x))
sigmoid_der = sigmoid.diff(x)
sigmoid = lambdify(x, sigmoid)
sigmoid_der = lambdify(x, sigmoid_der)

class Neuron:
    def __init__(self, amount_of_inputs: int, hidden=True):
        self.inputs = np.random.rand(amount_of_inputs) if hidden else np.array([1])
        self.bias = 0.0
        self._activation = 0.0
        self._wsum = 0.0

    @property
    def activation(self) -> float:
        return self._activation

    @property
    def wsum(self) -> float:
        return self._wsum

    def calculate(self, indata):
        wval = self.inputs * indata + self.bias
        self._wsum = wval.sum()
        self._activation = sigmoid(self._wsum)

class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.shape = shape
        self.layers = len(self.shape)
        self.network = [None for _ in range(self.layers)]
        self.network[0] = tuple([Neuron(1, hidden=False) for _ in range(shape[0])])
        for L in range(1, self.layers):
            self.network[L] = tuple([Neuron(shape[L - 1]) for _ in range(shape[L])])
        self.network = tuple(self.network)
        y = [symbols(f'y[{i}]') for i in range(shape[self.layers - 1])]
        a = [symbols(f'a[{i}]') for i in range(shape[self.layers - 1])]
        self.cost_function = sum([(y[i] - a[i]) ** 2 / 2 for i in range(shape[self.layers - 1])])
        self.gradient = tuple([self.cost_function.diff(a[i]) for i in range(shape[self.layers - 1])])
        self.cost_function = lambdify((y, a), self.cost_function)
        self.gradient = lambdify((y, a), self.gradient)

    def getLayer(self, L):
        return np.array([self.network[L][i].activation for i in range(self.shape[L])])

    def getWeightedSum(self, L):
        return np.array([self.network[L][i].wsum for i in range(self.shape[L])])

    def getInputsMatrix(self, L):
        return np.array([self.network[L][i].inputs for i in range(self.shape[L])])

    def calculate(self, values):
        for i in range(self.shape[0]):
            self.network[0][i].calculate(values[i])
        for L in range(1, self.layers):
            indata = self.getLayer(L - 1)
            for j in range(self.shape[L]):
                self.network[L][j].calculate(indata)

    def get_result(self) -> tuple:
        return tuple([self.network[self.layers - 1][i].activation for i in range(self.shape[self.layers - 1])])

    def teach(self, targets, examples):
        if len(targets) != len(examples):
            raise TypeError("The amounts of target and input vectors do not coincide")
        activations = [None for _ in range(len(examples))]
        delta = activations.copy()
        cost_is_low_enough = False
        while not cost_is_low_enough:
            for x in range(len(examples)):
                self.calculate(examples[x])
                activations[x] = [self.getLayer(l) for l in range(self.layers)]
                delta[x] = [None for _ in range(self.layers - 1)]
                network_output = self.getLayer(self.layers - 1)
                output_weighted = self.getWeightedSum(self.layers - 1)
                gradient_vector = np.array(self.gradient(targets[x], network_output))
                delta[x][-1] = gradient_vector * sigmoid_der(output_weighted)
                for l in range(self.layers - 2, 0, -1):
                    weight_matrix = self.getInputsMatrix(l + 1).transpose()
                    output_weighted = self.getWeightedSum(l)
                    activation = self.getLayer(l)
                    for j in range(self.shape[l]):
                        delta[x][l - 1] = (weight_matrix @ delta[x][l]) * sigmoid_der(output_weighted) * activation
            dw = [None for _ in range(self.layers - 1)]
            for x in range(len(examples)):
                self.calculate(examples[x])
            for l in range(self.layers - 1):
                dw[l] = np.empty(self.shape[l + 1])
                for j in range(self.shape[l + 1]):
                    dw[l][j] = np.mean([delta[x][l][j] for x in range(len(examples))])
            for l in range(1, self.layers):
                for j in range(self.shape[l]):
                    for k in range(self.shape[l - 1]):
                        self.network[l][j].inputs[k] -= 0.1 * dw[l - 1][j]
            cost = 0
            for x in range(len(examples)):
                self.calculate(examples[x])
                network_output = np.array(self.get_result())
                incost = self.cost_function(targets[x], network_output)
                print(network_output, incost)
                cost += incost
                # sleep(0.05)
            cost /= len(examples)
            print()
            if cost < 0.001: cost_is_low_enough = True

network = NeuralNetwork((2, 4, 1))

examples = np.array([
    [1, 2],
    [3, 4],
])
targets = np.array([
    [0],
    [1]
])

network.teach(targets, examples)

values_1 = np.array([5, 10])
network.calculate(values_1)
result = network.get_result()
print(result)

'''
values_2 = np.array([3, 4])
network.calculate(values_2)
result = network.get_result()
print(result)
'''

Error in Backpropagation: Neural Network predicts same class

I am writing neural network code from scratch using NumPy. But even after training my network for many epochs, the predictions for each class are random and stay the same irrespective of the input.
I have checked my understanding against Andrew Ng's Coursera ML course and a towardsdatascience.com post. I think I'm making some very basic conceptual mistake that I cannot figure out.
Here is my code:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def dsigmoid(y):
    return y * (1 - y)

class NeuralNetwork:
    def __init__(self, shape):
        self.n_layers = len(shape)
        self.shape = shape
        self.weight = []
        self.bias = []
        i = 0
        while i < self.n_layers - 1:
            self.weight.append(np.random.normal(loc=0.0, scale=0.5,
                                                size=(self.shape[i + 1], self.shape[i])))
            self.bias.append(np.random.normal(loc=0.0, scale=0.3,
                                              size=(self.shape[i + 1], 1)))
            i += 1

    def predict(self, X):
        z = self.weight[0] @ X + self.bias[0]
        a = sigmoid(z)
        i = 1
        while i < self.n_layers - 1:
            z = self.weight[i] @ a + self.bias[i]
            a = sigmoid(z)
            i += 1
        return a

    def predictVerbose(self, X):
        layers = [X]
        z = self.weight[0] @ X + self.bias[0]
        a = sigmoid(z)
        layers.append(a)
        i = 1
        while i < self.n_layers - 1:
            z = self.weight[i] @ a + self.bias[i]
            a = sigmoid(z)
            layers.append(a)
            i += 1
        return layers

    def gradOne(self, X, y):
        layers = self.predictVerbose(X)
        h = layers[-1]
        delta_b = [(h - y) * dsigmoid(h)]
        delta_w = [delta_b[0] @ layers[-2].T]
        i = 1
        while i < self.n_layers - 1:
            buff = delta_b[-1]
            delta_b.append((self.weight[-i].T @ buff) * dsigmoid(layers[-(i + 1)]))
            delta_w.append(delta_b[-1] @ layers[-(i + 2)].T)
            i += 1
        return delta_b[::-1], delta_w[::-1]

    def grad(self, data, l_reg=0):
        # data: x1, x2, x3, ..., xm, y=(0, 1, 2,...)
        m = len(data)
        delta_b = []
        delta_w = []
        i = 0
        while i < self.n_layers - 1:
            delta_b.append(np.zeros((self.shape[i + 1], 1)))
            delta_w.append(np.zeros((self.shape[i + 1], self.shape[i])))
            i += 1
        for row in data:
            X = np.array(row[:-1])[np.newaxis].T
            y = np.zeros((self.shape[-1], 1))
            # print(row)
            y[row[-1], 0] = 1
            buff1, buff2 = self.gradOne(X, y)
            i = 0
            while i < len(delta_b):
                delta_b[i] += buff1[i] / m
                delta_w[i] += buff2[i] / m
                i += 1
        return delta_b, delta_w

    def train(self, data, batch_size, epoch, alpha, l_reg=0):
        m = len(data)
        for i in range(epoch):
            j = 0
            while j < m:
                delta_b, delta_w = self.grad(data[i: (i + batch_size + 1)])
                i = 0
                while i < len(self.weight):
                    self.weight[i] -= alpha * delta_w[i]
                    self.bias[i] -= alpha * delta_b[i]
                    i += 1
                j += batch_size

if __name__ == "__main__":
    x = NeuralNetwork([2, 2, 2])
    # for y in x.gradOne(np.array([[1], [2], [3]]), np.array([[0], [1]])):
    #     print(y.shape)
    data = [
        [1, 1, 0],
        [0, 0, 0],
        [1, 0, 1],
        [0, 1, 1]
    ]
    x.train(data, 4, 1000, 0.1)
    print(x.predict(np.array([[1], [0]])))
    print(x.predict(np.array([[1], [1]])))
Please point out where I am going wrong.
Unfortunately I don't have enough reputation to comment on your post, but here's a link to a NumPy-only neural network that I've made (tested on blob data from sklearn and MNIST).
https://github.com/jaymody/backpropagation/blob/master/old/NeuralNetwork.py
Are you still interested in this problem? As I understand it, you are trying to get an XOR perceptron with direct and inverse outputs?
It looks like:
1. You need to change the expression
delta_b, delta_w = self.grad(data[i: (i + batch_size + 1)])
to
delta_b, delta_w = self.grad(data[::])
in the train function.
2. Some of the random values used to initialize the synaptic weights and biases require many more training cycles at alpha=0.1. Try playing with alpha (I set it to 2) and with the number of epochs (I tried up to 20000); a sketch of both changes follows below.
Also, your code does not work with 1-layer networks. I tried to train 1-layer AND and OR perceptrons and got very strange results (or maybe they require even more cycles). But in the 2-layer case it works fine.
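For concreteness, here is a minimal sketch of both suggested changes, assuming the NeuralNetwork class from the question (alpha=2 and 20000 epochs are simply the values I tried, not tuned constants):

import numpy as np

def train_full_batch(self, data, epoch, alpha):
    # Change 1: use the whole training set in every gradient step
    # instead of the data[i: (i + batch_size + 1)] slice.
    for _ in range(epoch):
        delta_b, delta_w = self.grad(data[::])
        for i in range(len(self.weight)):
            self.weight[i] -= alpha * delta_w[i]
            self.bias[i] -= alpha * delta_b[i]

NeuralNetwork.train_full_batch = train_full_batch  # patched in for illustration

net = NeuralNetwork([2, 2, 2])
data = [[1, 1, 0], [0, 0, 0], [1, 0, 1], [0, 1, 1]]
# Change 2: a larger alpha and many more epochs than the original 0.1 / 1000.
net.train_full_batch(data, epoch=20000, alpha=2)
print(net.predict(np.array([[1], [0]])))  # XOR = 1: second output should dominate
print(net.predict(np.array([[1], [1]])))  # XOR = 0: first output should dominate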

Which TensorFlow operations do not have GPU implementations?

The code below is trying to do linear interpolation, similar to numpy.interp(). But it is quite slow, and I think the reason is that some operations in the code do not have GPU implementations. But I don't know which ones. Could anyone tell me and suggest some solutions?
def tf_interp(b, x, y):
    xaxis_pad = tf.concat([[tf.minimum(b, tf.gather(x, 0))], x,
                           [tf.maximum(b, tf.gather(x, x.get_shape()[0] - 1))]],
                          axis=0)
    yaxis_pad = tf.concat([[0.0], y, [0.0]], axis=0)
    cmp = tf.cast(b >= xaxis_pad, dtype=tf.float32)
    diff = cmp[1:] - cmp[:-1]
    idx = tf.argmin(diff)

    # Interpolate
    alpha = (b - xaxis_pad[idx]) / (xaxis_pad[idx + 1] - xaxis_pad[idx])
    res = alpha * yaxis_pad[idx + 1] + (1 - alpha) * yaxis_pad[idx]

    def f1(): return 0.0
    def f2(): return alpha * yaxis_pad[idx + 1] + (1 - alpha) * yaxis_pad[idx]
    res = tf.cond(pred=tf.is_nan(res), true_fn=f1, false_fn=f2)
    return res

def tf_interpolation(t, x, y):
    t = tf.cast(t, tf.float32)
    x = tf.cast(x, tf.float32)
    y = tf.cast(y, tf.float32)
    t1 = tf.reshape(t, [-1, ])
    t_return = tf.map_fn(lambda b: tf_interp(b, x, y), t1)
    t_return = tf.reshape(t_return, [t.get_shape()[0], t.get_shape()[1]])
    return t_return
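Not part of the original post, but in my experience the slowness here comes less from missing GPU kernels than from tf.map_fn invoking tf_interp once per element and tf.cond forcing data-dependent control flow, both of which serialize the work. A vectorized sketch using tf.searchsorted (assuming x is sorted ascending and TF 2.x broadcasting semantics in tf.where) avoids both:

import tensorflow as tf

def tf_interp_vectorized(t, x, y):
    # Batched linear interpolation, roughly like np.interp(t, x, y),
    # with no per-element map_fn or cond.
    t_flat = tf.reshape(tf.cast(t, tf.float32), [-1])
    x = tf.cast(x, tf.float32)
    y = tf.cast(y, tf.float32)
    # Right neighbour of each query, clipped so that every query falls
    # into a valid [x[idx - 1], x[idx]] interval.
    idx = tf.searchsorted(x, t_flat, side='right')
    idx = tf.clip_by_value(idx, 1, tf.shape(x)[0] - 1)
    x0, x1 = tf.gather(x, idx - 1), tf.gather(x, idx)
    y0, y1 = tf.gather(y, idx - 1), tf.gather(y, idx)
    alpha = (t_flat - x0) / (x1 - x0)
    res = y0 + alpha * (y1 - y0)
    # Clamp out-of-range queries to the boundary values, like np.interp.
    res = tf.where(t_flat <= x[0], y[0], res)
    res = tf.where(t_flat >= x[-1], y[-1], res)
    return tf.reshape(res, tf.shape(t))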
