TensorFlow gradient all NaN values - graph convolutional networks - Python

I have designed a single-layer neural network:
x = AF
y = xw
where A is the (n×n) graph adjacency matrix, F is an (n×2) feature matrix, and w is the weight matrix.
This is the code:
import numpy as np
import networkx as nx
import tensorflow as tf

def relu(X):
    return np.maximum(0, X)

A1 = np.matrix([
    [0, 0, 1, 0, 0, 0],
    [0, 0, 1, 0, 0, 0],
    [0, 1, 0, 0, 1, 1],
    [1, 0, 1, 0, 0, 0],
    [0, 1, 0, 0, 1, 0],
    [0, 1, 1, 0, 1, 0]],
    dtype=float
)
G = nx.from_numpy_matrix(A1, create_using=nx.DiGraph)
nodes = list(G.nodes())
print(nodes)
print('edges', len(G.edges()))
UN_G = G.to_undirected()
A = nx.adjacency_matrix(UN_G)
print('un_edges', len(UN_G.edges()))
F = np.matrix([
    [G.in_degree(i), G.out_degree(i)]
    for i in range(A.shape[0])
], dtype=float)
I = np.matrix(np.eye(A.shape[0]))
A_prem = A + I
D_hat = np.array(np.sum(A_prem, axis=0))[0]
D_hat = np.matrix(np.diag(D_hat))
A_hat = D_hat**-1 * A_prem * D_hat**-1
x = A_hat * F
x = A * F  # note: this overwrites the normalized A_hat * F from the line above
print(x)
In the unsupervised loss function, A is a sparse matrix:
def loss_fn(y, A):
    coo = A.tocoo()
    tmp = zip(coo.row, coo.col, coo.data)
    total = tf.Variable(0.0)  # renamed from `sum` to avoid shadowing the builtin
    for i, j, k in tmp:
        total = total + k * tf.linalg.norm(y[i] - y[j])
    return total
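For reference, here is a vectorized sketch of the same edge-weighted sum (my rewrite, not from the original post): it gathers both endpoints of every nonzero of A at once instead of looping in Python.
import tensorflow as tf

def loss_fn_vectorized(y, A):
    # same semantics as loss_fn: sum of k * ||y_i - y_j|| over the nonzeros of A
    coo = A.tocoo()
    yi = tf.gather(y, coo.row)
    yj = tf.gather(y, coo.col)
    k = tf.constant(coo.data, dtype=y.dtype)
    return tf.reduce_sum(k * tf.norm(yi - yj, axis=1))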
In the weight training phase:
epchs = 50
w = tf.Variable(tf.random.normal((2, 2)), name='w')
eta = 0.3
for ephoc in range(epchs):
    with tf.GradientTape(persistent=True) as tape:
        tape.watch(w)
        y = tf.nn.softmax(x @ w)
        loss = loss_fn(y, A)
        print(ephoc, ' ', loss)
    dl_dw = tape.gradient(loss, w)
    w.assign_sub(eta * dl_dw)
The values of dl_dw and the loss are NaN. What is the problem with my code? Thank you for guiding me.
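One likely culprit worth checking (an assumption, not a confirmed diagnosis): tf.linalg.norm has a NaN gradient at exactly zero, and A1 contains a self-loop at node 4, so loss_fn ends up computing the norm of y[4] - y[4] = 0. A minimal reproduction of that gradient behavior:
import tensorflow as tf

# the gradient of the Euclidean norm at the zero vector is NaN (0/0)
z = tf.Variable([0.0, 0.0])
with tf.GradientTape() as tape:
    n = tf.linalg.norm(z)
print(tape.gradient(n, z))  # tf.Tensor([nan nan], shape=(2,), dtype=float32)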

Related

PyTorch: Constant loss value and output within linear neural network

I'm trying to make a neural network that calculates the right input angles for a rotation matrix. I have a classic linear network structure, and at the last step the output is fed into my function for the rotation, which returns a point in space as a list.
Here's the code I wrote for it:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable as V
import torch.optim as opt
import numpy as np
import matplotlib.pyplot as plt

cam_pos = np.array([500, 160, 1140, 1])  # with respect to vehicle coordinates
img_res = (1280, 1080)
aspect_ratio = img_res[0] / img_res[1]

# in px
cx = 636 / aspect_ratio
cy = 548 / aspect_ratio
fx = 241 / aspect_ratio
fy = 238 / aspect_ratio

u = 872
v = 423
D = 1900  # mm

img_pt = np.array([u, v, 1, 1/D]).T
camera_matrix = np.array([[fx, 0, cx, 0],
                          [0, fy, cy, 0],
                          [0, 0, 1, 0],
                          [0, 0, 0, 1]])
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.lin1 = nn.Linear(3, 10)
        self.lin2 = nn.Linear(10, 10)
        self.lin3 = nn.Linear(10, 3)
        self.angle_list = []

    def forward(self, x):
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        x = self.lin3(x)
        self.angle_list.append(list(x.detach().numpy()))
        return torch.tensor(self.cam_function(x), requires_grad=True)

    def rot_x(self, alpha):
        return np.array([[1, 0, 0, 0],
                         [0, np.cos(alpha), -np.sin(alpha), 0],
                         [0, np.sin(alpha), np.cos(alpha), 0],
                         [0, 0, 0, 1]])

    def rot_y(self, beta):
        return np.array([[np.cos(beta), 0, np.sin(beta), 0],
                         [0, 1, 0, 0],
                         [-np.sin(beta), 0, np.cos(beta), 0],
                         [0, 0, 0, 1]])

    def rot_z(self, gamma):
        return np.array([[np.cos(gamma), -np.sin(gamma), 0, 0],
                         [np.sin(gamma), np.cos(gamma), 0, 0],
                         [0, 0, 1, 0],
                         [0, 0, 0, 1]])

    def cam_function(self, net_output):
        net_output = net_output.detach().numpy()
        x = net_output[0]
        y = net_output[1]
        z = net_output[2]
        rot_m = np.dot(self.rot_z(z), np.dot(self.rot_y(y), self.rot_x(x)))
        extrinsic_matrix = np.array([[rot_m[0][0], rot_m[0][1], rot_m[0][2], cam_pos[0]],
                                     [rot_m[1][0], rot_m[1][1], rot_m[1][2], cam_pos[1]],
                                     [rot_m[2][0], rot_m[2][1], rot_m[2][2], cam_pos[2]],
                                     [0, 0, 0, 1]])
        cam_output = img_pt * D * np.linalg.inv(camera_matrix) * extrinsic_matrix / 1000
        cam_output = [cam_output[0][0], cam_output[1][1], cam_output[2][2]]
        return cam_output
model = Network()
loss_function = nn.CrossEntropyLoss()
optimizer = opt.SGD(model.parameters(), lr=1e-3)
target = torch.tensor([1.636, 1.405, 0.262]).float()
dummy_data = torch.tensor([0, 0, 0]).float()
losses = []

for epoch in range(5000):
    model.train()
    prediction = model(dummy_data)
    loss = loss_function(prediction, target)
    losses.append(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
With that I'm getting a constant value for the loss, and for the output as well:
7.3858967314779305
tensor([7.9938, 3.9272, 1.8514], dtype=torch.float64, requires_grad=True)
7.3858967314779305
tensor([7.9938, 3.9272, 1.8514], dtype=torch.float64, requires_grad=True)
7.3858967314779305
tensor([7.9938, 3.9272, 1.8514], dtype=torch.float64, requires_grad=True)
Can someone help me please? If this works, I would then extract the "angles" the NN used for the rotation matrix.
Do not use NumPy in cam_function; use torch tensor operations instead. With NumPy, the gradient does not flow back to the network weights when you call backward().
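A minimal sketch of that suggestion (my interpretation, not the answerer's code; rot_x_torch is a hypothetical rewrite, and rot_y / rot_z would follow the same pattern). The torch.tensor(..., requires_grad=True) wrapper in forward would also need to go, since creating a fresh tensor cuts the autograd graph.
import torch

def rot_x_torch(alpha):
    # build the rotation from torch ops so autograd can differentiate through it
    c, s = torch.cos(alpha), torch.sin(alpha)
    one, zero = torch.ones_like(alpha), torch.zeros_like(alpha)
    return torch.stack([
        torch.stack([one, zero, zero, zero]),
        torch.stack([zero, c, -s, zero]),
        torch.stack([zero, s, c, zero]),
        torch.stack([zero, zero, zero, one]),
    ])

alpha = torch.tensor(0.3, requires_grad=True)
R = rot_x_torch(alpha)
R.sum().backward()
print(alpha.grad)  # non-None: gradients flow through the rotation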

Expectation Maximization Algorithm (EM) for Gaussian Mixture Models (GMMs)

I'm trying to apply the Expectation Maximization Algorithm (EM) to a Gaussian Mixture Model (GMM) using Python and NumPy. The PDF document I am basing my implementation on can be found here.
Below are the equations (for component k with responsibilities w_ik over n points):
w_ik = π_k N(x_i | μ_k, Σ_k) / (π_1 N(x_i | μ_1, Σ_1) + π_2 N(x_i | μ_2, Σ_2))
π_k = (1/n) · Σ_i w_ik
μ_k = (Σ_i w_ik · x_i) / (Σ_i w_ik)
Σ_k = (Σ_i w_ik · (x_i − μ_k)(x_i − μ_k)^T) / (Σ_i w_ik)
When applying the algorithm, the means of the first and second cluster both come out as:
array([[2.50832195],
[2.51546208]])
When the actual vector means for the first and second cluster are, respectively:
array([[0],
[0]])
and:
array([[5],
[5]])
The same thing happens with the covariance matrices; I get:
array([[7.05168736, 6.17098629],
[6.17098629, 7.23009494]])
When it should be:
array([[1, 0],
[0, 1]])
for both clusters.
Here is the code:
import numpy as np

np.random.seed(1)

# first cluster
X_11 = np.random.normal(0, 1, 1000)
X_21 = np.random.normal(0, 1, 1000)

# second cluster
X_12 = np.random.normal(5, 1, 1000)
X_22 = np.random.normal(5, 1, 1000)

X_1 = np.concatenate((X_11, X_12), axis=None)
X_2 = np.concatenate((X_21, X_22), axis=None)

# data matrix of k x n dimensions (2 x 2000 dimensions)
X = np.concatenate((np.array([X_1]), np.array([X_2])), axis=0)

# multivariate normal distribution function gives an n x 1 vector (2000 x 1)
def normal_distribution(x, mu, sigma):
    mvnd = []
    for i in range(np.shape(x)[1]):
        gd = (2*np.pi)**(-2/2) * np.linalg.det(sigma)**(-1/2) * np.exp((-1/2) * np.dot(np.dot((x[:, i:i+1]-mu).T, np.linalg.inv(sigma)), (x[:, i:i+1]-mu)))
        mvnd.append(gd)
    return np.reshape(np.array(mvnd), (np.shape(x)[1], 1))

# initialized parameters
sigma_1 = np.array([[10, 0],
                    [0, 10]])
sigma_2 = np.array([[10, 0],
                    [0, 10]])
mu_1 = np.array([[10],
                 [10]])
mu_2 = np.array([[10],
                 [10]])
pi_1 = 0.5
pi_2 = 0.5

Sigma_1 = np.empty([2000, 2, 2])
Sigma_2 = np.empty([2000, 2, 2])

for i in range(10):
    # E-step:
    w_i1 = (pi_1*normal_distribution(X, mu_1, sigma_1))/(pi_1*normal_distribution(X, mu_1, sigma_1) + pi_2*normal_distribution(X, mu_2, sigma_2))
    w_i2 = (pi_2*normal_distribution(X, mu_2, sigma_2))/(pi_1*normal_distribution(X, mu_1, sigma_1) + pi_2*normal_distribution(X, mu_2, sigma_2))
    # M-step:
    pi_1 = np.sum(w_i1)/2000
    pi_2 = np.sum(w_i2)/2000
    mu_1 = np.array([(1/(np.sum(w_i1)))*np.sum(w_i1.T*X, axis=1)]).T
    mu_2 = np.array([(1/(np.sum(w_i2)))*np.sum(w_i2.T*X, axis=1)]).T
    for n in range(2000):  # renamed from i to avoid shadowing the outer loop variable
        Sigma_1[n:n+1, :, :] = w_i1[n:n+1, :]*np.dot((X[:, n:n+1]-mu_1), (X[:, n:n+1]-mu_1).T)
        Sigma_2[n:n+1, :, :] = w_i2[n:n+1, :]*np.dot((X[:, n:n+1]-mu_2), (X[:, n:n+1]-mu_2).T)
    sigma_1 = (1/(np.sum(w_i1)))*np.sum(Sigma_1, axis=0)
    sigma_2 = (1/(np.sum(w_i2)))*np.sum(Sigma_2, axis=0)
Would really appreciate it if someone could point out the mistake in my code or in my understanding of the algorithm.
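One suspicion (mine, not confirmed against the original post): both components start with identical parameters (mu_1 == mu_2, sigma_1 == sigma_2, pi_1 == pi_2), so the E-step gives w_i1 == w_i2 == 0.5 for every point, the M-step produces identical updates, and the two components stay identical forever; the mixture then collapses to a single Gaussian fitted to all 2000 points, which matches the reported mean of roughly (2.5, 2.5). A sketch of a symmetry-breaking initialization:
# hypothetical fix: start the two components at different locations
mu_1 = np.array([[1.0],
                 [1.0]])
mu_2 = np.array([[9.0],
                 [9.0]])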

Custom loss with loops in tensorflow

I have a function in my data preprocessing which performs a blockwise DCT on 3D numpy arrays in YCbCr mode.
import numpy as np
from scipy.fftpack import dct

def perform_blockwise_dct(img, ratio):
    imsize = img.shape
    dct_blocks = np.zeros(imsize)
    for i in np.r_[:imsize[0]:8]:
        for j in np.r_[:imsize[1]:8]:
            dct_blocks[i:(i+8), j:(j+8), 0] = dct(dct(img[i:(i+8), j:(j+8), 0].T, norm='ortho').T, norm='ortho')
            dct_blocks[i:(i+8), j:(j+8), 1] = dct(dct(img[i:(i+8), j:(j+8), 1].T, norm='ortho').T, norm='ortho')
            dct_blocks[i:(i+8), j:(j+8), 2] = dct(dct(img[i:(i+8), j:(j+8), 2].T, norm='ortho').T, norm='ortho')
    return dct_blocks
To implement a custom mean squared error function, I would like to reverse this function. The problem is that inside the loss function I am working with a TensorFlow tensor. There is an inverse DCT function to use; however, I do not know how to perform an equivalent double for-loop to apply it block-wise. Currently it is done on the entire image, like this:
def mse_custom_loss(a, b):
    y = tf.spectral.idct(a[:, :, 0], norm='ortho')
    cb = tf.spectral.idct(a[:, :, 1], norm='ortho')
    cr = tf.spectral.idct(a[:, :, 2], norm='ortho')
    a = K.stack([y, cb, cr], axis=-1)
    y = tf.spectral.idct(b[:, :, 0], norm='ortho')
    cb = tf.spectral.idct(b[:, :, 1], norm='ortho')
    cr = tf.spectral.idct(b[:, :, 2], norm='ortho')
    b = K.stack([y, cb, cr], axis=-1)
    return mean_square_error(a, b)
Any ideas on how to do it correctly? I assume lambda functions might be a possibility?
I think this is a TensorFlow equivalent to your NumPy/SciPy function:
import tensorflow as tf

def perform_blockwise_dct_tf(img):
    shape = tf.shape(img)
    x, y, c = shape[0], shape[1], shape[2]
    img_res = tf.reshape(img, [x // 8, 8, y // 8, 8, c])
    img_dct1 = tf.spectral.dct(tf.transpose(img_res, [0, 1, 2, 4, 3]), norm='ortho')
    img_dct2 = tf.spectral.dct(tf.transpose(img_dct1, [0, 2, 4, 3, 1]), norm='ortho')
    out = tf.reshape(tf.transpose(img_dct2, [0, 4, 1, 2, 3]), shape)
    return out
A small test:
import numpy as np
from scipy.fftpack import dct

def perform_blockwise_dct(img):
    imsize = img.shape
    dct_blocks = np.zeros(imsize, dtype=img.dtype)
    for i in np.r_[:imsize[0]:8]:
        for j in np.r_[:imsize[1]:8]:
            dct_blocks[i:(i+8), j:(j+8), 0] = dct(dct(img[i:(i+8), j:(j+8), 0].T, norm='ortho').T, norm='ortho')
            dct_blocks[i:(i+8), j:(j+8), 1] = dct(dct(img[i:(i+8), j:(j+8), 1].T, norm='ortho').T, norm='ortho')
            dct_blocks[i:(i+8), j:(j+8), 2] = dct(dct(img[i:(i+8), j:(j+8), 2].T, norm='ortho').T, norm='ortho')
    return dct_blocks

np.random.seed(100)
# DCT in TensorFlow only supports float32
img = np.random.rand(128, 256, 3).astype(np.float32)
out1 = perform_blockwise_dct(img)
with tf.Graph().as_default(), tf.Session() as sess:
    out2 = sess.run(perform_blockwise_dct_tf(img))
# There is a bit of error
print(np.allclose(out1, out2, rtol=1e-5, atol=1e-6))
# True
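For the loss itself, the same reshape/transpose trick should carry over to the inverse transform (my extrapolation, not part of the original answer): since the two per-axis transforms act on independent axes, swapping tf.spectral.dct for tf.spectral.idct in the same layout undoes the blockwise 2D DCT.
import tensorflow as tf

def perform_blockwise_idct_tf(img):
    # hypothetical inverse: identical block layout, idct instead of dct
    shape = tf.shape(img)
    x, y, c = shape[0], shape[1], shape[2]
    img_res = tf.reshape(img, [x // 8, 8, y // 8, 8, c])
    img_idct1 = tf.spectral.idct(tf.transpose(img_res, [0, 1, 2, 4, 3]), norm='ortho')
    img_idct2 = tf.spectral.idct(tf.transpose(img_idct1, [0, 2, 4, 3, 1]), norm='ortho')
    return tf.reshape(tf.transpose(img_idct2, [0, 4, 1, 2, 3]), shape)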

How to test if a matrix is a rotation matrix?

I have a task to check whether a matrix is a rotation matrix. I wrote code as follows:
import numpy as np

def isRotationMatrix(R):
    # some code here
    # return True or False
    pass

R = np.array([
    [0, 0, 1],
    [1, 0, 0],
    [0, 1, 0],
])
print(isRotationMatrix(R))  # Should be True

R = np.array([
    [-1, 0, 0],
    [0, 1, 0],
    [0, 0, 1],
])
print(isRotationMatrix(R))  # Should be False
I don't know how to implement the function isRotationMatrix. My naive implementation only works for a 3x3 matrix:
def isRotationMatrix(R_3x3):
    should_be_norm_one = np.allclose(np.linalg.norm(R_3x3, axis=0), np.ones(shape=3))
    x = R_3x3[:, 0].ravel()
    y = R_3x3[:, 1].ravel()
    z = R_3x3[:, 2].ravel()
    should_be_perpendicular = \
        np.allclose(np.cross(x, y), z) \
        and np.allclose(np.cross(y, z), x) \
        and np.allclose(np.cross(z, x), y)
    return should_be_perpendicular and should_be_norm_one
I am using this definition of a rotation matrix: a rotation matrix M should satisfy the conditions M M^T = M^T M = I and det(M) = 1, where M^T denotes the transpose of M, I the identity matrix, and det(M) the determinant of M.
You can use the following Python code to check if the matrix is a rotation matrix.
import numpy as np

''' I have chosen `M` as an example. Feel free to put in your own matrix.'''
M = np.array([[0, -1, 0], [1, 0, 0], [0, 0, 1]])

def isRotationMatrix(M):
    tag = False
    I = np.identity(M.shape[0])
    # compare with a tolerance rather than exact equality, to allow float error
    if np.allclose(np.matmul(M, M.T), I) and np.isclose(np.linalg.det(M), 1):
        tag = True
    return tag

if isRotationMatrix(M):
    print('M is a rotation matrix.')
else:
    print('M is not a rotation matrix.')
A rotation matrix is an orthogonal matrix whose determinant is 1.
My implementation:
import numpy as np

def isRotationMatrix(R):
    # square matrix test
    if R.ndim != 2 or R.shape[0] != R.shape[1]:
        return False
    should_be_identity = np.allclose(R.dot(R.T), np.identity(R.shape[0]))
    should_be_one = np.allclose(np.linalg.det(R), 1)
    return should_be_identity and should_be_one

if __name__ == '__main__':
    R = np.array([
        [0, 0, 1],
        [1, 0, 0],
        [0, 1, 0],
    ])
    print(isRotationMatrix(R))  # True

    R = np.array([
        [-1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ])
    print(isRotationMatrix(R))  # False (det is -1: a reflection, not a rotation)

    print(isRotationMatrix(np.zeros((3, 2))))  # False

Setting the Threshold for a Perceptron

How can I set the threshold for a single layer perceptron?
I have
import numpy as np
import sklearn
from sklearn.linear_model import Perceptron

xs = np.array([
    # x1 x2
    0, 0,  # m1
    0, 1,
    1, 0,
    1, 1
]).reshape(4, 2)

ys = np.array([1, 1, 0, 1]).reshape(4,)

ppn = Perceptron(max_iter=10, eta0=0.2, random_state=0)
ppn.fit(xs, ys)
What I want to do is train the ppn weights, with initial
weights = (0, 0), eta = 0.2, threshold = 0.5.
E.g. for the pattern (1, 1), with initial weights and target ys = 1:
net = w1*x1 + w2*x2 = 0*1 + 0*1 = 0
f(net) = 1 if net >= threshold else 0
f(0) = 0  # because 0 < 0.5
error = 0.2*(1 - 0) = 0.2
weight_update_w1 = 0 + 0.2*1
Learning should stop once a full pass over all m's performs no weight update.
How can I set the threshold for ppn to 0.5?
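As far as I know, sklearn's Perceptron does not expose a decision threshold (it learns a bias/intercept via fit_intercept instead), so one option is to implement the update rule from the worked example by hand. A minimal sketch under those assumptions:
import numpy as np

def train_perceptron(xs, ys, eta=0.2, threshold=0.5, max_epochs=100):
    w = np.zeros(xs.shape[1])  # initial weights (0, 0)
    for _ in range(max_epochs):
        updated = False
        for x, target in zip(xs, ys):
            out = 1 if np.dot(w, x) >= threshold else 0
            if out != target:
                w += eta * (target - out) * x  # same rule as the worked example
                updated = True
        if not updated:  # stop once a full pass makes no updates
            break
    return w
Note that with these particular targets, the pattern (0, 0) -> 1 can never fire under a fixed threshold of 0.5 and no bias (its net input is always 0), so on this dataset the loop will run until max_epochs.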
