NMF Matrix Multiplication not reducing error - python

In NMF we iteratively update the matrices W and H so that their product W H approximates V.
import numpy as np
import matplotlib.pyplot as plt

# specify the rank
r = 4
print('r:', r)
# Generate some synthetic data to create V0 from Wtrue and Htrue.
# Our challenge is then to find V ~ V0 by iterative learning,
# starting from random values of W and H.
Wtrue = np.array([[1, 2],
                  [3, 4],
                  [5, 6],
                  [7, 8]])
Htrue = np.array([[9, 11, 13],
                  [10, 12, 14]])
# Wtrue = np.random.rand(40,5) # nxr, i: 1 -> n, a: 1 -> r
# Htrue = np.random.rand(5,10) # rxm, i: 1 -> r, mu: 1 -> m
V0 = Wtrue @ Htrue  # converge: V ~ WH
print('V0:\n', V0)
# Dimensions of data (n rows and m cols)
n, m = V0.shape
print('n(row), m(col):', V0.shape)
# Normalise columnwise
V = np.zeros(shape=(n, m))
for i in range(m):
    V[:, i] = V0[:, i] / np.max(V0[:, i])
print('V:\n', V)
# Initialise W as n rows and r cols
W = np.random.rand(n, r)
# Initialise H as r rows and m cols
H = np.random.rand(r, m)
print('WH:\n', W @ H)
print('W:\n', W)
print('H:\n', H)
# Maximum number of iterations
maxIter = 8
f = np.zeros(shape=(maxIter, 1))
# Initial error
f[0] = np.linalg.norm(V - W @ H, ord='fro')
print('Initial error: ', np.log(np.linalg.norm(V - W @ H, ord='fro')))
print('Learning weights...')
for iter in range(maxIter - 1):
    # Update W
    for i in range(n):
        for a in range(r):
            S = V[i, :] / (W @ H)[i, :] @ H[a, :]
            W[i, a] = W[i, a] * S
    # Update H
    for a in range(r):
        for u in range(m):
            T = V[:, u] / (W @ H)[:, u] @ W[:, a]
            H[a, u] = H[a, u] * T
    # Measure Error
    f[iter + 1] = np.linalg.norm(V - W @ H, ord='fro')
fig, ax = plt.subplots(figsize=(5, 4))
ax.set_title('Convergence of NMF')
ax.set_xlabel('Iteration')
ax.set_ylabel('log(Error)')
ax.plot(np.arange(maxIter), np.log(f), c='m')
ax.grid(True)
print('Final error: ', np.log(np.linalg.norm(V - W @ H, ord='fro')))
plt.savefig('images/NMF_convergence_r32.png')
The problem is that my solution stops reducing the error after a small number of iterations. Correct code should keep reducing the error as the number of iterations grows.
This is where the problem most likely lies:
# Update W
for i in range(n):
    for a in range(r):
        S = V[i, :] / (W @ H)[i, :] @ H[a, :]
        W[i, a] = W[i, a] * S
# Update H
for a in range(r):
    for u in range(m):
        T = V[:, u] / (W @ H)[:, u] @ W[:, a]
        H[a, u] = H[a, u] * T
Here's the plot I'm getting:
Appreciate any thoughts.

I came up with a quick fix after reading chapter 10 of Programming Collective Intelligence by Toby Segaran.
First make sure you run from numpy import *; then, inside the main loop:
# ...
for iter in range(maxIter - 1):
    # Update W
    wn = V @ transpose(H)
    wd = W @ H @ transpose(H)
    W = matrix(array(W) * array(wn) / array(wd))
    # Update H
    hn = transpose(W) @ V
    hd = transpose(W) @ W @ H
    H = matrix(array(H) * array(hn) / array(hd))
    # Measure Error
    # ...
This has given me the correct plot:
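For reference, the same multiplicative updates can also be written with plain NumPy arrays and the @ operator, without the matrix/array conversions. A minimal sketch, assuming V, W, H, f and maxIter are defined as in the question's code (loop variable renamed to avoid shadowing the iter builtin):
for it in range(maxIter - 1):
    # Lee & Seung multiplicative update for W (Frobenius-norm objective)
    W = W * (V @ H.T) / (W @ H @ H.T)
    # multiplicative update for H
    H = H * (W.T @ V) / (W.T @ W @ H)
    # measure error
    f[it + 1] = np.linalg.norm(V - W @ H, ord='fro')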

Related

Remove breathing artifacts from a PPG signal with an adaptive filter

I'm working on a Python project that consists of removing breathing artifacts from a PPG signal. I'm trying to apply either a frequency-domain or time-domain adaptive filter (LMS), but it doesn't work for me.
The reference breathing signal is sampled at 25 Hz and the PPG signal is sampled at 50 Hz.
The picture shows the raw PPG signal (blue); I want to achieve the orange signal as the output.
But when I tried to apply the LMS algorithm, I got a noisy signal and the artifacts couldn't be removed.
When I tried the frequency-domain adaptive filter (the second function below), I got a signal like this.
The code is below.
def lms(self, x, d, N, mu):
    nIters = min(len(x), len(d)) - N
    u = np.zeros(N)
    w = np.zeros(N)
    e = np.zeros(nIters)
    for n in range(nIters):
        u[1:] = u[:-1]
        u[0] = x[n]
        e_n = d[n] - np.dot(u, w)
        w = w + mu * e_n * u
        e[n] = e_n
    return e

def freq_adap_filter(self, x, d, M, mu):
    num_block = min(len(x), len(d)) // M
    w_k = 0
    n_zeros = np.zeros(M)
    e = np.zeros(num_block * M)
    for n in range(num_block):
        # calculate filter output
        x_n = np.concatenate([n_zeros, x[n * M:(n + 1) * M]])  # size 2M
        x_k = fft(x_n)  # size 2M
        y_k = x_k * w_k  # size 2M, filter output
        d_n = d[n * M:(n + 1) * M]  # size M
        y_n = ifft(y_k)  # size 2M
        y_n = y_n[M:2 * M]  # we take only the second half, size M
        e_n = d_n - y_n  # size M
        e[n * M:(n + 1) * M] = e_n
        e_n = np.concatenate([n_zeros, e_n])  # size 2M
        e_k = fft(e_n)  # size 2M
        # update coefficient
        x_k_conj = x_k.conj()  # size 2M
        g_k = x_k_conj * e_k  # size 2M
        g_n = ifft(g_k)  # size 2M
        g_n = g_n[0:M]  # last block deleted, size M
        g_n = np.concatenate([g_n, n_zeros])  # size 2M
        w_k = w_k + mu * fft(g_n)  # size 2M
    return e
I would be very thankful if you could help me. I'm looking forward to your answers. Thank you.
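One detail worth checking before the filter itself: the reference and the PPG signal are at different sample rates, so the LMS reference input has to be brought to 50 Hz first. A minimal sketch of that step, with hypothetical variable names and placeholder data (assumes SciPy is available and that lms() lives on some filter object filt):
import numpy as np
from scipy.signal import resample_poly

# hypothetical arrays: breath_25hz is the 25 Hz breathing reference, ppg_50hz the 50 Hz PPG
breath_25hz = np.random.randn(25 * 60)   # placeholder data, 60 s
ppg_50hz = np.random.randn(50 * 60)      # placeholder data, 60 s

# upsample the reference by a factor of 2 so both signals share the 50 Hz grid
breath_50hz = resample_poly(breath_25hz, up=2, down=1)

# e = filt.lms(x=breath_50hz, d=ppg_50hz, N=32, mu=0.01)   # filter order and step size are illustrative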

Generalized minimal residual (GMRES) method with an SSOR right preconditioner

I am trying to implement the GMRES algorithm with a right preconditioner P for solving the linear system Ax = b. The code runs without error; however, it gives an imprecise result: the residual error I get is very large. For plain GMRES (without the preconditioning matrix, i.e. removing P from the algorithm), the error I get is around 1e-12 and it converges for the same matrix.
import numpy as np
from scipy import sparse
import matplotlib.pyplot as plt
from scipy.linalg import norm as norm
import scipy.sparse as sp
from scipy.sparse import diags
"""The program is to split the matrix into D-diagonal; L: strictly lower matrix; U strictly upper matrix
satisfying: A = D - L - U """
def splitMat(A):
n,m = A.shape
if (n == m):
diagval = np.diag(A)
D = diags(diagval,0).toarray()
L = (-1)*np.tril(A,-1)
U = (-1)*np.triu(A,1)
else:
print("A needs to be a square matrix")
return (L,D,U)
"""Preconditioned Matrix for symmetric successive over-relaxation (SSOR): """
def P_SSOR(A,w):
## Split up matrix A:
L,D,U = splitMat(A)
Comp1 = (D - w*U)
Comp2 = (D - w*L)
Comp1inv = np.linalg.inv(Comp1)
Comp2inv = np.linalg.inv(Comp2)
P = w*(2-w)*np.matmul(Comp1inv, np.matmul(D,Comp2inv))
return P
"""GMRES_SSOR using right preconditioning P:
A - matrix of linear system Ax = b
x0 - initial guess
tol - tolerance
maxit - maximum iteration """
def myGMRES_SSOR(A,x0, b, tol, maxit):
matrixSize = A.shape[0]
e = np.zeros((maxit+1,1))
rr = 1
rstart = 2
X = x0
w = 1.9 ## in ssor
P = P_SSOR(A,w) ### preconditioned matrix
### Starting the GMRES ####
for rs in range(0,rstart+1):
### first check the residual:
if rr<tol:
break
else:
r0 = (b-A.dot(x0))
rho = norm(r0)
e[0] = rho
H = np.zeros((maxit+1,maxit))
Qcol = np.zeros((matrixSize, maxit+1))
Qcol[:,0:1] = r0/rho
for k in range(1, maxit+1):
### Arnodi procedure ##
Qcol[:,k] =np.matmul(np.matmul(A,P), Qcol[:,k-1]) ### This step applies P here:
for j in range(0,k):
H[j,k-1] = np.dot(np.transpose(Qcol[:,k]),Qcol[:,j])
Qcol[:,k] = Qcol[:,k] - (np.dot(H[j,k-1], Qcol[:,j]))
H[k,k-1] =norm(Qcol[:,k])
Qcol[:,k] = Qcol[:,k]/H[k,k-1]
### QR decomposition step ###
n = k
Q = np.zeros((n+1, n))
R = np.zeros((n, n))
R[0, 0] = norm(H[0:n+2, 0])
Q[:, 0] = H[0:n+1, 0] / R[0,0]
for j in range (0, n+1):
t = H[0:n+1, j-1]
for i in range (0, j-1):
R[i, j-1] = np.dot(Q[:, i], t)
t = t - np.dot(R[i, j-1], Q[:, i])
R[j-1, j-1] = norm(t)
Q[:, j-1] = t / R[j-1, j-1]
g = np.dot(np.transpose(Q), e[0:k+1])
Y = np.dot(np.linalg.inv(R), g)
Res= e[0:n] - np.dot(H[0:n, 0:n], Y[0:n])
rr = norm(Res)
#### second check on the residual ###
if rr < tol:
break
#### Updating the solution with the preconditioned matrix ####
X = X + np.matmul(np.matmul(P,Qcol[:, 0:k]), Y) ### This steps applies P here:
return X
######
A = np.random.rand(100,100)
x = np.random.rand(100,1)
b = np.matmul(A,x)
x0 = np.zeros((100,1))
maxit = 100
tol = 0.00001
x = myGMRES_SSOR(A,x0,b,tol,maxit)
res = b - np.matmul(A,x)
print(norm(res))
print("Solution with gmres\n", np.matmul(A,x))
print("---------------------------------------")
print("b matrix:", b)
I hope someone can help me figure this out!
I'm not sure where you got your symmetric successive over-relaxation (SSOR) code from, but it appears to be wrong. You also seem to be assuming that A is a symmetric matrix, but in your random test case it is not.
Following SSOR's Wikipedia entry, I replaced your P_SSOR function with
def P_SSOR(A, w):
    L, D, U = splitMat(A)
    P = 2/(2-w) * (1/w*D+L)*np.linalg.inv(D)*(1/w*D+L).T
    return P
and your test matrix with
A = np.random.rand(100,100)
A = A + A.T
and your code works up to a 12 digit residual error.
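As an extra sanity check (not part of the original answer), the preconditioned GMRES solution can be compared against a direct solve on the same symmetric test matrix:
# assumes A (symmetric, as built above), b, and myGMRES_SSOR are already defined
x_direct = np.linalg.solve(A, b)                           # reference solution
x_gmres = myGMRES_SSOR(A, np.zeros_like(b), b, 1e-8, 100)
print("relative difference:", np.linalg.norm(x_gmres - x_direct) / np.linalg.norm(x_direct))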

Jacobi Method Outputting Wrong Eigenvalues

I am working on creating an eigenvalue calculator using the Jacobi method and it runs without errors. However, it does not find the correct eigenvalues nor does it find the correct eigenvectors. For some reason, I always get eigenvalues of 0. I think it may not be saving the matrix I input for MatrixA.
(Link to Jacobi method in case you are not familiar: http://fourier.eng.hmc.edu/e176/lectures/ch1/node1.html)
import numpy as np
import bettertimeit as time
import matplotlib as plt

def Jacobi(A):
    n = A.shape[0]   # matrix size, #columns = #lines
    maxit = 100      # maximum number of iterations
    eps = 1.0e-15    # accuracy goal
    pi = np.pi
    info = 0         # return flag
    ev = np.zeros(n, float)      # initialize eigenvalues
    U = np.zeros((n, n), float)  # initialize eigenvectors
    for i in range(0, n): U[i, i] = 1.0
    for t in range(0, maxit):
        s = 0  # compute sum of off-diagonal elements in A(i,j)
        for i in range(0, n): s = s + np.sum(np.abs(A[i, (i+1):n]))
        if (s < eps):  # diagonal form reached
            info = t
            for i in range(0, n): ev[i] = A[i, i]
            break
        else:
            limit = s/(n*(n-1)/2.0)  # average value of off-diagonal elements
            for i in range(0, n-1):      # loop over lines of matrix
                for j in range(i+1, n):  # loop over columns of matrix
                    if (np.abs(A[i, j]) > limit):  # determine (i,j) such that |A(i,j)| is larger than the
                                                   # average value of off-diagonal elements
                        denom = A[i, i] - A[j, j]  # denominator of Eq. (3.61)
                        if (np.abs(denom) < eps): phi = pi/4  # Eq. (3.62)
                        else: phi = 0.5*np.arctan(2.0*A[i, j]/denom)  # Eq. (3.61)
                        si = np.sin(phi)
                        co = np.cos(phi)
                        for k in range(i+1, j):
                            store = A[i, k]
                            A[i, k] = A[i, k]*co + A[k, j]*si  # Eq. (3.56)
                            A[k, j] = A[k, j]*co - store*si    # Eq. (3.57)
                        for k in range(j+1, n):
                            store = A[i, k]
                            A[i, k] = A[i, k]*co + A[j, k]*si  # Eq. (3.56)
                            A[j, k] = A[j, k]*co - store*si    # Eq. (3.57)
                        for k in range(0, i):
                            store = A[k, i]
                            A[k, i] = A[k, i]*co + A[k, j]*si
                            A[k, j] = A[k, j]*co - store*si
                        store = A[i, i]
                        A[i, i] = A[i, i]*co*co + 2.0*A[i, j]*co*si + A[j, j]*si*si  # Eq. (3.58)
                        A[j, j] = A[j, j]*co*co - 2.0*A[i, j]*co*si + store*si*si    # Eq. (3.59)
                        A[i, j] = 0.0  # Eq. (3.60)
                        for k in range(0, n):
                            store = U[k, j]
                            U[k, j] = U[k, j]*co - U[k, i]*si  # Eq. (3.66)
                            U[k, i] = U[k, i]*co + store*si    # Eq. (3.67)
        info = -t  # in case no convergence is reached, set info to a negative value "-t"
    return ev, U, t

n = int(input("Enter the matrix size: "))
A = np.zeros((n, n))
for i in range(n):
    A[i] = input().split(" ")
MatrixA = np.array(A)
print("A= ")
print(A)
for i in range(A.shape[0]):
    row = ["{}*x{}".format(A[i, j], j + 1) for j in range(A.shape[1])]

# Jacobi method
ev, U, t = Jacobi(A)
print("JACOBI METHOD: Number of rotations = ", t)
print("Eigenvalues = ", ev)
print("Eigenvectors = ")
print(U)
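One observation from the code as posted: ev is only filled in when the off-diagonal sum drops below eps, so if the sweeps never converge the function returns all zeros. A quick, illustrative cross-check against NumPy's symmetric eigensolver (hypothetical test matrix; note that Jacobi modifies its argument in place, so pass a copy):
A_test = np.array([[4.0, 1.0, 0.0],
                   [1.0, 3.0, 1.0],
                   [0.0, 1.0, 2.0]])
ev, U, t = Jacobi(A_test.copy())
print("Jacobi eigenvalues:", np.sort(ev))
print("NumPy eigenvalues: ", np.linalg.eigvalsh(A_test))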

How can I code a matrix within a matrix using a loop?

So I have this 3x3 G matrix (not shown here; it's irrelevant to my problem) that I created using the two variables u (a vector, x - y) and the scalar k. x_j = (x_1(j), x_2(j), x_3(j)) and y_j = (y_1(j), y_2(j), y_3(j)). alpha_j is a 3x3 matrix. The A matrix is a block-diagonal matrix of size 3n x 3n. I am having trouble with the W matrix. How do I code a matrix of size 3n x 3n, where the (i,j)th block is the 3x3 matrix given by alpha_i * G_[ij] * alpha_j? I am lost.
My alpha_j matrix also seems to be having some trouble. The loop keeps throwing me the error "only length-1 arrays can be converted to Python scalars". Please help.
def W(x, y, k, alpha, A):
    u = x - y
    n = x.shape[0]
    W = np.zeros((3*n, 3*n))
    for i in range(0, n-1):
        for j in range(0, n-1):
            #u = -np.array([[x[i,0] - x[j,0]], [x[i,1] - x[j,1]], [0]]) ??
            W[i][j] = (alpha_j(alpha, A) * G(u, k) * alpha_j(alpha, A))
        W[i][i] = np.zeros((n, n))
    return W

def alpha_j(a, A):
    alph = np.array([[0,0,0],[0,0,0],[0,0,0]], complex)
    rho = np.random.rand(3,1)
    for i in range(0, 2):
        for j in range(0, 2):
            alph[i][j] = (rho[i] * a * A[i][j])
    return alph

#-------------------------------------------------------------------
x1 = np.array([[1], [2], [0]])
y1 = np.array([[4], [5], [0]])

# SYSTEM PARAMETERS
# incoming wave angle
theta = 0  # can range over [0, 2*pi)
# susceptibility
chi = 10 + 1j
# wavelength
lam = 0.5  # microns (values between .4-.7)
# frequency
k = (2 * np.pi)/lam  # 1/microns
# volume
V_0 = (0.05)**3  # microns^3
# incoming wave vector
K = k * np.array([[0], [np.sin(theta)], [np.cos(theta)]])
# polarization vector
vecinc = np.array([[1], [0], [0]])  # (can choose any vector perpendicular to K)
# for the fixed alpha case
alpha = (V_0 * 3 * chi)/(chi + 3)
# 3 x 3 matrix
A = np.matlib.identity(3)  # could be any symmetric matrix
#-------------------------------------------------------------------

# TEST FUNCTIONS
test = G((x1-y1), k)
print(test)
w = W(x1, y1, k, alpha, A)
print(w)
Sometimes my W loop throws me the error "can't set an array element with a sequence". But I need to set each array element in this arbitrary matrix W to the 3x3 matrix created by multiplying alpha by G...
To your question of how to create a new array with a block for each element, the following should do the trick:
G = np.random.random([3, 3])
result = np.zeros([9, 9])
num_blocks = 3
a = np.random.random([3, 3])
b = np.random.random([3, 3])
for i in range(G.shape[0]):
    for j in range(G.shape[1]):
        block_result = a*G[i, j]*b
        for k in range(num_blocks):
            for l in range(num_blocks):
                result[3*i + k, 3*j + l] = block_result[k, l]
You should be able to generalize from there. I hope I've understood correctly.
EDIT: It looks like I haven't understood correctly. I'm leaving it in hopes it spurs you to an answer. The general idea is to generate ranges of indices to operate on, and then just operate on them directly. Slicing might be helpful, too.
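For example, a slicing-based variant of the same block assembly (same hypothetical shapes as above) would be:
import numpy as np

G = np.random.random([3, 3])
a = np.random.random([3, 3])
b = np.random.random([3, 3])
result = np.zeros([9, 9])
for i in range(G.shape[0]):
    for j in range(G.shape[1]):
        # assign the whole 3x3 block in one go
        result[3*i:3*(i+1), 3*j:3*(j+1)] = a * G[i, j] * b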
Ah, you asked how to create a diagonal filled with blocks. In that case:
num_diagonal_blocks = 3  # for example
for block_dim in range(num_diagonal_blocks):
    block_result = ...  # do your block calculation here
    for k in range(G.shape[0]):
        for l in range(G.shape[1]):
            result[3*block_dim + k, 3*block_dim + l] = block_result[k, l]  # assign to element of block
I think that's nearly it.

non-negative matrix factorization failing to converge

I'm trying to implement non-negative matrix factorization using the Kullback-Leibler divergence as a similarity measure. The algorithm is described in http://hebb.mit.edu/people/seung/papers/nmfconverge.pdf. Below is my Python/NumPy implementation, with an example matrix to run it on.
In a nutshell, the algorithm is supposed to learn matrices W(n by r) and H(r by m) such that V(n by m) is approximately WH. You start with random values in W and H, and by following the update rules described in the Seung and Lee paper, you're supposed to get closer and closer to good approximations for W and H.
The algorithm is proven to monotonically reduce the divergence measure, but that's not what happens in my implementation. Instead, it settles into an alternation between two divergence values. If you look at W and H, you can see that the resulting factorization is not particularly good.
I've wondered whether to use the updated or old H when calculating the update for W. I tried it both ways, and it doesn't change the behavior of the implementation.
I've checked my implementation against the paper a bunch of times, and I don't see what I'm doing wrong. Can anyone shed some light on the issue?
import numpy as np

def update(V, W, H, r, n, m):
    n, m = V.shape
    WH = W.dot(H)

    # equation (5)
    H_coeff = np.zeros(H.shape)
    for a in range(r):
        for mu in range(m):
            for i in range(n):
                H_coeff[a, mu] += W[i, a] * V[i, mu] / WH[i, mu]
            H_coeff[a, mu] /= sum(W)[a]
    H = H * H_coeff

    W_coeff = np.zeros(W.shape)
    for i in range(n):
        for a in range(r):
            for mu in range(m):
                W_coeff[i, a] += H[a, mu] * V[i, mu] / WH[i, mu]
            W_coeff[i, a] /= sum(H.T)[a]
    W = W * W_coeff
    return W, H

def factor(V, r, iterations=100):
    n, m = V.shape
    avg_V = sum(sum(V))/n/m
    W = np.random.random(n*r).reshape(n, r)*avg_V
    H = np.random.random(r*m).reshape(r, m)*avg_V
    for i in range(iterations):
        WH = W.dot(H)
        divergence = sum(sum(V * np.log(V/WH) - V + WH))  # equation (3)
        print("At iteration " + str(i) + ", the Kullback-Liebler divergence is", divergence)
        W, H = update(V, W, H, r, n, m)
    return W, H

V = np.arange(0.01, 1.01, 0.01).reshape(10, 10)
W, H = factor(V, 6)
How to eliminate the alternation effect:
The very last line of the Proof of Theorem 2 reads,
By reversing the roles of H and W, the update rule for W can similarly
be shown to be nonincreasing.
Thus we can surmise that updating H can be done independently of updating W. That means after updating H:
H = H * H_coeff
we should also update the intermediate value WH before updating W:
WH = W.dot(H)
W = W * W_coeff
Both updates decrease the divergence.
Try it: Just stick WH = W.dot(H) before the computation for W_coeff, and the alternation effect goes away.
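Concretely, in the update function that means (sketching only the affected lines):
    H = H * H_coeff
    WH = W.dot(H)                 # recompute WH here, with the freshly updated H
    W_coeff = np.zeros(W.shape)
    # ... the rest of the W update stays unchanged ...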
Simplifying the code:
When dealing with NumPy arrays, use their mean and sum methods, and avoid using the Python sum function:
avg_V = sum(sum(V))/n/m
can be written as
avg_V = V.mean()
and
divergence = sum(sum(V * np.log(V/WH) - V + WH)) # equation (3)
can be written as
divergence = ((V * np.log(V_over_WH)) - V + WH).sum()
Avoid the Python builtin sum function because
it is slower than the NumPy sum method, and
it is not as versatile as the NumPy sum method. (It
does not allow you to specify the axis on which to sum. We managed to eliminate two calls to Python's sum with one call to NumPy's sum or mean method.)
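For instance (a small illustrative example, not from the original code), the builtin sum of a 2-D array matches the axis-0 NumPy sum, while the axis argument also lets you sum along rows:
W = np.random.random((4, 3))
print(np.allclose(sum(W), W.sum(axis=0)))   # True: builtin sum adds the rows, giving column sums
print(W.sum(axis=1))                        # row sums, which the builtin sum cannot express directly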
Eliminate the triple for-loop:
But a bigger improvement in both speed and readability can be had by replacing
H_coeff = np.zeros(H.shape)
for a in range(r):
for mu in range(m):
for i in range(n):
H_coeff[a, mu] += W[i, a] * V[i, mu] / WH[i, mu]
H_coeff[a, mu] /= sum(W)[a]
H = H * H_coeff
with
V_over_WH = V/WH
H *= (np.dot(V_over_WH.T, W) / W.sum(axis=0)).T
Explanation:
If you look at the equation 5 update rule for H, first notice that indices for V and (W H) are identical. So you can replace V / (W H) with
V_over_WH = V/WH
Next, note that in the numerator we are summing over the index i, which is the first index in both W and V_over_WH. We can express that as matrix multiplication:
np.dot(V_over_WH.T, W).T
And the denominator is simply:
W.sum(axis=0).T
If we divide the numerator and denominator
(np.dot(V_over_WH.T, W) / W.sum(axis=0)).T
we get a matrix indexed by the two remaining indices, alpha and mu, in that order. That is the same as the indices for H. So we want to multiply H by this ratio element-wise. Perfect. NumPy multiplies arrays element-wise by default.
Thus, we can express the entire update rule for H as
H *= (np.dot(V_over_WH.T, W) / W.sum(axis=0)).T
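As a quick illustrative check that the vectorized rule matches the triple loop, both can be compared on small random matrices:
import numpy as np

n, m, r = 5, 4, 3
V = np.random.random((n, m)) + 0.1
W = np.random.random((n, r)) + 0.1
H = np.random.random((r, m)) + 0.1
WH = W.dot(H)

# loop version of the equation (5) coefficient
H_coeff = np.zeros(H.shape)
for a in range(r):
    for mu in range(m):
        for i in range(n):
            H_coeff[a, mu] += W[i, a] * V[i, mu] / WH[i, mu]
        H_coeff[a, mu] /= sum(W)[a]

# vectorized version
V_over_WH = V / WH
H_coeff_vec = (np.dot(V_over_WH.T, W) / W.sum(axis=0)).T

print(np.allclose(H_coeff, H_coeff_vec))    # True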
So, putting it all together:
import numpy as np
np.random.seed(1)

def update(V, W, H, WH, V_over_WH):
    # equation (5)
    H *= (np.dot(V_over_WH.T, W) / W.sum(axis=0)).T
    WH = W.dot(H)
    V_over_WH = V / WH

    W *= np.dot(V_over_WH, H.T) / H.sum(axis=1)
    WH = W.dot(H)
    V_over_WH = V / WH
    return W, H, WH, V_over_WH

def factor(V, r, iterations=100):
    n, m = V.shape
    avg_V = V.mean()
    W = np.random.random(n * r).reshape(n, r) * avg_V
    H = np.random.random(r * m).reshape(r, m) * avg_V
    WH = W.dot(H)
    V_over_WH = V / WH
    for i in range(iterations):
        W, H, WH, V_over_WH = update(V, W, H, WH, V_over_WH)
        # equation (3)
        divergence = ((V * np.log(V_over_WH)) - V + WH).sum()
        print("At iteration {i}, the Kullback-Liebler divergence is {d}".format(
            i=i, d=divergence))
    return W, H

V = np.arange(0.01, 1.01, 0.01).reshape(10, 10)
# V = np.arange(1,101).reshape(10,10).astype('float')
W, H = factor(V, 6)
