Generalized minimal residual method (GMRES) with an SSOR right preconditioner - Python

I am trying to implement the GMRES algorithm with a right preconditioner P for solving the linear system Ax = b. The code runs without error; however, it gives me an inaccurate result, because the error I end up with is very large. For the plain GMRES method (without the preconditioning matrix, i.e. with P removed from the algorithm), the error I get is around 1e-12 and it converges with the same matrix.
import numpy as np
from scipy import sparse
import matplotlib.pyplot as plt
from scipy.linalg import norm as norm
import scipy.sparse as sp
from scipy.sparse import diags

"""The program splits the matrix into D: diagonal; L: strictly lower matrix; U: strictly upper matrix,
satisfying: A = D - L - U """
def splitMat(A):
    n,m = A.shape
    if (n == m):
        diagval = np.diag(A)
        D = diags(diagval,0).toarray()
        L = (-1)*np.tril(A,-1)
        U = (-1)*np.triu(A,1)
    else:
        print("A needs to be a square matrix")
    return (L,D,U)

"""Preconditioning matrix for symmetric successive over-relaxation (SSOR): """
def P_SSOR(A,w):
    ## Split up matrix A:
    L,D,U = splitMat(A)
    Comp1 = (D - w*U)
    Comp2 = (D - w*L)
    Comp1inv = np.linalg.inv(Comp1)
    Comp2inv = np.linalg.inv(Comp2)
    P = w*(2-w)*np.matmul(Comp1inv, np.matmul(D,Comp2inv))
    return P

"""GMRES_SSOR using right preconditioning P:
A - matrix of linear system Ax = b
x0 - initial guess
tol - tolerance
maxit - maximum iterations """
def myGMRES_SSOR(A, x0, b, tol, maxit):
    matrixSize = A.shape[0]
    e = np.zeros((maxit+1,1))
    rr = 1
    rstart = 2
    X = x0
    w = 1.9            ## relaxation parameter in SSOR
    P = P_SSOR(A,w)    ### preconditioning matrix
    ### Starting the GMRES ####
    for rs in range(0,rstart+1):
        ### first check on the residual:
        if rr < tol:
            break
        else:
            r0 = (b - A.dot(x0))
            rho = norm(r0)
            e[0] = rho
            H = np.zeros((maxit+1,maxit))
            Qcol = np.zeros((matrixSize, maxit+1))
            Qcol[:,0:1] = r0/rho
            for k in range(1, maxit+1):
                ### Arnoldi procedure ###
                Qcol[:,k] = np.matmul(np.matmul(A,P), Qcol[:,k-1])   ### This step applies P here:
                for j in range(0,k):
                    H[j,k-1] = np.dot(np.transpose(Qcol[:,k]), Qcol[:,j])
                    Qcol[:,k] = Qcol[:,k] - (np.dot(H[j,k-1], Qcol[:,j]))
                H[k,k-1] = norm(Qcol[:,k])
                Qcol[:,k] = Qcol[:,k]/H[k,k-1]

                ### QR decomposition step ###
                n = k
                Q = np.zeros((n+1, n))
                R = np.zeros((n, n))
                R[0, 0] = norm(H[0:n+2, 0])
                Q[:, 0] = H[0:n+1, 0] / R[0,0]
                for j in range(0, n+1):
                    t = H[0:n+1, j-1]
                    for i in range(0, j-1):
                        R[i, j-1] = np.dot(Q[:, i], t)
                        t = t - np.dot(R[i, j-1], Q[:, i])
                    R[j-1, j-1] = norm(t)
                    Q[:, j-1] = t / R[j-1, j-1]

                g = np.dot(np.transpose(Q), e[0:k+1])
                Y = np.dot(np.linalg.inv(R), g)

                Res = e[0:n] - np.dot(H[0:n, 0:n], Y[0:n])
                rr = norm(Res)
                #### second check on the residual ###
                if rr < tol:
                    break

            #### Updating the solution with the preconditioning matrix ####
            X = X + np.matmul(np.matmul(P, Qcol[:, 0:k]), Y)   ### This step applies P here:
    return X
######
A = np.random.rand(100,100)
x = np.random.rand(100,1)
b = np.matmul(A,x)
x0 = np.zeros((100,1))
maxit = 100
tol = 0.00001
x = myGMRES_SSOR(A,x0,b,tol,maxit)
res = b - np.matmul(A,x)
print(norm(res))
print("Solution with gmres\n", np.matmul(A,x))
print("---------------------------------------")
print("b matrix:", b)
I hope someone can help me figure this out!

I'm not sure where you got your symmetric successive over-relaxation (SSOR) code from, but it appears to be wrong. You also seem to be assuming that A is a symmetric matrix, but in your random test case it is not.
Following SSOR's Wikipedia entry, I replaced your P_SSOR function with
def P_SSOR(A, w):
    L, D, U = splitMat(A)
    P = 2/(2-w) * (1/w*D + L) @ np.linalg.inv(D) @ (1/w*D + L).T
    return P
and your test matrix with
A = np.random.rand(100,100)
A = A + A.T
and your code works, converging to a residual error of about 12 digits (roughly 1e-12).
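As a quick sanity check on the symmetry assumption, you can verify that both the symmetrised test matrix and the resulting preconditioner are symmetric (a sketch, reusing splitMat and the P_SSOR above):

A = np.random.rand(100, 100)
A = A + A.T                  # symmetrise the test matrix, as above
P = P_SSOR(A, 1.9)
# SSOR is derived for symmetric A, and a product of the form B D^{-1} B^T is
# symmetric by construction, so both checks should print True.
print(np.allclose(A, A.T))
print(np.allclose(P, P.T))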

Related

Implementation of an algorithm for simultaneous diagonalization

I am trying to write an implementation of an algorithm for the simultaneous diagonalization of two matrices (which are assumed to be simultaneously diagonalizable). However, the algorithm does not seem to converge. The algorithm is described in SIAM J. Matrix Anal. Appl. 14, 927 (1993).
Here is the first part of my code to set up a test case:
import numpy as np
import numpy.linalg as lin
from scipy.optimize import minimize

N = 3

# Unitary example matrix
X = np.array([
    [-0.54717736-0.43779416j,  0.26046313+0.11082439j,  0.56151027-0.33692186j],
    [-0.33452046-0.37890784j, -0.40907097-0.70730291j, -0.15344477+0.23100467j],
    [-0.31253864-0.39468687j,  0.05342909+0.49940543j, -0.70062586+0.05835082j]
])

# Generate eigenvalues
LA = np.diag(np.arange(0, N))
LB = np.diag(np.arange(N, 2*N))

# Generate simultaneously diagonalizable matrices
A = X @ LA @ np.conj(X).T
B = X @ LB @ np.conj(X).T
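Since A and B are built from the same unitary X, they must commute; a quick sanity check on the construction (a sketch using the arrays defined above):

# Simultaneously diagonalizable matrices commute, so this should print True.
print(np.allclose(A @ B, B @ A))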
This should generate two 3x3 matrices which are simultaneously diagonalizable, since they are constructed this way via X. The following code block then defines a few helper functions:
def off2(A, B):
    """Defines the distance of the matrices from
    their diagonal form.
    """
    C = np.abs(A) ** 2 + np.abs(B) ** 2
    diag_idx = np.diag_indices(N)
    C[diag_idx] = 0
    return np.sum(C)

def Rijcs(i, j, c, s):
    """Function R(i, j, c, s) from the paper, see
    Eq. (1) therein. Used for plane rotations in
    the plane ij.
    """
    res = np.eye(N, dtype=complex)
    res[i, i] = c
    res[i, j] = -np.conj(s)
    res[j, i] = s
    res[j, j] = np.conj(c)
    return res

def cs(theta, phi):
    """Parametrization for c and s."""
    c = np.cos(theta)
    s = np.exp(1j * phi) * np.sin(theta)
    return c, s
With these definitions, the algorithm can be implemented:
tol = 1e-10
Q = np.eye(N, dtype=complex)

while True:
    off = off2(A, B)
    # Print statement for debugging purposes
    print(off)
    # Terminate if the result is converged
    if off <= tol * (lin.norm(A, "fro") + lin.norm(B, "fro")):
        break
    for i in range(N):
        for j in range(i + 1, N):
            def fij(c, s):
                aij = A[i, j]
                aji = A[j, i]
                aii = A[i, i]
                ajj = A[j, j]
                bij = B[i, j]
                bji = B[j, i]
                bii = B[i, i]
                bjj = B[j, j]
                x = np.array(
                    [
                        [np.conj(aij), np.conj(aii - ajj), -np.conj(aji)],
                        [aji,          (aii - ajj),        -aij],
                        [np.conj(bij), np.conj(bii - bjj), -np.conj(bji)],
                        [bji,          (bii - bjj),        -bij]
                    ]
                )
                y = np.array(
                    [
                        [c ** 2],
                        [c * s],
                        [s ** 2]
                    ]
                )
                return lin.norm(x @ y, 2)
            # 5
            result = minimize(
                lambda x: fij(*cs(x[0], x[1])),
                x0=(0, 0),
                bounds=(
                    (-0.25 * np.pi, 0.25 * np.pi),
                    (-np.pi, np.pi)
                ),
            )
            theta, phi = result['x']
            c, s = cs(theta, phi)
            # 6
            R = Rijcs(i, j, c, s)
            # 7
            Q = Q @ R
            A = np.conj(R).T @ A @ R
            B = np.conj(R).T @ B @ R
As you can observe from the print statement, the "distance" of A and B from diagonal form does not really converge. Instead, the printed values range from about 0.5 up to 3 and oscillate up and down. Is there a bug in this code, and if so, where exactly is it?

Error in implementation of Crank-Nicolson method applied to 1D TDSE?

This is more of a computational physics problem, and I've asked it on Physics Stack Exchange, but got no answers there. It is, I suppose, a mix of the disciplines here and there (and maybe even Mathematics Stack Exchange), so finding the right place to post is a task in and of itself, apparently...
I'm attempting to use Crank-Nicolson scheme to solve the TDSE in 1D. The initial wave is a real Gaussian that has been normalised wrt its probability density. As the solution evolves, a depression grows in the central peak of the real part of the wave, and the imaginary part's central trough is perhaps a bit higher than I expect (image below).
Does this behaviour seem reasonable? I have searched around and not seen questions/figures that are similar. I've tested another person's code from GitHub and it exhibits the same behaviour, which makes me feel a bit better. But I still think the central peak should just decrease in height and increase in width. The likelihood of getting a physics-based explanation here is relatively low, I'd assume, but a computation-based explanation of errors I may have made is more likely.
I'm happy to give more information, for example my code, or the matrices used in the scheme, etc. Thanks in advance!
Here's a link to a GIF of the time evolution:
And the part of my code relevant to solving the 1D TDSE:
(pretty much the entire thing except the plotting)
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

# Define function for norm.
def normf(dxc, uc, ic):
    return sum(dxc * np.square(np.abs(uc[ic, :])))

# Define function for expectation value of position.
def xexpf(dxc, xc, uc, ic):
    return sum(dxc * xc * np.square(np.abs(uc[ic, :])))

# Define function for expectation value of squared position.
def xexpsf(dxc, xc, uc, ic):
    return sum(dxc * np.square(xc) * np.square(np.abs(uc[ic, :])))

# Define function for standard deviation.
def sdaf(xexpc, xexpsc, ic):
    return np.sqrt(xexpsc[ic] - np.square(xexpc[ic]))

# Time t: t0 =< t =< tf. Have N steps at which to evaluate the CN scheme. The
# time interval is dt. decp: variable for plotting to certain number of decimal
# places.
t0 = 0
tf = 20
N = 200
dt = tf / N
t = np.linspace(t0, tf, num = N + 1, endpoint = True)
decp = str(dt)[::-1].find('.')

# Initialise array for filling with norm values at each time step.
norm = np.zeros(len(t))
# Initialise array for expectation value of position.
xexp = np.zeros(len(t))
# Initialise array for expectation value of squared position.
xexps = np.zeros(len(t))
# Initialise array for alternate standard deviation.
sda = np.zeros(len(t))

# Position x: -a =< x =< a. M is an even number. There are M + 1 total discrete
# positions, for the points to be symmetric and centred at x = 0.
a = 100
M = 1200
dx = (2 * a) / M
x = np.linspace(-a, a, num = M + 1, endpoint = True)

# The gaussian function u diffuses over time. sd sets the width of gaussian. u0
# is the initial gaussian at t0.
sd = 1
var = np.power(sd, 2)
mu = 0
u0 = np.sqrt(1 / np.sqrt(np.pi * var)) * np.exp(-np.power(x - mu, 2) / (2 * var))
u = np.zeros([len(t), len(x)], dtype = 'complex_')
u[0, :] = u0
# Normalise u.
u[0, :] = u[0, :] / np.sqrt(normf(dx, u, 0))

# Set coefficients of CN scheme.
alpha = dt * -1j / (4 * np.power(dx, 2))
beta = dt * 1j / (4 * np.power(dx, 2))

# Tridiagonal matrices Al and Ar. Al to be solved using Thomas algorithm.
Al = np.zeros([len(x), len(x)], dtype = 'complex_')
for i in range(0, M):
    Al[i + 1, i] = alpha
    Al[i, i] = 1 - (2 * alpha)
    Al[i, i + 1] = alpha
# Corner elements for BC's.
Al[M, M], Al[0, 0] = 1 - alpha, 1 - alpha

Ar = np.zeros([len(x), len(x)], dtype = 'complex_')
for i in range(0, M):
    Ar[i + 1, i] = beta
    Ar[i, i] = 1 - (2 * beta)
    Ar[i, i + 1] = beta
# Corner elements for BC's.
Ar[M, M], Ar[0, 0] = 1 - 2*beta, 1 - beta

# Thomas algorithm variables. Following similar naming as in Wiki article.
a = np.diag(Al, -1)
b = np.diag(Al)
c = np.diag(Al, 1)
NT = len(b)
cp = np.zeros(NT - 1, dtype = 'complex_')
for n in range(0, NT - 1):
    if n == 0:
        cp[n] = c[n] / b[n]
    else:
        cp[n] = c[n] / (b[n] - (a[n - 1] * cp[n - 1]))
d = np.zeros(NT, dtype = 'complex_')
dp = np.zeros(NT, dtype = 'complex_')

# Iterate over each time step to solve CN method. Maintain boundary
# conditions. Keep track of standard deviation.
for i in range(0, N):
    # BC's.
    u[i, 0], u[i, M] = 0, 0
    # Find RHS.
    d = np.dot(Ar, u[i, :])
    for n in range(0, NT):
        if n == 0:
            dp[n] = d[n] / b[n]
        else:
            dp[n] = (d[n] - (a[n - 1] * dp[n - 1])) / (b[n] - (a[n - 1] * cp[n - 1]))
    nc = NT - 1
    while nc > -1:
        if nc == NT - 1:
            u[i + 1, nc] = dp[nc]
            nc -= 1
        else:
            u[i + 1, nc] = dp[nc] - (cp[nc] * u[i + 1, nc + 1])
            nc -= 1
    norm[i] = normf(dx, u, i)
    xexp[i] = xexpf(dx, x, u, i)
    xexps[i] = xexpsf(dx, x, u, i)
    sda[i] = sdaf(xexp, xexps, i)

# Fill in final norm value.
norm[N] = normf(dx, u, N)
# Fill in final position expectation value.
xexp[N] = xexpf(dx, x, u, N)
# Fill in final squared position expectation value.
xexps[N] = xexpsf(dx, x, u, N)
# Fill in final standard deviation value.
sda[N] = sdaf(xexp, xexps, N)
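One quick numerical check on the matrices above is whether the combined Crank-Nicolson step conserves the norm, i.e. whether solving Al u_new = Ar u_old acts as a (numerically) unitary operator. A minimal sketch, assuming the Al, Ar and x defined in the code:

# The one-step propagator of the scheme is U_step = Al^{-1} Ar; for a
# norm-conserving scheme U_step should be close to unitary.
U_step = np.linalg.solve(Al, Ar)
unitarity_err = np.max(np.abs(U_step.conj().T @ U_step - np.eye(len(x))))
print("max |U^H U - I| =", unitarity_err)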

Neural network cost function code - Python implementation

I was implementing Andrew Ng's ML course in Python, and in week 5, exercise 4, I was referring to some existing code. What I didn't understand was the need to use np.trace() in the final output. I'm having a problem visualising the matrices.
import numpy as np
from scipy.optimize import minimize
import scipy.io
import matplotlib.pyplot as plt
data_dict = scipy.io.loadmat('ex4_orig_octave/ex4data1.mat')
X = data_dict['X']
y = data_dict['y'].ravel()
M = X.shape[0]
N = X.shape[1]
L = 26 # = number of nodes in the hidden layer (including bias node)
K = len(np.unique(y))
X = np.hstack((np.ones((M, 1)), X))
Y = np.zeros((M, K), dtype='uint8')
for i, row in enumerate(Y):
    Y[i, y[i] - 1] = 1
weights_dict = scipy.io.loadmat('ex4_orig_octave/ex4weights.mat')
theta_1 = weights_dict['Theta1']
theta_2 = weights_dict['Theta2']
nn_params_saved = np.concatenate((theta_1.flatten(), theta_2.flatten()))
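# Note: the snippet below calls sigmoid(), which isn't defined in this excerpt;
# the standard logistic function is presumably what is intended.
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))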
def nn_cost_function(nn_params, X, Y, M, N, L, K):
    """Python version of nnCostFunction.m after completing 'Part 1'."""
    # Unroll the parameter vector.
    theta_1 = nn_params[:(L - 1) * (N + 1)].reshape(L - 1, N + 1)
    theta_2 = nn_params[(L - 1) * (N + 1):].reshape(K, L)
    # Calculate activations in the second layer.
    a_2 = sigmoid(theta_1.dot(X.T))
    # Add the second layer's bias node.
    a_2_p = np.vstack((np.ones(M), a_2))
    # Calculate the activation of the third layer.
    a_3 = sigmoid(theta_2.dot(a_2_p))
    # Calculate the cost function.
    cost = 1 / M * np.trace(- Y.dot(np.log(a_3)) - (1 - Y).dot(np.log(1 - a_3)))
    return cost
cost_saved = nn_cost_function(nn_params_saved, X, Y, M, N, L, K)
print('Cost at parameters (loaded from ex4weights): %.6f' % cost_saved)
print('(this value should be about 0.287629)')
The operation 1/M * np.trace() is calculating the average cost over a batch of size M: the trace picks out the diagonal entries of Y.dot(np.log(a_3)) (and of the (1 - Y) term), and those diagonal entries are exactly the per-example cross-entropy sums over the K classes.
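A minimal sketch of the underlying identity, using small random stand-ins with the same shapes as Y (M, K) and np.log(a_3) (K, M):

import numpy as np

M, K = 5, 3
Y = np.random.rand(M, K)
logA3 = np.log(np.random.rand(K, M))   # stands in for np.log(a_3)

# trace(Y @ logA3) keeps only the matched (example i, example i) pairings,
# which equals summing the elementwise product of Y with logA3 transposed.
print(np.allclose(np.trace(Y @ logA3), np.sum(Y * logA3.T)))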
A bit less readable, but significantly faster, should be:
np.sum(np.sum(np.multiply(Y, np.log(a_3).T), axis=1), axis=0)
if Y.shape == (M, K) and a_3.shape == (K, M):
import timeit
Y = lambda : np.random.uniform(size=(5000,10))   # (M,K)
a3 = lambda : np.random.uniform(size=(10,5000))  # (K,M)
timeit.timeit('import numpy as np; np.trace(Y().dot(a3()))', number=10, globals=globals())
# 0.5633535870001651
timeit.timeit('import numpy as np; np.sum(np.sum(np.multiply(Y(),a3().T),axis=1),axis=0)', number=10, globals=globals())
# 0.013223066000136896

NMF Matrix Multiplication not reducing error

In NMF we iteratively update the matrices W and H so that their product WH comes to approximate V.
import numpy as np
import matplotlib.pyplot as plt

# specify the rank
r = 4
print('r:', r)

# Generate some synthetic data to create V0 from Wtrue and Htrue.
# Our challenge is then to find V ~ V0 by iterative learning,
# starting from random values of W and H.
Wtrue = np.array([[1,2],
                  [3,4],
                  [5,6],
                  [7,8]])
Htrue = np.array([[9,11,13],
                  [10,12,14]])
# Wtrue = np.random.rand(40,5) # nxr, i: 1 -> n, a: 1 -> r
# Htrue = np.random.rand(5,10) # rxm, i: 1 -> r, mu: 1 -> m
V0 = Wtrue @ Htrue   # converge: V ~ WH
print('V0:\n', V0)

# Dimensions of data (n rows and m cols)
n, m = V0.shape
print('n(row), m(col):', V0.shape)

# Normalise columnwise
V = np.zeros(shape=(n,m))
for i in range(m):
    V[:,i] = V0[:,i] / np.max(V0[:,i])
print('V:\n', V)

# Initialise W as n rows and r cols
W = np.random.rand(n,r)
# Initialise H as r rows and m cols
H = np.random.rand(r,m)
print('wH:\n', W @ H)
print('W:\n', W)
print('H:\n', H)

# Maximum iteration of 40k
maxIter = 8
f = np.zeros(shape=(maxIter, 1))
# Initial error
f[0] = np.linalg.norm(V - W @ H, ord='fro')
print('Initial error: ', np.log(np.linalg.norm(V - W @ H, ord='fro')))
print('Learning weights...')
for iter in range(maxIter-1):
    # Update W
    for i in range(n):
        for a in range(r):
            S = V[i,:] / (W @ H)[i,:] @ H[a,:]
            W[i,a] = W[i,a] * S
    # Update H
    for a in range(r):
        for u in range(m):
            T = V[:,u] / (W @ H)[:,u] @ W[:,a]
            H[a,u] = H[a,u] * T
    # Measure Error
    f[iter+1] = np.linalg.norm(V - W @ H, ord='fro')

fig, ax = plt.subplots(figsize=(5,4))
ax.set_title('Convergence of NMF')
ax.set_xlabel('Iteration')
ax.set_ylabel('log(Error)')
ax.plot(np.arange(maxIter), np.log(f), c='m')
ax.grid(True)
print('Final error: ', np.log(np.linalg.norm(V - W @ H, ord='fro')))
plt.savefig('images/NMF_convergence_r32.png')
The problem is that my solution stops reducing the error value after a small number of iterations. A correct implementation should keep reducing the error as the number of iterations increases.
This is where the problem most likely lies:
# Update W
for i in range(n):
    for a in range(r):
        S = V[i,:] / (W @ H)[i,:] @ H[a,:]
        W[i,a] = W[i,a] * S
# Update H
for a in range(r):
    for u in range(m):
        T = V[:,u] / (W @ H)[:,u] @ W[:,a]
        H[a,u] = H[a,u] * T
Here's the plot I'm getting:
Appreciate any thoughts.
I came up with a quick fix after reading chapter 10 of Programming Collective Intelligence by Toby Segaran.
First make sure you run from numpy import *, and then inside the main loop:
# ...
for iter in range(maxIter-1):
    # Update W
    wn = (V @ transpose(H))
    wd = (W @ H @ transpose(H))
    W = matrix(array(W)*array(wn)/array(wd))
    # Update H
    hn = (transpose(W)*V)
    hd = (transpose(W)*W*H)
    H = matrix(array(H)*array(hn)/array(hd))
    # Measure Error
    # ...
This has given me the correct plot:
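Equivalently, the same multiplicative updates can be written with plain ndarrays and the @ operator, avoiding the mix of matrix and array types (a sketch, assuming V, W, H, maxIter and f as defined in the question, with a small eps added only to guard against division by zero):

eps = 1e-9   # small constant to avoid division by zero
for it in range(maxIter - 1):
    # Lee-Seung multiplicative updates for the Frobenius-norm objective.
    W *= (V @ H.T) / (W @ H @ H.T + eps)
    H *= (W.T @ V) / (W.T @ W @ H + eps)
    f[it + 1] = np.linalg.norm(V - W @ H, ord='fro')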

Jacobi Method Outputting Wrong Eigenvalues

I am working on creating an eigenvalue calculator using the Jacobi method and it runs without errors. However, it does not find the correct eigenvalues nor does it find the correct eigenvectors. For some reason, I always get eigenvalues of 0. I think it may not be saving the matrix I input for MatrixA.
(Link to Jacobi method in case you are not familiar: http://fourier.eng.hmc.edu/e176/lectures/ch1/node1.html)
import numpy as np
import bettertimeit as time
import matplotlib as plt

def Jacobi(A):
    n = A.shape[0]             # matrix size: #columns = #lines
    maxit = 100                # maximum number of iterations
    eps = 1.0e-15              # accuracy goal
    pi = np.pi
    info = 0                   # return flag
    ev = np.zeros(n,float)     # initialize eigenvalues
    U = np.zeros((n,n),float)  # initialize eigenvectors
    for i in range(0,n): U[i,i] = 1.0

    for t in range(0,maxit):
        s = 0  # compute sum of off-diagonal elements in A(i,j)
        for i in range(0,n): s = s + np.sum(np.abs(A[i,(i+1):n]))
        if (s < eps):  # diagonal form reached
            info = t
            for i in range(0,n): ev[i] = A[i,i]
            break
        else:
            limit = s/(n*(n-1)/2.0)  # average value of off-diagonal elements
            for i in range(0,n-1):      # loop over lines of matrix
                for j in range(i+1,n):  # loop over columns of matrix
                    if (np.abs(A[i,j]) > limit):  # determine (ij) such that |A(i,j)| is larger than the
                                                  # average value of off-diagonal elements
                        denom = A[i,i] - A[j,j]   # denominator of Eq. (3.61)
                        if (np.abs(denom) < eps): phi = pi/4                 # Eq. (3.62)
                        else: phi = 0.5*np.arctan(2.0*A[i,j]/denom)          # Eq. (3.61)
                        si = np.sin(phi)
                        co = np.cos(phi)
                        for k in range(i+1,j):
                            store = A[i,k]
                            A[i,k] = A[i,k]*co + A[k,j]*si   # Eq. (3.56)
                            A[k,j] = A[k,j]*co - store *si   # Eq. (3.57)
                        for k in range(j+1,n):
                            store = A[i,k]
                            A[i,k] = A[i,k]*co + A[j,k]*si   # Eq. (3.56)
                            A[j,k] = A[j,k]*co - store *si   # Eq. (3.57)
                        for k in range(0,i):
                            store = A[k,i]
                            A[k,i] = A[k,i]*co + A[k,j]*si
                            A[k,j] = A[k,j]*co - store *si
                        store = A[i,i]
                        A[i,i] = A[i,i]*co*co + 2.0*A[i,j]*co*si + A[j,j]*si*si   # Eq. (3.58)
                        A[j,j] = A[j,j]*co*co - 2.0*A[i,j]*co*si + store *si*si   # Eq. (3.59)
                        A[i,j] = 0.0   # Eq. (3.60)
                        for k in range(0,n):
                            store = U[k,j]
                            U[k,j] = U[k,j]*co - U[k,i]*si   # Eq. (3.66)
                            U[k,i] = U[k,i]*co + store *si   # Eq. (3.67)
        info = -t  # in case no convergence is reached set info to a negative value "-t"
    return ev,U,t
n = int(input("Enter the matrix size: "))
A = np.zeros((n, n))
for i in range(n):
    A[i] = input().split(" ")
MatrixA = np.array(A)
print("A= ")
print(A)
for i in range(A.shape[0]):
    row = ["{}*x{}".format(A[i, j], j + 1) for j in range(A.shape[1])]

# Jacobi-method
ev,U,t = Jacobi(A)
print("JACOBI METHOD: Number of rotations = ", t)
print("Eigenvalues = ", ev)
print("Eigenvectors = ")
print(U)
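For reference, a quick way to check the output on a small symmetric test case is to compare against numpy's built-in symmetric eigensolver (a sketch; the Jacobi routine above assumes a symmetric input, and it modifies its argument in place, so a copy is passed):

# Hypothetical 3x3 symmetric test matrix.
Atest = np.array([[4.0, 1.0, 2.0],
                  [1.0, 3.0, 0.5],
                  [2.0, 0.5, 5.0]])
ev, U, t = Jacobi(Atest.copy())
print("Jacobi eigenvalues:", np.sort(ev))
print("numpy eigenvalues: ", np.linalg.eigvalsh(Atest))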
