import numpy as np
from scipy.linalg import solve_triangular as triSolve

# One triangular solve per iteration: O(n^2) for a dense system, so O(n^2 N) overall;
# useful for large SPD / strictly diagonally dominant matrices.
def GS_iter(A, b, N):
    m = len(A)
    L = np.tril(A)  # lower-triangular part, including the diagonal
    P = L - A       # P = -U (negated strictly upper part)
    print(P)
    x = np.zeros(m)
    print(x)
    for k in range(N):
        x = triSolve(L, b + P @ x, True)
    return x

# examples
A = np.array([[10, 2, 3, 1], [1, 10, 0, 1], [0.2, 1, 10, 2], [0.1, 3, 3, 10]])
b = np.array([1, 2, 1, 0])
x = GS_iter(A, b, 50000)
ans = A @ x - b
print(ans)
print(np.linalg.norm(ans))
Above is my Gauss-Seidel method in Python. For some reason it does not converge to the solution even after 50000 iterations, even though the matrix A is strictly diagonally dominant. Below is the same implementation in MATLAB, which works:
function x = gSeidel(A,B,N)
    [n,~] = size(A);
    L = tril(A);
    P = L-A; %P = -U
    x = zeros(n,1); %x_0
    for k = 1:N
        x = L\(B+P*x);
    end
end
What mistake did I make? I think it is in the triSolve call, since if I replace it with a general solver such as np.linalg.solve it works. Why doesn't the triangular solve behave as intended here?
The lower argument is the fourth positional parameter, not the third. By passing True positionally you set trans, not lower, so SciPy keeps the default lower=False (treating L as upper triangular) and solves against the transpose. Replace your line with
x = triSolve(L, b + P@x, lower=True)
Signature:
triSolve(
    a,
    b,
    trans=0,
    lower=False,
    unit_diagonal=False,
    overwrite_b=False,
    debug=None,
    check_finite=True,
)
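For what it's worth, a minimal sketch of the pitfall on a hypothetical 2x2 system:

import numpy as np
from scipy.linalg import solve_triangular

L = np.array([[2.0, 0.0],
              [1.0, 3.0]])
y = np.array([2.0, 7.0])

x_right = solve_triangular(L, y, lower=True)  # solves L @ x = y, as intended
x_wrong = solve_triangular(L, y, True)        # True binds to trans; lower stays False,
                                              # so only the upper triangle of L is used

print(L @ x_right)  # [2. 7.]  -- reproduces y
print(L @ x_wrong)  # does not reproduce y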
So I am trying to solve the differential equation $\frac{d^2y}{dx^2} = -y(x)$ subject to the boundary conditions $y(0) = 0$ and $y(1) = 1$; the analytic solution is $y(x) = \sin(x)/\sin(1)$.
I am using the three-point stencil to approximate the second derivative.
The curves obtained these two ways should match at least at the boundaries, but my solutions show small differences even there.
I am attaching the code; please tell me what is wrong.
import numpy as np
import scipy.linalg as lg
from scipy.sparse.linalg import eigs
from scipy.sparse.linalg import inv
from scipy import sparse
import matplotlib.pyplot as plt

a = 0
b = 1
N = 1000
h = (b-a)/N
r = np.arange(a, b+h, h)
y_a = 0
y_b = 1

def lap_three(r):
    h = r[1] - r[0]
    n = len(r)
    M_d = -2*np.ones(n)
    #M_d = M_d + B_d
    O_d = np.ones(n-1)
    mat = sparse.diags([M_d, O_d, O_d], offsets=(0, +1, -1))
    #print(mat)
    return mat

def f(r):
    h = r[1] - r[0]
    n = len(r)
    return -1*np.ones(len(r))*(h**2)

def R_mat(f, r):
    r_d = f(r)
    R_mat = sparse.diags([r_d], offsets=[0])
    #print(R_mat)
    return R_mat

#def R_mat(r):
#    M_d = -1*np.ones(len(r))

def make_mat(r):
    main = lap_three(r) - R_mat(f, r)
    return main

main = make_mat(r)
main_mat = main.toarray()
print(main_mat)

'''
eig_val , eig_vec = eigs(main, k = 20, which = 'SM')
#print(eig_val)
Val = eig_vec.T
plt.plot(r, Val[0])
'''

main_inv = inv(main)
inv_mat = main_inv.toarray()
#print(inv_mat)
#print(np.dot(main_mat, inv_mat))

n = len(r)
B_d = np.zeros(n)
B_d[0] = 0
B_d[-1] = 1
#print(B_d)

#from scipy.sparse.linalg import spsolve
A = np.abs(np.dot(inv_mat, B_d))
plt.plot(r[0:10], A[0:10], label='calculated solution')
real = np.sin(r)/np.sin(1)
plt.plot(r[0:10], real[0:10], label='analytic solution')
plt.legend()
#plt.plot(r, real)
#plt.plot(r, A)

'''diff = A-real
plt.plot(r, diff)'''
There is no guarantee of what the last point of arange(a, b+h, h) will be; it will mostly be b, but in some cases it can also be b+h. Better use
r,h = np.linspace(a,b,N+1,retstep=True)
The linear system consists of the equations for the middle positions r[1],...,r[N-1]. These are N-1 equations, so the matrix should be (N-1)x(N-1); yours is too large.
You could keep the matrix construction shorter by including the h^2 term already in M_d.
If you use sparse matrices, you can also use the sparse solver A = spsolve(main, B_d).
The equations that make up the system are
A[k-1] + (-2+h^2)*A[k] + A[k+1] = 0
The vector on the right side thus needs to contain the values -A[0] and -A[N]. This should clear up the sign problem, no need to cheat with the absolute value.
The solution vector A corresponds, as constructed from the start, to r[1:-1]. As there are no values for positions 0 and N inside it, there can also be no difference there.
PS: There is no relaxation involved here, foremost because this is not an iterative method. Perhaps you meant a finite difference method.
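Putting those points together, a minimal corrected sketch (reusing the names from the question) could look like this:

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import spsolve

a, b = 0, 1
N = 1000
y_a, y_b = 0.0, 1.0
r, h = np.linspace(a, b, N+1, retstep=True)

# N-1 interior unknowns: y[k-1] + (-2 + h^2)*y[k] + y[k+1] = 0
main = sparse.diags([(-2 + h**2)*np.ones(N-1), np.ones(N-2), np.ones(N-2)],
                    offsets=(0, 1, -1), format='csc')

rhs = np.zeros(N-1)
rhs[0] = -y_a   # left boundary value moved to the right-hand side
rhs[-1] = -y_b  # right boundary value moved to the right-hand side

y = np.empty(N+1)
y[0], y[-1] = y_a, y_b
y[1:-1] = spsolve(main, rhs)

print(np.max(np.abs(y - np.sin(r)/np.sin(1))))  # small O(h^2) discretization error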
I have optimized the calculation of the Mandelbrot set a bit, and I now wish to be able to specify whether my arrays should be float64 or float32, instead of the easier implementation with dtype complex128 or complex64. I use the fact that for a complex number, (a+jb)^2 = a^2 - b^2 + (2ab)j, but this seems to give me a wrong Mandelbrot set. The code is seen below:
from timeit import default_timer as timer
import numpy as np
from numexpr import evaluate
import matplotlib.pyplot as plt
#%% Inputs
N = 5000
I = 20
T = 2  # threshold
#%% Functions
def mandel_brot_vector(I, C, T, datatype):
    Cre = np.array(C.real, dtype=datatype)
    Cim = np.array(C.imag, dtype=datatype)
    M = np.zeros(Cre.shape, dtype=datatype)
    zreal = 0
    zimag = 0
    for i in range(I):
        M[zreal*zreal + zimag*zimag < T**2] = i/I
        zreal = evaluate("zreal*zreal-zimag*zimag+Cre")  # complex multiplication rule
        zimag = evaluate("2*zreal*zimag+Cim")            # complex multiplication rule
    N = len(M[0])
    M = np.reshape(np.array(M), (N//2, N)).astype(datatype)
    M = np.concatenate((M, M[::-1]), axis=0)
    return M
def create_C(N, split):
    C_re = np.linspace(np.full((1, N), -2)[0], np.full((1, N), 1)[0], N).T
    C_im = np.linspace(np.full((1, N), 1.5*1j)[0], np.full((1, N), -1.5*1j)[0], N)
    C = C_re + C_im
    C = C[:N//2, :]
    if split != 0:
        C_split = np.array_split(C, split)
    else:
        C_split = C
    return np.array(C_split)
C = create_C(N, 0)
t0_32 = timer()
M32 = mandel_brot_vector(I,C,T,np.float32)
t_32 = timer() - t0_32
t0_64 = timer()
M64 = mandel_brot_vector(I,C,T,np.float64)
t_64 = timer() - t0_64
plt.matshow(M64,cmap="hot")
print(" "*10,f"N={N}")
print(f"{'Float 32':<20}{t_32:<40}",
f"\n{'Float 64':<20}{t_64:<40}"
)
Currently the image I get: wrong mandelbrot. For reference, the following function will produce the correct Mandelbrot set, but with complex128:
def mandel_brot(I, C, T):
    M = np.zeros(C.shape)
    z = 0
    for i in range(I):
        M[np.abs(z) < T] = i/I
        z = evaluate("z*z+C")
    N = len(M[0])
    M = np.reshape(np.array(M), (N//2, N)).astype(datatype)
    M = np.concatenate((M, M[::-1]), axis=0)
    return M
Hope someone can help solve this issue; thanks in advance. By the way, do not bother with the split of the C array; it is set up for multiprocessing, which I am not using in the code attached.
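For what it's worth, one likely culprit: inside the loop, zreal is reassigned before zimag is computed, so the imaginary update 2*zreal*zimag uses the new real part instead of the old one. A minimal sketch of the fix for the loop body (hypothetical temporary zreal_new; everything else as in the question's function):

for i in range(I):
    M[zreal*zreal + zimag*zimag < T**2] = i/I
    zreal_new = evaluate("zreal*zreal - zimag*zimag + Cre")  # new real part from the OLD zreal, zimag
    zimag = evaluate("2*zreal*zimag + Cim")                  # still uses the OLD zreal here
    zreal = zreal_new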
Can this be done without a loop?
import numpy as np

n = 10
x = np.random.random(n+1)
a, b = 0.45, 0.55
for i in range(n):
    x = a*x[:-1] + b*x[1:]
I came across this setup in another question, where it was covered in somewhat obscure nomenclature. I guess it is related to the binomial options pricing model, but to be honest I don't quite understand the topic. I was intrigued by the formula and this iterative update / shrinking of x, and wondered if it can be done without a loop. But I cannot wrap my head around it, and I am not sure whether it is even possible.
What makes me think that it might work is that this variation
n = 10
a, b = 0.301201, 0.59692
x0 = 123
x = x0
for i in range(n):
    x = a*x + b*x
# ~42
is actually just x0*(a + b)**n
print(np.allclose(x, x0*(a + b)**n))
# True
You are calculating:
sum( a ** (n - i) * b ** i * x[i] * choose(n, i) for 0 <= i <= n)
[That's meant to be pseudocode, not Python.] I'm not sure of the best way to convert that into Numpy.
choose(n, i) is n!/ (i! (n-i)!), not the numpy choose function.
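For what it's worth, a direct NumPy translation of that pseudocode might look like this sketch, with scipy.special.comb supplying the binomial coefficients:

import numpy as np
from scipy.special import comb

n = 10
x = np.random.random(n+1)
a, b = 0.45, 0.55

i = np.arange(n+1)
weights = comb(n, i) * a**(n - i) * b**i  # choose(n, i) * a^(n-i) * b^i
res = np.sum(weights * x)

This works for arbitrary a and b; nothing here requires a + b = 1.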
Using @mathfux's comment, one can do

import numpy as np
from scipy.stats import binom

p = b  # = 0.55, the weight on x[1:] in the question's update
binomial = binom(n=n, p=p)
pmf = binomial.pmf(np.arange(n+1))
res = np.sum(x * pmf)  # x is the original array, before the loop overwrites it
So

res = x.copy()
for i in range(n):
    res = (1-p)*res[:-1] + p*res[1:]

is just the expected value of x evaluated at a binomially distributed index, E[x[B]] with B ~ Binomial(n, p).
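A quick self-contained check that the pmf weighting reproduces the loop, using the question's numbers:

import numpy as np
from scipy.stats import binom

n = 10
a, b = 0.45, 0.55  # a = 1-p, b = p
x = np.random.random(n+1)

loop = x.copy()
for _ in range(n):
    loop = a*loop[:-1] + b*loop[1:]

vectorized = np.sum(x * binom.pmf(np.arange(n+1), n=n, p=b))
print(np.allclose(loop[0], vectorized))  # True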
So I have two matrices, A and B, and I want to compute the min-plus product as given here: Min-plus matrix multiplication. For that I've implemented the following:
def min_plus_product(A, B):
    B = np.transpose(B)
    Y = np.zeros((len(B), len(A)))
    for i in range(len(B)):
        Y[i] = (A + B[i]).min(1)
    return np.transpose(Y)
This works fine, but it is slow for big matrices. Is there a way to make it faster? I've heard that implementing it in C or using the GPU might be good options.
Here is an algorithm that saves a bit when the middle dimension is large enough and the entries are uniformly distributed. It exploits the fact that the smallest sum typically comes from two small terms.
import numpy as np

def min_plus_product(A, B):
    B = np.transpose(B)
    Y = np.zeros((len(B), len(A)))
    for i in range(len(B)):
        Y[i] = (A + B[i]).min(1)
    return np.transpose(Y)

def min_plus_product_opt(A, B, chop=None):
    if chop is None:
        # not sure this is optimal
        chop = int(np.ceil(np.sqrt(A.shape[1])))
    B = np.transpose(B)
    Amin = A.min(1)
    Y = np.zeros((len(B), len(A)))
    for i in range(len(B)):
        o = np.argsort(B[i])
        Y[i] = (A[:, o[:chop]] + B[i, o[:chop]]).min(1)
        if chop < len(o):
            idx = np.where(Amin + B[i, o[chop]] < Y[i])[0]
            for j in range(chop, len(o), chop):
                if len(idx) == 0:
                    break
                x, y = np.ix_(idx, o[j : j + chop])
                slmin = (A[x, y] + B[i, o[j : j + chop]]).min(1)
                slmin = np.minimum(Y[i, idx], slmin)
                Y[i, idx] = slmin
                nidx = np.where(Amin[idx] + B[i, o[j + chop]] < Y[i, idx])[0]
                idx = idx[nidx]
    return np.transpose(Y)
A = np.random.random(size=(1000,1000))
B = np.random.random(size=(1000,2000))
print(np.allclose(min_plus_product(A,B), min_plus_product_opt(A,B)))
import time
t = time.time();min_plus_product(A,B);print('naive {}sec'.format(time.time()-t))
t = time.time();min_plus_product_opt(A,B);print('opt {}sec'.format(time.time()-t))
Sample output:
True
naive 7.794037580490112sec
opt 1.65810227394104sec
A possible simple route is to use numba.
from numba import njit  # autojit has been removed from numba; njit = jit(nopython=True)
import numpy as np

@njit
def min_plus_product(A, B):
    n = A.shape[0]
    C = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            minimum = A[i, 0] + B[0, j]
            for k in range(1, n):
                minimum = min(A[i, k] + B[k, j], minimum)
            C[i, j] = minimum
    return C
Timings on 1000x1000 A,B matrices are:
1 loops, best of 3: 4.28 s per loop for the original code
1 loops, best of 3: 2.32 s per loop for the numba code
Here is a succinct and fully vectorized NumPy solution, without any Python-level loops:
(np.expand_dims(a, 0) + np.expand_dims(b.T, 1)).min(axis=2).T
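Note that this materializes the full 3-D sum array before reducing: for an (n, m) by (m, p) product it allocates a (p, n, m) temporary, trading memory for speed. A quick sanity check against the loop version defined above:

import numpy as np

a = np.random.random((50, 40))
b = np.random.random((40, 30))

bcast = (np.expand_dims(a, 0) + np.expand_dims(b.T, 1)).min(axis=2).T
print(np.allclose(min_plus_product(a, b), bcast))  # True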
I have a system of equations in the form of A*x = B, where [A] is a tridiagonal coefficient matrix. Using the NumPy solver numpy.linalg.solve I can solve the system of equations for x.
See the example below of how I develop the tridiagonal [A] matrix and the {B} vector, and solve for x:
# Solve system of equations with a tridiagonal coefficient matrix
# uses numpy.linalg.solve

# use Python 3 print function
from __future__ import print_function
from __future__ import division

# modules
import numpy as np
import time

ti = time.perf_counter()  # time.clock() was removed in Python 3.8

#---- Build [A] array and {B} column vector

m = 1000  # size of array, make this 8000 to see time benefits

A = np.zeros((m, m))  # pre-allocate [A] array
B = np.zeros((m, 1))  # pre-allocate {B} column vector

A[0, 0] = 1
A[0, 1] = 2
B[0, 0] = 1

for i in range(1, m-1):
    A[i, i-1] = 7  # node-1
    A[i, i] = 8    # node
    A[i, i+1] = 9  # node+1
    B[i, 0] = 2

A[m-1, m-2] = 3
A[m-1, m-1] = 4
B[m-1, 0] = 3

print('A \n', A)
print('B \n', B)

#---- Solve using numpy.linalg.solve
x = np.linalg.solve(A, B)  # solve A*x = B for x
print('x \n', x)

#---- Elapsed time for each approach
print('NUMPY time', time.perf_counter()-ti, 'seconds')
So my question relates to two sections of the above example:
Since I am dealing with a tridiagonal matrix for [A], also called a banded matrix, is there a more efficient way to solve the system of equations instead of using numpy.linalg.solve?
Also, is there a better way to create the tridiagonal matrix instead of using a for-loop?
The above example runs on Linux in about 0.08 seconds according to the elapsed-time measurement above.
The numpy.linalg.solve function works fine, but I'm trying to find an approach that takes advantage of the tridiagonal form of [A] in hopes of speeding up the solution even further and then apply that approach to a more complicated example.
There are two immediate performance improvements (1) do not use a loop, (2) use scipy.linalg.solve_banded().
I would write the code something like this:
import numpy as np
import scipy.linalg as la

m = 1000  # same size as in the question

# Create arrays and set values
ab = np.zeros((3, m))
b = 2*np.ones(m)
ab[0] = 9
ab[1] = 8
ab[2] = 7

# Fix end points
ab[0, 1] = 2
ab[1, 0] = 1
ab[1, -1] = 4
ab[2, -2] = 3
b[0] = 1
b[-1] = 3

x = la.solve_banded((1, 1), ab, b)
There may be more elegant ways to construct the matrix, but this works.
Using %timeit in ipython the original code took 112 ms for m=1000. This code takes 2.94 ms for m=10,000, an order of magnitude larger problem yet still almost two orders of magnitude faster! I did not have the patience to wait on the original code for m=10,000. Most of the time in the original may be in constructing the array, I did not test this. Regardless, for large arrays it is much more efficient to only store the non-zero values of the matrix.
There is a scipy.sparse matrix type called scipy.sparse.dia_matrix which captures the structure of your matrix well (it will store 3 arrays, in "positions" 0 (diagonal), 1 (above) and -1 (below)). Using this type of matrix you can try scipy.sparse.linalg.lsqr for solving. If your problem has an exact solution, it will be found, otherwise it will find the solution in least squares sense.
from scipy import sparse
import scipy.sparse.linalg

A_sparse = sparse.dia_matrix(A)
ret_values = sparse.linalg.lsqr(A_sparse, B)
x = ret_values[0]
However, this may not be completely optimal in terms of exploiting the tridiagonal structure; there may be a theoretical way of making this faster. What this conversion does do for you is cut down the matrix multiplication expenses to the essential: only the 3 bands are used. This, in combination with the iterative solver lsqr, should already yield a speedup.
Note: I am not proposing scipy.sparse.linalg.spsolve, because it converts your matrix to csr format. However, replacing lsqr with spsolve is worth a try, especially because spsolve can bind UMFPACK, see relevant doc on spsolve. Also, it may be of interest to take a look at this stackoverflow question and answer relating to UMFPACK
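For completeness, a minimal sketch of the spsolve variant mentioned above, reusing A and B from the question (the explicit CSR conversion just makes visible what spsolve would do internally):

from scipy import sparse
from scipy.sparse.linalg import spsolve

A_csr = sparse.dia_matrix(A).tocsr()  # A as constructed in the question
x = spsolve(A_csr, B.ravel())         # direct sparse solve of A*x = B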
You could use scipy.linalg.solveh_banded.
EDIT: You CANNOT use the above, as your matrix is not symmetric and I thought it was. However, as was mentioned above in the comment, the Thomas algorithm is great for this:
a = [7] * (m - 2) + [3]
b = [1] + [8] * (m - 2) + [4]
c = [2] + [9] * (m - 2)
d = [1] + [2] * (m - 2) + [3]

# This is taken directly from the Wikipedia page also cited above
# this overwrites b and d
def TDMASolve(a, b, c, d):
    n = len(d)  # n is the number of rows; a and c have length n-1
    # forward elimination
    for i in range(n-1):
        d[i+1] -= 1. * d[i] * a[i] / b[i]
        b[i+1] -= 1. * c[i] * a[i] / b[i]
    # back substitution
    for i in reversed(range(n-1)):
        d[i] -= d[i+1] * c[i] / b[i+1]
    return [d[i] / b[i] for i in range(n)]
This code is not optimized, nor does it use NumPy, but if I (or any of the other fine folks here) have time, I will edit it so that it does those things. It currently times at ~10 ms for m=10000.
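A quick sanity check against NumPy on a small system (a sketch; copies are passed because TDMASolve overwrites its b and d arguments):

import numpy as np

m = 10
a = [7] * (m - 2) + [3]
b = [1] + [8] * (m - 2) + [4]
c = [2] + [9] * (m - 2)
d = [1] + [2] * (m - 2) + [3]

A = np.diag(b) + np.diag(a, -1) + np.diag(c, 1)  # assemble the dense tridiagonal matrix
x = TDMASolve(a[:], b[:], c[:], d[:])            # list slices pass copies
print(np.allclose(x, np.linalg.solve(A, np.array(d, dtype=float))))  # True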
This probably will help.
There is a function create_tridiagonal below which will create a tridiagonal matrix, and another function diagonal_form which converts a square matrix into the diagonal ordered form required by the SciPy solve_banded function.
import numpy as np

def lu_decomp3(a):
    """
    c,d,e = lu_decomp3(a).
    LU decomposition of tridiagonal matrix a = [c\d\e]. On output
    {c},{d} and {e} are the diagonals of the decomposed matrix a.
    """
    n = np.diagonal(a).size
    assert(np.all(a.shape == (n, n)))  # check if square matrix
    d = np.copy(np.diagonal(a))  # without copy, an "assignment destination is read-only" error is raised
    e = np.copy(np.diagonal(a, 1))
    c = np.copy(np.diagonal(a, -1))
    for k in range(1, n):
        lam = c[k-1]/d[k-1]
        d[k] = d[k] - lam*e[k-1]
        c[k-1] = lam
    return c, d, e

def lu_solve3(c, d, e, b):
    """
    x = lu_solve(c,d,e,b).
    Solves [c\d\e]{x} = {b}, where {c}, {d} and {e} are the
    vectors returned from lu_decomp3.
    """
    n = len(d)
    y = np.zeros_like(b)
    y[0] = b[0]
    for k in range(1, n):
        y[k] = b[k] - c[k-1]*y[k-1]
    x = np.zeros_like(b)
    x[n-1] = y[n-1]/d[n-1]  # there is no x[n]; that would be out of range
    for k in range(n-2, -1, -1):
        x[k] = (y[k] - e[k]*x[k+1])/d[k]
    return x

from scipy.sparse import diags

def create_tridiagonal(size=4):
    diag = np.random.randn(size)*100
    diag_pos1 = np.random.randn(size-1)*10
    diag_neg1 = np.random.randn(size-1)*10
    a = diags([diag_neg1, diag, diag_pos1], offsets=[-1, 0, 1], shape=(size, size)).todense()
    return a

a = create_tridiagonal(4)
b = np.random.randn(4)*10
print('matrix a is\n = {} \n\n and vector b is \n {}'.format(a, b))

c, d, e = lu_decomp3(a)
x = lu_solve3(c, d, e, b)
print("x from our function is {}".format(x))
print("check is answer correct ({})".format(np.allclose(np.dot(a, x), b)))

## Test Scipy
from scipy.linalg import solve_banded

def diagonal_form(a, upper=1, lower=1):
    """
    a is a numpy square matrix
    this function converts a square matrix to diagonal ordered form
    returned matrix in ab shape which can be used directly for scipy.linalg.solve_banded
    """
    n = a.shape[1]
    assert(np.all(a.shape == (n, n)))
    ab = np.zeros((2*n - 1, n))
    for i in range(n):
        ab[i, (n-1)-i:] = np.diagonal(a, (n-1)-i)
    for i in range(n-1):
        ab[(2*n-2)-i, :i+1] = np.diagonal(a, i-(n-1))
    mid_row_inx = int(ab.shape[0]/2)
    upper_rows = [mid_row_inx - i for i in range(1, upper+1)]
    upper_rows.reverse()
    upper_rows.append(mid_row_inx)
    lower_rows = [mid_row_inx + i for i in range(1, lower+1)]
    keep_rows = upper_rows + lower_rows
    ab = ab[keep_rows, :]
    return ab

ab = diagonal_form(a, upper=1, lower=1)  # for a tridiagonal matrix, upper and lower = 1
x_sp = solve_banded((1, 1), ab, b)
print("is our answer the same as scipy answer ({})".format(np.allclose(x, x_sp)))