I am trying to compute the accelerations due to gravity for an n-body problem in 3-space (I'm using symplectic Euler).
I have position and velocity vectors for each time step, and am using the below (working) code to calculate accelerations and update velocity and position. Note that the accelerations are vectors in 3-space, not just magnitudes.
I would like to know if there's a more efficient way to compute this with numpy to avoid the loops.
def accelerations(positions, masses):
    """Compute the net gravitational acceleration acting on every body.

    Params:
    - positions: numpy array of size (n,3)
    - masses: numpy array of size (n,)

    Returns:
    - accelerations: numpy of size (n,3), the acceleration vectors in 3-space

    The function reads the module-level names ``epsilon`` (pairs closer than
    this distance are ignored, which also skips the i == j self-pairs) and
    ``gravitational_constant``.
    """
    n_bodies = len(masses)

    # vectors from mass(i) to mass(j)
    D = numpy.zeros([n_bodies, n_bodies, 3])  # n_bodies * n_bodies * (x,y,z)
    for i, j in itertools.product(range(n_bodies), range(n_bodies)):
        D[i][j] = positions[j] - positions[i]

    # Acceleration due to gravitational force between each pair of bodies
    A = numpy.zeros((n_bodies, n_bodies, 3))
    for i, j in itertools.product(range(n_bodies), range(n_bodies)):
        distance = numpy.linalg.norm(D[i][j])  # computed once per pair
        if distance > epsilon:
            A[i][j] = gravitational_constant * masses[j] * D[i][j] / distance**3

    # Net acceleration of each body: sum over the partner index j,
    # giving an array of shape (n_bodies, 3).
    return numpy.sum(A, axis=1)
Here is an optimized version using blas. blas has special routines for linear algebra on symmetric or Hermitian matrices. These use specialized, packed storage, keeping only the upper or lower triangle and leaving out the (redundant) mirrored entries. That way blas saves not only ~half the storage but also ~half the flops.
I've put quite a few comments to make it readable.
import numpy as np
import itertools
from scipy.linalg.blas import zhpr, dspr2, zhpmv
def acc_vect(pos, mas):
    """Vectorised gravitational accelerations (G = 1) for n point masses.

    Parameters
    ----------
    pos : ndarray, shape (n, 3)
        Body positions. Not modified.
    mas : ndarray, shape (n,)
        Body masses.

    Returns
    -------
    ndarray, shape (n, 3)
        Row j is sum_i mas[i] * (pos[i] - pos[j]) / |pos[i] - pos[j]|**3.
    """
    # Start from -2 * <p_i, p_j>; the matrix product allocates a fresh array,
    # so the in-place updates below never touch the caller's `pos`.
    d2 = pos @ (-2 * pos.T)
    # The diagonal holds -2 |p_i|^2, so this recovers |p_i|^2.
    diag = -0.5 * np.einsum('ii->i', d2)
    # |p_i|^2 + |p_j|^2 - 2 <p_i, p_j>  ==  squared pairwise distance
    d2 += diag + diag[:, None]
    # einsum('ii->i') is a writable view of the diagonal: put 1 there so the
    # self-pairs (whose displacement is zero anyway) do not divide by zero.
    np.einsum('ii->i', d2)[...] = 1
    # nansum drops NaNs that arise when rounding makes a squared distance of
    # (nearly) coincident bodies slightly negative.
    return np.nansum((pos[:, None, :] - pos) * (mas[:, None] * d2**-1.5)[..., None], axis=0)
def acc_blas(pos, mas):
    """Gravitational accelerations (G = 1) using packed-storage BLAS routines.

    Parameters
    ----------
    pos : ndarray, shape (n, 3)
        Body positions.
    mas : ndarray, shape (n,)
        Body masses.

    Returns
    -------
    ndarray, shape (n, 3)
        Acceleration of each body.
    """
    n = mas.size
    # trick: use complex Hermitian to get the packed anti-symmetric
    # outer difference in the imaginary part of the zhpr answer
    # don't want to sum over dimensions yet, therefore must do them one-by-one
    trck = np.zeros((3, n * (n + 1) // 2), complex)
    for a, p in zip(trck, pos.T - 1j):
        zhpr(n, -2, p, a, 1, 0, 0, 1)
        # does  a -> a + alpha x x^H
        # parameters: n             --  matrix dimension
        #             alpha         --  real scalar
        #             x             --  complex vector
        #             ap            --  packed Hermitian n x n matrix a
        #                               i.e. an n(n+1)/2 vector
        #             incx          --  x stride
        #             offx          --  x offset
        #             lower         --  is storage of ap lower or upper
        #             overwrite_ap  --  whether to change a inplace
    # as a by-product we get -2 pos pos^T in the real part (the "+ 6" removes
    # the constant -2 contributed by the -1j shift in each of the 3 dimensions):
    ppT = trck.real.sum(0) + 6
    # now compute matrix of squared distances ...
    # ... using (A-B)^2 = A^2 + B^2 - 2AB
    # ... that and the outer sum X (+) X.T equals X ones^T + ones X^T
    # (the cumsum indexes the diagonal entries of upper packed storage)
    dspr2(n, -0.5, ppT[np.r_[0, 2:n+1].cumsum()], np.ones((n,)), ppT,
          1, 0, 1, 0, 0, 1)
    # does  a -> a + alpha x y^T + alpha y x^T    in packed symmetric storage
    # scale anti-symmetric differences by distance^-3; the result must be
    # written back into trck (out=...), zero distances are left untouched
    np.divide(trck.imag, ppT*np.sqrt(ppT), where=ppT.astype(bool),
              out=trck.imag)
    # it remains to scale by mass and sum
    # this can be done by matrix multiplication with the vector of masses ...
    # ... unfortunately because we need anti-symmetry we need to work
    # with Hermitian storage, i.e. complex numbers, even though the actual
    # computation is only real:
    out = np.zeros((3, n), complex)
    for a, o in zip(trck, out):
        zhpmv(n, 0.5, a, mas*-1j, 1, 0, 0, o, 1, 0, 0, 1)
        # multiplies packed Hermitian matrix by vector
    return out.real.T
def accelerations(positions, masses, epsilon=1e-6, gravitational_constant=1.0):
    """Loop-based reference implementation of the n-body accelerations.

    Params:
    - positions: numpy array of size (n,3)
    - masses: numpy array of size (n,)
    - epsilon: pairs whose separation is not larger than this are ignored
      (this also skips the i == j self-pairs)
    - gravitational_constant: multiplicative constant G

    Returns:
    - numpy array of size (n,3) with the net acceleration of each body
    """
    n_bodies = len(masses)

    # vectors from mass(i) to mass(j)
    D = np.zeros([n_bodies, n_bodies, 3])  # n_bodies * n_bodies * (x,y,z)
    for i, j in itertools.product(range(n_bodies), range(n_bodies)):
        D[i][j] = positions[j] - positions[i]

    # Acceleration due to gravitational force between each pair of bodies
    A = np.zeros((n_bodies, n_bodies, 3))
    for i, j in itertools.product(range(n_bodies), range(n_bodies)):
        distance = np.linalg.norm(D[i][j])  # computed once per pair
        if distance > epsilon:
            A[i][j] = gravitational_constant * masses[j] * D[i][j] / distance**3

    # Calculate net acceleration of each body: sum over the partner index j
    return np.sum(A, axis=1)
from numpy.linalg import norm
def acc_pm(positions, masses, G=1):
    """Broadcasting implementation: pairwise forces summed, then divided by mass.

    Params:
    - positions: numpy array of size (n,3)
    - masses: numpy array of size (n,)
    - G: gravitational constant

    Returns:
    - numpy array of size (n,3) with the net acceleration of each body
    """
    # mass_matrix[i, j, 0] = m_i * m_j
    mass_matrix = masses.reshape((1, -1, 1)) * masses.reshape((-1, 1, 1))
    # disps[i, j] = positions[j] - positions[i]
    disps = positions.reshape((1, -1, 3)) - positions.reshape((-1, 1, 3))  # displacements
    dists = norm(disps, axis=2)
    # Zero distances (the diagonal) have zero displacement as well, so any
    # non-zero stand-in value yields a zero force without a warning.
    dists[dists == 0] = 1  # Avoid divide by zero warnings
    forces = G * disps * mass_matrix / np.expand_dims(dists, 2)**3
    # Sum the forces on body i over all partners j, then a = F / m.
    return forces.sum(axis=1) / masses.reshape(-1, 1)
# Benchmark set-up: n bodies with uniformly random positions and masses.
n = 500
pos = np.random.random((n, 3))
mas = np.random.random((n,))
from timeit import timeit
# timeit returns the total time in seconds for `number` calls, so the factor
# 1000 (number=1) or 100 (number=10) converts to milliseconds per call.
print(f"loops: {timeit('accelerations(pos, mas)', globals=globals(), number=1)*1000:10.3f} ms")
print(f"pmende: {timeit('acc_pm(pos, mas)', globals=globals(), number=10)*100:10.3f} ms")
print(f"vectorized: {timeit('acc_vect(pos, mas)', globals=globals(), number=10)*100:10.3f} ms")
print(f"blas: {timeit('acc_blas(pos, mas)', globals=globals(), number=10)*100:10.3f} ms")
# Cross-check: all four implementations must agree on the same input.
A = accelerations(pos, mas)
AV = acc_vect(pos, mas)
AB = acc_blas(pos, mas)
AP = acc_pm(pos, mas)
assert np.allclose(A, AV) and np.allclose(AB, AV) and np.allclose(AP, AV)
Sample run, comparing the OP's loops, my pure numpy vectorization, and @P Mende's version.
loops: 3213.130 ms
pmende: 41.480 ms
vectorized: 43.860 ms
blas: 7.726 ms
We can see that
1) P Mende is slightly better than I at vectorizing
2) blas is ~5 times as fast; please note that my blas is not very good; I suspect with an optimized blas you may get even better (numpy would be expected to run faster too on a better blas, though)
3) any of the answers is much faster than loops
A follow up to my comments on your original post:
from numpy.linalg import norm
def accelerations(positions, masses, G=1):
    """Compute the net gravitational acceleration of every body by broadcasting.

    Params:
    - positions: numpy array of size (n,3)
    - masses: numpy array of size (n,)
    - G: gravitational constant (defaults to 1)

    Returns:
    - numpy array of size (n,3) with the net acceleration of each body
    """
    # mass_matrix[i, j, 0] = m_i * m_j
    mass_matrix = masses.reshape((1, -1, 1)) * masses.reshape((-1, 1, 1))
    # disps[i, j] = positions[j] - positions[i]
    disps = positions.reshape((1, -1, 3)) - positions.reshape((-1, 1, 3))  # displacements
    dists = norm(disps, axis=2)
    # Zero distances come with zero displacement, so the stand-in value only
    # prevents the division warning; the resulting force is still zero.
    dists[dists == 0] = 1  # Avoid divide by zero warnings
    forces = G * disps * mass_matrix / np.expand_dims(dists, 2)**3
    # Sum the forces on body i over all partners j, then a = F / m.
    return forces.sum(axis=1) / masses.reshape(-1, 1)
Some things to consider:
You only need half the distances; once you've calculated D[i][j], that's the same as -D[j][i].
You can do df2 = df.apply(lambda x:gravitational_constant/x**3)
You can generate a dataframe that records, for each pair of bodies, the product of their masses. You only have to do that once, and then you can pass it to accelerations every time you call it.
Then df.product(df2).product(mass_products).sum().div(masses) gives you the accelerations.
Thank you for all of your constructive criticism on my last post. I have made some changes, but alas my code is still not working and I can't figure out why. When I run this version, I get a runtime warning about an invalid value encountered in matmul.
My code is given as
from __future__ import division
import numpy as np
from scipy.linalg import eig
from scipy.linalg import toeplitz
def poldif(*arg):
    """
    Calculate differentiation matrices on arbitrary nodes.

    Returns the differentiation matrices D1, D2, .. DM corresponding to the
    M-th derivative of the function f at arbitrarily specified nodes. The
    differentiation matrices can be computed with unit weights or
    with specified weights.

    Parameters
    ----------
    x : ndarray
        vector of N distinct nodes
    M : int
        maximum order of the derivative, 0 < M <= N - 1

    OR (when computing with specified weights)

    x : ndarray
        vector of N distinct nodes
    alpha : ndarray
        vector of weight values alpha(x), evaluated at x = x_j.
    B : ndarray
        matrix of size M x N, where M is the highest derivative required.
        Row l-1 contains the quantities beta_{l,j} = (l-th derivative of
        alpha(x)) / alpha(x), evaluated at x = x_j.

    Returns
    -------
    DM : ndarray
        M x N x N array of differentiation matrices

    Raises
    ------
    TypeError
        if the number of positional arguments is not two or three.
    ValueError
        if, in the two-argument form, M >= N.

    Notes
    -----
    This function returns M differentiation matrices corresponding to the
    1st, 2nd, ... M-th derivatives on arbitrary nodes specified in the array
    x. The nodes must be distinct but are, otherwise, arbitrary. The
    matrices are constructed by differentiating the N-th order Lagrange
    interpolating polynomial that passes through the specified points.

    The M-th derivative of the grid function f is obtained by the matrix-
    vector multiplication

    .. math::

        f^{(m)}_i = D^{(m)}_{ij}f_j

    This function is based on code by Rex Fuzzle

    References
    ----------
    ..[1] B. Fornberg, Generation of Finite Difference Formulas on Arbitrarily
    Spaced Grids, Mathematics of Computation 51, no. 184 (1988): 699-706.

    ..[2] J. A. C. Weidemann and S. C. Reddy, A MATLAB Differentiation Matrix
    Suite, ACM Transactions on Mathematical Software, 26, (2000) : 465-519
    """
    if len(arg) not in (2, 3):
        raise TypeError('number of arguments is either two OR three')

    if len(arg) == 2:
        # unit weight function : arguments are nodes and derivative order
        x, M = arg
        x = np.asarray(x)
        N = np.size(x)
        if M >= N:
            raise ValueError("Derivative order cannot be larger or equal to number of points")
        alpha = np.ones(N)
        B = np.zeros((M, N))
    else:
        # specified weight function : arguments are nodes, weights and B matrix
        x, alpha, B = arg
        x = np.asarray(x)
        N = np.size(x)
        M = B.shape[0]

    L = np.eye(N, dtype=bool)               # logical identity matrix
    DX = x[:, None] - x[None, :]            # DX contains entries x(k)-x(j)
    DX[L] = 1                               # put 1's on the main diagonal
    c = alpha * np.prod(DX, 1)              # quantities c(j)
    C = c[:, None] / c[None, :]             # matrix with entries c(k)/c(j)
    Z = 1 / DX                              # Z contains entries 1/(x(k)-x(j))
    Z[L] = 0                                # with zeros on the diagonal
    # X is Z' with the diagonal entries removed: column j holds
    # 1/(x(j)-x(k)) for all k != j, in increasing k.
    X = Z[~L].reshape(N, N - 1).T

    Y = np.ones([N - 1, N])                 # Y is matrix of cumulative sums
    D = np.eye(N)                           # running differentiation matrix
    DM = np.empty((M, N, N))                # differentiation matrices

    for ell in range(1, M + 1):
        # diagonal entries come from the last row of the cumulative sums
        Y = np.cumsum(np.vstack((B[ell - 1, :], ell * Y[0:N - 1, :] * X)), 0)
        # off-diagonal entries: recurrence on the previous-order matrix
        D = ell * Z * (C * np.diag(D)[:, None] - D)
        D[L] = Y[N - 1, :]
        DM[ell - 1, :, :] = D

    return DM
def herdif(N, M, b=1):
    """
    Calculate differentiation matrices using Hermite collocation.

    Returns the differentiation matrices D1, D2, .. DM corresponding to the
    M-th derivative of the function f, at the N Hermite nodes (the roots of
    the N-th degree Hermite polynomial, scaled by b).

    Parameters
    ----------
    N : int
        number of grid points
    M : int
        maximum order of the derivative; the checks below enforce
        0 < M < N - 1
    b : float, optional
        scale parameter, real and positive

    Returns
    -------
    x : ndarray
        array of the N Hermite nodes, which are zeros of the N-th degree
        Hermite polynomial, scaled by b
    DM : ndarray
        M x N x N array of differentiation matrices

    Raises
    ------
    ValueError
        if M >= N - 1 or M <= 0.

    Notes
    -----
    This function returns M differentiation matrices corresponding to the
    1st, 2nd, ... M-th derivatives on a Hermite grid of N points.

    The M-th derivative of the grid function f is obtained by the matrix-
    vector multiplication

    .. math::

        f^{(m)}_i = D^{(m)}_{ij}f_j

    References
    ----------
    ..[1] B. Fornberg, Generation of Finite Difference Formulas on Arbitrarily
    Spaced Grids, Mathematics of Computation 51, no. 184 (1988): 699-706.

    ..[2] J. A. C. Weidemann and S. C. Reddy, A MATLAB Differentiation Matrix
    Suite, ACM Transactions on Mathematical Software, 26, (2000) : 465-519

    ..[3] R. Baltensperger and M. R. Trummer, Spectral Differencing With A
    Twist, SIAM Journal on Scientific Computing 24, (2002) : 1465-1487
    """
    if M >= N - 1:
        raise ValueError('number of nodes must be greater than M + 1')
    if M <= 0:
        raise ValueError('derivative order must be at least 1')

    x = herroots(N)                 # compute Hermite nodes
    alpha = np.exp(-x * x / 2)      # compute Hermite weights.

    # construct beta(l,j) = alpha^(l)(x_j) / alpha(x_j) recursively; for
    # alpha = exp(-x^2/2): beta_0 = 1, beta_1 = -x,
    # beta_l = -x * beta_(l-1) - (l-1) * beta_(l-2)
    beta = np.zeros([M + 1, N])
    beta[0, :] = np.ones(N)
    beta[1, :] = -x
    for ell in range(2, M + 1):
        beta[ell, :] = -x * beta[ell - 1, :] - (ell - 1) * beta[ell - 2, :]

    # remove initialising row from beta
    beta = np.delete(beta, 0, 0)

    # compute differentiation matrix (b=1)
    DM = poldif(x, alpha, beta)

    # scale nodes by the factor b
    x = x / b

    # scale the l-th derivative matrix by b**l (chain rule for x -> x / b)
    for ell in range(M):
        DM[ell, :, :] = (b ** (ell + 1)) * DM[ell, :, :]

    return x, DM
def herroots(N):
    """
    Compute roots of the Hermite polynomial of degree N

    Parameters
    ----------
    N : int
        degree of the Hermite polynomial

    Returns
    -------
    x : ndarray
        1-D array of length N with the Hermite roots in ascending order
    """
    # Jacobi matrix: zero diagonal, sqrt(1), sqrt(2), ... on both off-diagonals
    d = np.sqrt(np.arange(1, N))
    J = np.diag(d, 1) + np.diag(d, -1)

    # J is real and symmetric, so the symmetric eigenvalue solver applies: it
    # returns real eigenvalues already sorted in ascending order, with no
    # spurious imaginary parts to discard.
    mu = np.linalg.eigvalsh(J)

    # normalise the eigenvalues to obtain the roots
    return mu / np.sqrt(2)
# Model coefficients
a = 1 - 1j
b = 2 + 0.2j
c1 = 0.34
c2 = 0.005
alpha1 = (4 * c2 / a) ** 0.25
# NOTE(review): operator precedence makes this (b / 2) * a; confirm that
# b / (2 * a) was not intended. The value is not used below.
alpha2 = b / 2 * a
Nx = 220

# hermite differentiation matrices
x, D = herdif(Nx, 2, np.real(alpha1))
D1 = D[0, :]
D2 = D[1, :]

# integration weights
diff = np.diff(x)
p = np.concatenate([np.zeros(1), diff])
q = np.concatenate([diff, np.zeros(1)])
w = (p + q) / 2
Q = np.diag(w)

# Discretised operator
# (elementwise product of two diagonal matrices == diag(x**2))
const = c1 * np.diag(np.ones(len(x))) - c2 * (np.diag(x) * np.diag(x))
A = a * D2 - b * D1 + const

##### Timestepping
tmax = 200
tmin = 0
dt = 1
# np.linspace needs an integer sample count; true division yields a float.
n = int(round((tmax - tmin) / dt))
# NOTE(review): n samples on [0, tmax] are spaced tmax / (n - 1) apart, not dt;
# only len(tvec) is used below.
tvec = np.linspace(0, tmax, n, endpoint=True)
# NOTE: from here on `q` is the state history and no longer the weight vector.
q = np.zeros((Nx, len(tvec)), dtype=complex)
f = np.zeros((Nx, len(tvec)), dtype=complex)
q0 = np.ones(Nx) * 10**4
q[:, 0] = q0

# qnew - qold = dt*Aqold + dt*N(qold,qold,qold)
# qnew - qold = dt*Aqnew - dt*N(qold,qold,qold)
# therefore qnew - qold = 0.5*dtAqold + 0.5*dt*Aqnew + dtN(qold,qold,qold)
# rearranging to give qnew( 1- 0.5Adt) = (1 + 0.5Adt) + dt N(qold,qold,qold)
from numpy.linalg import inv
inverted = inv(np.eye(Nx) - 0.5 * A * dt)
forqold = (np.eye(Nx) + 0.5 * A * dt)
firstterm = np.matmul(inverted, forqold)
for t in range(0, len(tvec) - 1):
    # cubic nonlinearity |q|^2 q evaluated at the old time level
    nl = abs(np.square(q[:, t])) * q[:, t]
    q[:, t + 1] = np.matmul(firstterm, q[:, t]) - dt * np.matmul(inverted, nl)
where the Hermite differentiation matrices can be found online and are in a different file. This code blows up after five iterations, which I cannot understand, as I don't see how it differs from the MATLAB code found here https://www.bagherigroup.com/research/open-source-codes/
I would really appreciate any help.
Error in:
q[:,t+1] = inverted*forgold*np.array(q[:,t]) + inverted*dt*np.array(nl)
q[:, t+1] indexes a 2d array (probably not a np.matrix which is more MATLAB like). This indexing reduces the number of dimensions by 1, hence the (220,) shape in the error message.
The error message says the RHS is (220,220). That shape probably comes from inverted and forgold. np.array(q[:,t]) is 1d. Multiplying a (220,220) by a (220,) is ok, but you can't put that square array into a 1d slot.
Both uses of np.array in the error line are superfluous. Their arguments are already ndarray.
As for the loop, it may be necessary. It looks like q[:,t+1] is a function of q[:,t], a serial, rather than parallel operation. Those are harder to render as 'vectorized' (unless you can use `cumsum`-like operations).
Note that in numpy * is elementwise multiplication, the .* of MATLAB. np.dot and @ are used for matrix multiplication.
q[:,t+1]= invert@q[:,t]
would work
I'm using multiprocessing.pool to perform multiple integration in parallel.
In this program I integrate an equation of motion for different realization of noise by generating the dW 3D array. The first part of the program is just definition of parameters and the generation of arrays needed for the calculation.
I generate dW outside the function since I know that otherwise I have to reseed at each process to not obtain the same random sequence.
Euler(replica) function is the function which have to be parallelize. This include a for loop over a single process for the numerical integration. The arg replica is the number of the replica of my system as stored in the "replicas" array, which is the argument passed in pool.map.
import numpy as np
from multiprocessing import Pool
# parameters
N = 30 # number of sites
T = 1 # total time
dt = 0.1 # time step
l = 0 # initially localized state on site l
e = 0.0 # site energy
v = 1.0 # hopping coefficient
# mu is the mean passed to np.random.normal below; sigma is the factor that
# multiplies the noise term inside Euler (the scale of the normal draw itself
# is the literal 1.0 further down).
mu, sigma = 0, 1.0
num_replicas = 8 # number of replicas of the system
processes=2 # number of processes
# constant vector (all entries e) forming the diagonal of the Hamiltonian
E = np.ones(N) * e
# vector which represents the upper/lower diagonal terms of the Hopping Matrix and the Hamiltonian
V = np.ones(N-1) * v
# definition of the tight-binding Hamiltonian (tridiagonal)
H = np.diag(E) + np.diag(V, k=1) + np.diag(V, k=-1)
# corner elements of the Hamiltonian: couple the first and the last site
H[0, -1] = v
H[-1, 0] = v
# time array
time_array = np.arange(0, T, dt)
# site array
site_array = np.arange(N)
# initial state: amplitude 1 on site l, zero elsewhere
psi_0 = np.zeros((N), dtype=complex)
psi_0[l] = 1. + 0.j
# initialization of the state array, shape (time steps, sites);
# this module-level array is the one Euler writes into and returns
Psi = np.zeros((len(time_array), N), dtype=complex)
Psi[0,:] = psi_0
# replicas 1D array
replicas = np.arange(0, num_replicas)
# random 3D array of noise increments, shape (time steps, replicas, sites),
# scaled by sqrt(dt)
dW = np.random.normal(mu, 1.0, (len(time_array), num_replicas, N)) * np.sqrt(dt)
def Euler(replica):
    """Integrate one noise realisation with an explicit Euler scheme.

    Parameters
    ----------
    replica : int
        Index into the second axis of the module-level noise array ``dW``.

    Returns
    -------
    ndarray
        The module-level array ``Psi``, whose rows 1.. are overwritten with
        the normalised state at every time step.

    NOTE: because ``Psi`` is a module-level array, every call executed in the
    same process writes into, and returns, the very same object.
    """
    psi_0 = np.zeros((N), dtype=complex)
    psi_0[l] = 1. + 0.j
    psi = psi_0
    for i in np.arange(1, len(time_array)):
        # deterministic hopping term, multiplicative noise term, and a
        # damping term proportional to sigma**2
        psi += -1.j * (H @ psi) * dt - 1.j * sigma * psi * dW[i, replica, :] - 0.5 * (sigma**2) * psi * dt
        # renormalise: psi @ conj(psi) is the squared norm of the state
        psi /= np.sqrt(psi @ np.conj(psi))
        Psi[i, :] = psi
    return Psi
# Guard the pool creation so that worker processes which re-import this module
# (the "spawn" start method) do not try to start pools of their own, and use
# the pool as a context manager so its workers are always cleaned up.
if __name__ == "__main__":
    with Pool(processes) as pool:
        Psi = pool.map(Euler, replicas)
    # list of per-replica results -> array, then put the time axis first
    Psi = np.asarray(Psi)
    Psi = np.swapaxes(Psi, 0, 1)
Empirically I found that if num_replicas > 4 * processes, as passed to the pool.map function, it seems that two processes take the same argument, as if the same calculation is repeated two times. Instead, for `num_replicas <= 4*processes` I get the expected result: each process is different from the others.
This is not due to the generation of the random matrix dW, since each row is uncorrelated, so I ascribe this behavior to my use of multiprocessing.pool.
I think you should initialize your psi_0 and "Psi" inside the Euler function.
I tried to reproduce your results and, indeed, I found that when num_replicas > 4 * processes you get the same results from multiple processors. But I think this is due to the fact that Psi, in your case, it's a global variable.
Modifying the code as following it gives different results for each num_replicas (by the way, why do you use site_array? It is not used anywhere).
import numpy as np
from multiprocessing import Pool
# parameters
N = 3 # number of sites
T = 1 # total time
dt = 0.1 # time step
l = 0 # initially localized state on site l
e = 0.0 # site energy
v = 1.0 # hopping coefficient
# mu is the mean passed to np.random.normal below; sigma is the factor that
# multiplies the noise term inside Euler (the scale of the normal draw itself
# is the literal 1.0 further down).
mu, sigma = 0, 1.0
num_replicas = 10 # number of replicas of the system
processes=2 # number of processes
# constant vector (all entries e) forming the diagonal of the Hamiltonian
E = np.ones(N) * e
# vector which represents the upper/lower diagonal terms of the Hopping Matrix and the Hamiltonian
V = np.ones(N-1) * v
# definition of the tight-binding Hamiltonian (tridiagonal)
H = np.diag(E) + np.diag(V, k=1) + np.diag(V, k=-1)
# corner elements of the Hamiltonian: couple the first and the last site
H[0, -1] = v
H[-1, 0] = v
# time array
time_array = np.arange(0, T, dt)
# replicas 1D array
replicas = np.arange(0, num_replicas)
# random 3D array of noise increments, shape (time steps, replicas, sites),
# scaled by sqrt(dt)
dW = np.random.normal(mu, 1.0, (len(time_array), num_replicas, N)) * np.sqrt(dt)
def Euler(replica):
    """Integrate one noise realisation with an explicit Euler scheme.

    Parameters
    ----------
    replica : int
        Index into the second axis of the module-level noise array ``dW``.

    Returns
    -------
    ndarray, shape (len(time_array), N)
        A freshly allocated trajectory; row i is the normalised state at time
        step i. Nothing is shared between calls.
    """
    # initial state: amplitude 1 on site l, zero elsewhere
    psi = np.zeros((N), dtype=complex)
    psi[l] = 1. + 0.j
    # per-call state array, so results of different replicas cannot overwrite
    # each other
    Psi = np.zeros((len(time_array), N), dtype=complex)
    Psi[0, :] = psi
    for i in np.arange(1, len(time_array)):
        # deterministic hopping term, multiplicative noise term, and a
        # damping term proportional to sigma**2
        psi += -1.j * (H @ psi) * dt - 1.j * sigma * psi * dW[i, replica, :] - 0.5 * (sigma**2) * psi * dt
        # renormalise: psi @ conj(psi) is the squared norm of the state
        psi /= np.sqrt(psi @ np.conj(psi))
        Psi[i, :] = psi
    return Psi
# Guard the pool creation so that worker processes which re-import this module
# (the "spawn" start method) do not try to start pools of their own, and use
# the pool as a context manager so its workers are always cleaned up.
if __name__ == "__main__":
    with Pool(processes) as pool:
        Psi = pool.map(Euler, replicas)
    # list of per-replica results -> array, then put the time axis first
    Psi = np.asarray(Psi)
    Psi = np.swapaxes(Psi, 0, 1)
As @Fabrizio pointed out, Psi is shared between invocations of Euler. This is generally a bad thing to do and another example of why it's a bad idea to have "global mutable state": it's just too easy for things to break in unexpected ways.
The reason it fails in this case is subtle and due to the way Pool.map accumulates several results in each process before pickling them and returning them to the parent/controlling process. You can see this by setting the chunksize parameter of map to 1, causing it to return results immediately and hence not overwrite intermediate results.
it's equivalent to the following minimal working example:
from multiprocessing import Pool
# Single module-level list shared by every call made in the same process.
arr = [None]


def mutate_global(i):
    """Store ``i`` in the shared module-level list and return that same list.

    Every call returns the identical object, so results collected from
    several calls in one process all show the most recently stored value.
    """
    arr[0] = i
    return arr
# Guarded so that worker processes re-importing this module (the "spawn" start
# method) do not create pools of their own.
if __name__ == "__main__":
    with Pool(2) as pool:
        # chunksize=5: each worker handles 5 consecutive inputs before its
        # results are sent back to the parent
        out = pool.map(mutate_global, range(10), chunksize=5)
the last time I ran this I got:
[[4], [4], [4], [4], [4], [9], [9], [9], [9], [9]]
you can change the chunksize parameter to get an idea of what it's doing, or maybe run with the following version:
def mutate_local(i):
    """Store ``i`` in a list created inside the call and return that list.

    Each call builds its own list, so results of different calls never share
    state.
    """
    arr = [None]
    arr[0] = i
    return arr
which "just works", and is the equivalent of doing what @Fabrizio describes, where you create Psi inside Euler rather than using a single global variable
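I'm trying to do a particle in a box simulation with no potential field. It took me some time to find out that simple explicit and implicit methods break unitary time evolution, so I resorted to Crank–Nicolson, which is supposed to be unitary. But when I try it I find that it still is not. I'm not sure what I'm missing. The formulation I used is this:
$$\frac{\psi^{n+1} - \psi^{n}}{\Delta t} = \frac{i}{2\,\Delta x^{2}}\, T \left(\psi^{n+1} + \psi^{n}\right)$$
where $T = \mathrm{tridiag}(1, -2, 1)$ is the tridiagonal Toeplitz matrix for the second derivative wrt x and
$$\alpha = \frac{i\,\Delta t}{2\,\Delta x^{2}}$$
The system simplifies to
$$A\,\psi^{n+1} = B\,\psi^{n}$$
The A and B matrices are:
$$A = I - \alpha T = \mathrm{tridiag}(-\alpha,\; 1 + 2\alpha,\; -\alpha), \qquad B = I + \alpha T = \mathrm{tridiag}(\alpha,\; 1 - 2\alpha,\; \alpha)$$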
I just solve this linear system for using the sparse module. The math makes sense and I found the same numeric scheme in some papers so that led me to believe my code is where the problem is.
Here's my code so far:
import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import toeplitz
from scipy.sparse.linalg import spsolve
from scipy import sparse
# Spatial discretisation
N = 100
x = np.linspace(0, 1, N)
dx = x[1] - x[0]
# Time discretisation
K = 10000
t = np.linspace(0, 10, K)
dt = t[1] - t[0]
# Complex Crank-Nicolson coefficient i*dt / (2*dx^2)
alpha = (1j * dt) / (2 * (dx ** 2))
# System matrices for the N-2 interior points, built from a first column only.
# NOTE(review): with a single argument, scipy.linalg.toeplitz takes the first
# row as the conjugate of the first column, so for complex alpha the entries
# above the diagonal are conj(-alpha) / conj(alpha) rather than -alpha / alpha.
A = sparse.csc_matrix(toeplitz([1 + 2 * alpha, -alpha, *np.zeros(N-4)]), dtype=np.cfloat) # 2 less for both boundaries
B = sparse.csc_matrix(toeplitz([1 - 2 * alpha, alpha, *np.zeros(N-4)]), dtype=np.cfloat)
# Initial and boundary conditions (localized gaussian)
psi = np.exp((1j * 50 * x) - (200 * (x - .5) ** 2))
b = B.dot(psi[1:-1])
psi[0], psi[-1] = 0, 0
for index, step in enumerate(t):
    # Within the domain: solve A psi_new = b for the interior points
    psi[1:-1] = spsolve(A, b)
    # Enforce boundaries
    # psi[0], psi[N - 1] = 0, 0
    # Right-hand side for the next step
    b = B.dot(psi[1:-1])
    # Square integration to show if it's unitary
    # NOTE(review): the second positional parameter of np.trapz is `x`, not
    # `dx`; confirm whether `dx=dx` was intended here.
    print(np.trapz(np.abs(psi) ** 2, dx))
You are relying on the Toeplitz constructor to produce a symmetric matrix, so that the entries below the diagonal are the same as above the diagonal. However, the documentation for scipy.linalg.toeplitz(c, r=None) says not "transpose", but
"If r is not given, r == conjugate(c) is assumed."
so that the resulting matrix is self-adjoint. In this case this means that the entries above the diagonal have their sign switched.
It makes no sense to first construct a dense matrix and then extract a sparse representation. Construct it as sparse tridiagonal matrix from the start, using scipy.sparse.diags
# Tridiagonal system matrices for the N-2 interior points, assembled directly
# in sparse form: sub-diagonal, main diagonal, super-diagonal.
A = sparse.diags([(N-3)*[-alpha], (N-2)*[1+2*alpha], (N-3)*[-alpha]], [-1, 0, 1], format="csc")
B = sparse.diags([(N-3)*[alpha], (N-2)*[1-2*alpha], (N-3)*[alpha]], [-1, 0, 1], format="csc")
I have a numpy array filled with intensity readings at different radii in a uniform circle (for context, this is a 1D radiative transfer project for protostellar formation models: while much better models exist, my supervisor wants me to have the experience of producing one so I understand how others work).
I want to take that 1d array, and "rotate" it through a circle, forming a 2D array of intensities that could then be shown with imshow (or, with a bit of work, aplpy). The final array needs to be 2d, and the projection needs to be Cartesian, not polar.
I can do it with nested for loops, and I can do it with lookup tables, but I have a feeling there must be a neat way of doing it in numpy or something.
Any ideas?
I have had to go back and recreate my (frankly horrible) mess of for loops and if statements that I had before. If I really tried, I could probably get rid of one of the loops and one of the if statements by condensing things down. However, the aim is not to make it work with for loops, but see if there is a built in way to rotate the array.
impB is an array that differs slightly from what I stated it was before. Its actually just a list of radii where particles are detected. I then bin those into radius bins to get the intensity (or frequency if you prefer) in each radius. R is the scale factor for my radius as I run the model in a dimensionless way. iRes is a resolution scale factor, essentially how often I want to sample my radial bins. Everything else should be clear.
# Output image of 2*iRes x 2*iRes pixels.
# NOTE(review): np.ndarray(shape=...) does not initialise its memory, so any
# pixel that is never assigned below keeps an arbitrary value.
radJ = np.ndarray(shape=(2*iRes, 2*iRes)) # Create array of 2xRadius square
for i in range(iRes):
    n = len(impB[np.where(impB[:] < ((i+1.) * (R / iRes)))]) # Number of radii below the outer edge of bin i
    m = len(impB[np.where(impB[:] <= ((i) * (R / iRes)))]) # Number of radii at or below the inner edge of bin i
    a = (((i + 1) * (R / iRes))**2 - ((i) * (R / iRes))**2) * math.pi # Area of the annulus between the two bin edges, used for normalisation
    for x in range(iRes):
        for y in range(iRes):
            # NOTE(review): the two nested conditions below compare against the
            # same bound and are mutually exclusive, so the assignments can
            # never execute as written; the inner and outer radius of bin i
            # were presumably intended. TODO confirm.
            if (x**2 + y**2) < (i * iRes)**2:
                if (x**2 + y**2) >= (i * iRes)**2: # Checks for radius, and puts in cartesian space
                    # Mirror the value into all four quadrants around the
                    # centre pixel (iRes, iRes).
                    radJ[x+iRes,y+iRes] = (n-m) / a # Put in actual intensity bins
                    radJ[x+iRes,-y+iRes] = (n-m) / a
                    radJ[-x+iRes,y+iRes] = (n-m) / a
                    radJ[-x+iRes,-y+iRes] = (n-m) / a
Nested loops are a simple approach for that. With ri_data_r and y containing your radius values (difference to the middle pixel) and the array for rotation, respectively, I would suggest:
from scipy import interpolate
import numpy as np

# Example 1-D profile and the radius assigned to each of its samples.
y = np.random.rand(100)
ri_data_r = np.linspace(-len(y)/2, len(y)/2, len(y))
interpol_index = interpolate.interp1d(ri_data_r, y)

xv = np.arange(-1, 1, 0.01)  # adjust your matrix values here
X, Y = np.meshgrid(xv, xv)

# Evaluate the interpolant at the radius of every pixel in a single call.
# This gives the same grid as visiting each (x, y) pixel in two nested loops,
# without the per-pixel Python overhead and without reusing the name `y`
# (the profile data) as a loop variable.
profilegrid = interpol_index(np.hypot(X, Y))
This will give you exactly what you are looking for. You just have to take in your array and calculate a symmetric array ri_data_r that has the same length as your data array and contains the distance between the actual data and the middle of the array. The code is doing this automatically.
I stumbled upon this question in a different context and I hope I understood it right. Here are two other ways of doing this. The first uses skimage.transform.warp with interpolation of the desired order (here we use order=0, nearest-neighbor). This method is slower but more precise and needs less memory than the second method.
The second one does not use interpolation, therefore is faster but also less precise and needs way more memory because it stores each 2D array containing one tilt until the end, where they are averaged with np.nanmean().
The difference between both solutions stemmed from the problem of handling the center of the final image where the tilts overlap the most, i.e. the first one would just add values with each tilt ending up out of the original range. This was "solved" by clipping the matrix in each step to a global_min and global_max (consult the code). The second one solves it by taking the mean of the tilts where they overlap, which forces us to use the np.nan.
Please, read the Example of usage and Sanity check sections in order to understand the plot titles.
Solution 1:
import numpy as np
from skimage.transform import warp
def rotate_vector(vector, deg_angle):
    """Place a 1-D vector on the middle row of a square image and rotate it.

    Parameters
    ----------
    vector : ndarray
        1-D array; its size sets the side length of the square output.
    deg_angle : float
        Rotation angle in degrees.

    Returns
    -------
    The result of ``warp`` for the requested ``output_shape`` of
    (vector.size, vector.size).
    """
    # Credit goes to skimage.transform.radon
    assert vector.ndim == 1, 'Pass only 1D vectors, e.g. use array.ravel()'
    center = vector.size // 2
    # Zero image with the vector written into its middle row
    square = np.zeros((vector.size, vector.size))
    square[center,:] = vector
    rad_angle = np.deg2rad(deg_angle)
    cos_a, sin_a = np.cos(rad_angle), np.sin(rad_angle)
    # 3x3 homogeneous matrix: a rotation by rad_angle combined with translation
    # terms that depend on `center` (presumably chosen so that the rotation is
    # about the image centre; verify against skimage.transform.radon)
    R = np.array([[cos_a, sin_a, -center * (cos_a + sin_a - 1)],
                  [-sin_a, cos_a, -center * (cos_a - sin_a - 1)],
                  [0, 0, 1]])
    # Approx. 80% of time is spent in this function
    return warp(square, R, clip=False, output_shape=((vector.size, vector.size)))
def place_vectors(vectors, deg_angles):
    """Accumulate rotated copies of several vectors into one square image.

    Parameters
    ----------
    vectors : ndarray
        Indexed as ``vectors[i]`` for the i-th angle; the size of its last
        axis sets the side length of the output.
    deg_angles : iterable of float
        Rotation angle, in degrees, for each vector.

    Returns
    -------
    ndarray
        Sum of all rotated vectors, clipped after every addition to the
        running minimum/maximum seen in the individual tilts.
    """
    matrix = np.zeros((vectors.shape[-1], vectors.shape[-1]))
    # Running extremes over all tilts; both start at 0
    global_min, global_max = 0, 0
    for i, deg_angle in enumerate(deg_angles):
        tilt = rotate_vector(vectors[i], deg_angle)
        global_min = tilt.min() if global_min > tilt.min() else global_min
        global_max = tilt.max() if global_max < tilt.max() else global_max
        matrix += tilt
        # Keep the accumulated values within the range of the tilts, since
        # overlapping tilts would otherwise add up beyond it
        matrix = np.clip(matrix, global_min, global_max)
    return matrix
Solution 2:
Credit for the idea goes to my colleague Michael Scherbela.
import numpy as np
def rotate_vector(vector, deg_angle):
    """Draw a 1-D vector as a rotated line through the centre of a square.

    Parameters
    ----------
    vector : ndarray
        1-D array of values; its size sets the side length of the output.
    deg_angle : float
        Angle of the line in degrees.

    Returns
    -------
    ndarray, shape (vector.size, vector.size)
        NaN everywhere except on the pixels hit by the line, which hold the
        vector values. When several samples round to the same pixel, the last
        one written wins.
    """
    assert vector.ndim == 1, 'Pass only 1D vectors, e.g. use array.ravel()'
    square = np.ones([vector.size, vector.size]) * np.nan
    radius = vector.size // 2
    # Signed distance of every sample from the centre of the vector
    r_values = np.linspace(-radius, radius, vector.size)
    rad_angle = np.deg2rad(deg_angle)
    # Nearest pixel for every sample. The builtin `int` is used because the
    # `np.int` alias no longer exists in current NumPy releases.
    ind_x = np.round(np.cos(rad_angle) * r_values + vector.size/2).astype(int)
    ind_y = np.round(np.sin(rad_angle) * r_values + vector.size/2).astype(int)
    # Keep indices that round past the border inside the image
    ind_x = np.clip(ind_x, 0, vector.size-1)
    ind_y = np.clip(ind_y, 0, vector.size-1)
    square[ind_y, ind_x] = vector
    return square
def place_vectors(vectors, deg_angles):
    """Average several rotated vectors into one square image.

    Parameters
    ----------
    vectors : iterable of ndarray
        1-D vectors, one per angle.
    deg_angles : iterable of float
        Angle, in degrees, for each vector.

    Returns
    -------
    ndarray
        Per-pixel mean over all tilts that cover the pixel; pixels covered by
        no tilt are 0.
    """
    import warnings

    tilts = np.array([rotate_vector(vector, deg_angle)
                      for deg_angle, vector in zip(deg_angles, vectors)])
    with warnings.catch_warnings():
        # Pixels that no tilt covers are NaN in every layer; nanmean reports
        # them with a RuntimeWarning, which is expected here because they are
        # replaced by 0 below.
        warnings.simplefilter('ignore', category=RuntimeWarning)
        matrix = np.nanmean(tilts, axis=0)
    return np.nan_to_num(matrix, copy=False, nan=0.0)
Example of usage:
r = 100 # Radius of the circle, i.e. half the length of the vector
n = int(np.pi * r / 8) # Number of vectors, e.g. number of tilts in tomography
v = np.ones(2*r) # One vector, e.g. one tilt in tomography
V = np.array([v]*n) # All vectors (n identical rows), e.g. a sinogram in tomography
# Rotate 1D vector to a specific angle (output is 2D)
angle = 45
rotated = rotate_vector(v, angle)
# Rotate each row of a 2D array according to its angle (output is 2D);
# the n angles are evenly spaced over [-90, 90), end point excluded
angles = np.linspace(-90, 90, num=n, endpoint=False)
inplace = place_vectors(V, angles)
Sanity check:
These are just simple checks which by no means cover all possible edge cases. Depending on your use case you might want to extend the checks and adjust the method.
# I. Sanity check
# Assuming n <= πr and v = np.ones(2r)
# Then sum(inplace) should be approx. equal to (n * (2πr - n)) / π
# which is an area that should be covered by the tilts
desired_area = (n * (2 * np.pi * r - n)) / np.pi
covered_area = np.sum(inplace)
covered_frac = covered_area / desired_area
print(f'This method covered {covered_frac * 100:.2f}% '
      'of the area which should be covered in total.')
# II. Sanity check
# Assuming n <= πr and v = np.ones(2r)
# Then a circle M with radius m <= r should be the largest circle which
# is fully covered by the vectors. I.e. its mean should be no less than 1.
# If n = πr then m = r.
# m = n / π
m = int(n / np.pi)
# Code for circular mask not included: create_circular_mask is not defined in
# this snippet and has to be supplied separately.
mask = create_circular_mask(2*r, 2*r, center=None, radius=m)
# Mean of the pixels selected by the mask
m_area = np.mean(inplace[mask])
print(f'Full radius r={r}, radius m={m}, mean(M)={m_area:.4f}.')
Code for plotting:
import matplotlib.pyplot as plt

plt.figure(figsize=(16, 8))

# Left panel: a single vector placed at `angle`.
plt.subplot(1, 2, 1)
rotated = np.nan_to_num(rotated)  # not necessary in case of the first method
plt.title(
    f'Output of rotate_vector(), angle={angle}°\n'
    f'Sum is {np.sum(rotated):.2f} and should be {np.sum(v):.2f}')
plt.imshow(rotated, cmap=plt.cm.Greys_r)

# Right panel: all vectors placed at their angles, with circle M outlined.
plt.subplot(1, 2, 2)
plt.title(
    f'Output of place_vectors(), r={r}, n={n}\n'
    f'Covered {covered_frac * 100:.2f}% of the area which should be covered.\n'
    f'Mean of the circle M is {m_area:.4f} and should be 1.0.')
plt.imshow(inplace, cmap=plt.cm.Greys_r)
circle = plt.Circle((r, r), m, color='r', fill=False)
# A patch is only drawn once it has been added to the axes.
plt.gca().add_patch(circle)
plt.gca().legend([circle], [f'Circle M (m={m})'])

plt.show()
I have an m × n × n numpy.ndarray of m simultaneously diagonalizable square matrices and would like to use numpy to obtain their simultaneous eigenvalues.
For example, if I had
from numpy import einsum, diag, array, linalg, random

# Orthogonal basis: the right-singular vectors of a random 3x3 matrix.
U = linalg.svd(random.random((3, 3)))[-1]
# M[a] = U @ D[a] @ U.T for each diagonal matrix D[a]; all M[a] share the
# eigenvectors in U and are therefore simultaneously diagonalizable.
M = einsum(
    "ij,ajk,lk->ail",
    U, array([diag([2, 2, 0]), diag([1, -1, 1])]), U)
the two matrices in M are simultaneously diagonalizable, and I am looking for a way to obtain the array
array([[2., 1.],
[2., -1.],
[0., 1.]])
(up to permutation of the rows) from M. Is there a built-in or easy way to get this?
There is a fairly simple and very elegant simultaneous diagonalization algorithm based on Givens rotations that was published by Cardoso and Souloumiac in 1996:
Cardoso, J., & Souloumiac, A. (1996). Jacobi Angles for Simultaneous Diagonalization. SIAM Journal on Matrix Analysis and Applications, 17(1), 161–164. doi:10.1137/S0895479893259546
I've attached a numpy implementation of the algorithm at the end of this response. Caveat: It turns out simultaneous diagonalization is a bit of a tricky numerical problem, with no algorithm (to the best of my knowledge) that guarantees global convergence. However, the cases in which it does not work (see the paper) are degenerate and in practice I have never had the Jacobi angles algorithm fail on me.
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
Routines for simultaneous diagonalization
Arun Chaganty <arunchaganty#gmail.com>
import numpy as np
from numpy import zeros, eye, diag
from numpy.linalg import norm
def givens_rotate(A, i, j, c, s):
    """
    Rotate A along axis (i,j) by c and s.

    Replaces row ``i`` with ``c * A[i] + s * A[j]`` and row ``j`` with
    ``c * A[j] - s * A[i]``. The update is done in place.

    Parameters
    ----------
    A : 2D numpy array; modified in place. Results are cast to ``A``'s dtype
        on assignment, so an integer array would truncate them.
    i, j : indices of the two rows to mix.
    c, s : the two rotation coefficients (``jacobi_angles`` passes the
        cosine and sine of the rotation angle).

    Returns
    -------
    The same array object ``A``.
    """
    Ai, Aj = A[i, :], A[j, :]
    # Ai and Aj are views, but both right-hand sides are evaluated into new
    # arrays before either row is written, so neither update sees the other.
    A[i, :], A[j, :] = c * Ai + s * Aj, c * Aj - s * Ai
    return A
def givens_double_rotate(A, i, j, c, s):
    """
    Rotate A along axis (i,j) by c and s, on rows and then on columns.

    First mixes rows ``i`` and ``j`` (``A <- G A``), then mixes columns ``i``
    and ``j`` with the same coefficients (``A <- A G^T``), where ``G`` is the
    identity except for ``G[i,i] = G[j,j] = c``, ``G[i,j] = s`` and
    ``G[j,i] = -s``. The update is done in place.

    Parameters
    ----------
    A : 2D numpy array; modified in place. Results are cast to ``A``'s dtype
        on assignment, so an integer array would truncate them.
    i, j : indices of the row/column pair to mix.
    c, s : the two rotation coefficients (``jacobi_angles`` passes the
        cosine and sine of the rotation angle).

    Returns
    -------
    The same array object ``A``.
    """
    # Row step. The right-hand sides are new arrays, so the row views read
    # here are not affected by the assignment itself.
    Ai, Aj = A[i, :], A[j, :]
    A[i, :], A[j, :] = c * Ai + s * Aj, c * Aj - s * Ai
    # Column step, applied to the already row-rotated matrix.
    A_i, A_j = A[:, i], A[:, j]
    A[:, i], A[:, j] = c * A_i + s * A_j, c * A_j - s * A_i
    return A
def jacobi_angles(*Ms, **kwargs):
    r"""
    Simultaneously diagonalize using Jacobi angles.

    Reference:
        Jean-Fran\c{c}ois Cardoso and Antoine Souloumiac,
        "Jacobi angles for simultaneous diagonalization",
        {SIAM} J. Mat. Anal. Appl., volume 17, number 1, pages 161--164,
        jan 1995.
        ftp://sig.enst.fr/pub/jfc/Papers/siam_note.ps.gz

    Algorithm:
    (a) Compute a Givens rotation for every pair of indices (i,j), i < j
            - build G = mean over matrices of g g', with
              g = (M_ii - M_jj, M_ij + M_ji)
            - theta = 0.5 * atan2(t_off, t_on + sqrt(t_on^2 + t_off^2)),
              where t_on = G_00 - G_11 and t_off = G_01 + G_10
            - c, s = cos(theta), sin(theta)
    (b) Update matrices by multiplying by the givens rotation R(i,j,c,s)
    (c) Repeat (a) until stopping criterion: |sin theta| <= threshold for
        all (i,j) pairs of a sweep, or until the sweep limit is reached

    Parameters
    ----------
    *Ms : one or more square 2D numpy arrays. Only the first is checked for
        squareness. THE ARRAYS ARE ROTATED IN PLACE; pass copies to keep the
        originals.
        NOTE(review): the update only uses M_ij + M_ji, which suggests real
        symmetric input is expected -- confirm against the paper.
    sweeps : keyword, maximum number of sweeps over all pairs (default 500).
    eps : keyword, threshold on |sin theta| below which a pair counts as
        converged (default 1e-8).
    rank : keyword, only pivots i < rank are processed (default: matrix size).

    Returns
    -------
    R : (m, m) array, the transpose of the accumulated rotations, so that
        ``R.T @ M_original @ R`` equals the rotated (near-diagonal) matrix.
    L : (m, len(Ms)) array; column k is the diagonal of the rotated Ms[k].
    err : sum over matrices of the norm of the remaining off-diagonal part.
    """
    assert len(Ms) > 0, "at least one matrix is required"
    m, n = Ms[0].shape
    assert m == n, "matrices must be square"

    sweeps = kwargs.get('sweeps', 500)
    threshold = kwargs.get('eps', 1e-8)
    rank = kwargs.get('rank', m)

    R = eye(m)

    # `range` instead of the Python-2-only `xrange`; works on both versions.
    for _ in range(sweeps):
        done = True
        for i in range(rank):
            for j in range(i + 1, m):
                G = zeros((2, 2))
                for M in Ms:
                    g = np.array([M[i, i] - M[j, j], M[i, j] + M[j, i]])
                    G += np.outer(g, g) / len(Ms)
                # Compute the eigenvector directly
                t_on, t_off = G[0, 0] - G[1, 1], G[0, 1] + G[1, 0]
                theta = 0.5 * np.arctan2(
                    t_off, t_on + np.sqrt(t_on * t_on + t_off * t_off))
                c, s = np.cos(theta), np.sin(theta)

                if abs(s) > threshold:
                    done = False
                    # Update the matrices and V
                    for M in Ms:
                        givens_double_rotate(M, i, j, c, s)
                    R = givens_rotate(R, i, j, c, s)

        # A full sweep without any significant rotation means convergence.
        if done:
            break
    R = R.T

    L = np.zeros((m, len(Ms)))
    err = 0
    for k, M in enumerate(Ms):
        # The off-diagonal elements of M should be 0
        L[:, k] = diag(M)
        err += norm(M - diag(diag(M)))

    return R, L, err
I am not aware of any direct solution. But why not just get the eigenvalues and the eigenvectors of the first matrix, and use the eigenvectors to transform all other matrices to diagonal form? Something like:
eigvals, eigvecs = np.linalg.eig(matrix1)
# Express matrix2 in the eigenbasis of matrix1; its diagonal then holds the
# eigenvalues of matrix2. The inverse of eigvecs is used instead of the
# transpose because eig() does not guarantee orthonormal eigenvectors.
eigvals2 = np.diagonal(
    np.dot(np.linalg.inv(eigvecs), np.dot(matrix2, eigvecs)))
You can then add the columns to an array via hstack if you like.
UPDATE: As pointed out below, this is only valid if no degenerate eigenvalues occur. Otherwise one would have to check first for the degenerate eigenvalues, then transform the 2nd matrix to a blockdiagonal form, and diagonalize eventual blocks bigger than 1x1 separately.
I am sure there is significant room for improvement in my solution, but I have come up with the following set of three functions doing the calculation for me in a semi-robust way.
def clusters(array,
             orig_indices=None,
             start=0,
             rtol=1e-05,
             atol=1e-08):
    """For an array, return a permutation that groups (approximately) identical numbers and the sizes of the resulting blocks.

    Clusters are formed greedily: the first remaining value ``x`` is the
    pivot, and every remaining value ``y`` with
    ``abs(y - x) <= atol + rtol * abs(x)`` joins its cluster. Clusters appear
    in order of first occurrence, so the values are grouped but not
    necessarily in ascending order.

    Parameters
    ----------
    array : 1D array-like of numbers.
    orig_indices : optional labels for the entries of ``array``; defaults to
        ``0 .. len(array) - 1``. The returned permutation is made of these.
    start : unused; kept so existing positional calls keep working.
    rtol, atol : relative and absolute tolerance of the closeness test. The
        defaults are the documented defaults of ``numpy.allclose``, written
        out because ``numpy.allclose.__defaults__`` is not available on
        current NumPy versions.

    Returns
    -------
    (permutation, sizes) : an integer array of indices in which the members
        of each cluster are contiguous, and a list with the size of each
        cluster in the same order. Empty input gives an empty integer array
        and an empty list.
    """
    values = numpy.asarray(array)
    if orig_indices is None:
        indices = numpy.arange(len(values))
    else:
        indices = numpy.asarray(orig_indices)

    perm_parts = []
    sizes = []
    # Iterative instead of recursive: no recursion-depth limit when there
    # are many distinct values.
    while len(values):
        pivot = values[0]
        close = abs(values - pivot) <= (atol + rtol * abs(pivot))
        # The pivot always belongs to its own cluster. This also guarantees
        # progress for NaN/inf pivots, whose comparison is False.
        close[0] = True
        perm_parts.append(indices[close])
        sizes.append(int(numpy.count_nonzero(close)))
        values = values[~close]
        indices = indices[~close]

    if not perm_parts:
        # Integer dtype so the result can always be used for indexing.
        return numpy.array([], dtype=int), []
    return numpy.concatenate(perm_parts), sizes
# NOTE(review): the default below reads a module-level name `dtype` that is
# not visible in this part of the file -- confirm it is defined before this
# function.
def permutation_matrix(permutation, dtype=dtype):
    """Return the n x n permutation matrix P for ``permutation``.

    Column ``i`` of ``P`` has its single 1 in row ``permutation[i]``, so
    ``x @ P`` equals ``x[permutation]`` and ``P.T @ A @ P`` reorders both
    the rows and the columns of ``A`` by ``permutation``.
    NOTE(review): the assignment inside the original loop was missing; this
    orientation is a choice -- verify against callers.

    Parameters
    ----------
    permutation : sequence of n integer-valued indices in ``range(n)``.
    dtype : dtype of the returned matrix.
    """
    n = len(permutation)
    P = numpy.zeros((n, n), dtype)
    # Cast so integer-valued float indices are accepted as well.
    rows = numpy.asarray(permutation, dtype=int)
    P[rows, numpy.arange(n)] = 1
    return P
def simultaneously_diagonalize(tensor, atol=1e-08):
    """Return the simultaneous eigenvalues of a stack of commuting matrices.

    The first matrix is diagonalized with ``numpy.linalg.eig`` and all
    matrices are transformed into its eigenbasis. If that makes every matrix
    diagonal, the diagonals are returned. Otherwise the eigenvalues of the
    first matrix are grouped with ``clusters``, the basis is reordered so
    equal eigenvalues are contiguous, and the remaining matrices are
    diagonalized recursively inside each degenerate block.

    Parameters
    ----------
    tensor : array-like of shape (..., n, n); leading axes are flattened
        into one stack axis of length m.
    atol : absolute tolerance for treating off-diagonal entries as zero. The
        default is the documented default of ``numpy.allclose``, written out
        because ``numpy.allclose.__defaults__`` is not available on current
        NumPy versions.

    Returns
    -------
    Array whose row k holds the eigenvalues of the k-th matrix, with columns
    referring to the same shared eigenvector. The shape is
    ``tensor.shape[:-1]`` when the first eigenbasis already diagonalizes
    everything, and (m, n) otherwise.

    Raises
    ------
    AssertionError : if the matrices are not block-diagonal in the
        eigenbasis of the first one, i.e. not co-diagonalizable.
    """
    tensor = numpy.asarray(tensor)
    old_shape = tensor.shape
    size = old_shape[-1]
    tensor = tensor.reshape((-1, size, size))
    off_diagonal = 1 - numpy.eye(size)

    _, diagonalizer = numpy.linalg.eig(tensor[0])
    # V^-1 @ T_k @ V for every matrix T_k of the stack (matmul broadcasts).
    diagonalization = numpy.matmul(
        numpy.linalg.inv(diagonalizer),
        numpy.matmul(tensor, diagonalizer))
    if numpy.allclose(off_diagonal * diagonalization, 0, atol=atol):
        return diagonalization.diagonal(
            axis1=-2, axis2=-1).reshape(old_shape[:-1])

    perm, cluster_sizes = clusters(diagonalization[0].diagonal())
    # Reorder rows and columns so equal eigenvalues of the first matrix are
    # contiguous. Indexing gives the same result as conjugating with a
    # permutation matrix, without building it or multiplying by it.
    perm = numpy.asarray(perm, dtype=int)
    diagonalization = diagonalization[:, perm[:, None], perm]

    # True where an entry lies outside the diagonal blocks of the clusters.
    labels = numpy.repeat(numpy.arange(len(cluster_sizes)), cluster_sizes)
    outside_blocks = labels[:, None] != labels[None, :]
    # Raised explicitly (not `assert`) so the check survives `python -O`.
    if not numpy.allclose(diagonalization * outside_blocks, 0, atol=atol):
        raise AssertionError(
            "the matrices are not co-diagonalizable")

    blocks = numpy.cumsum(cluster_sizes)
    start = 0
    other_part = []
    for block in blocks:
        # Within a degenerate block the first matrix is a multiple of the
        # identity, so only the remaining matrices need further work.
        other_part.append(
            simultaneously_diagonalize(
                diagonalization[1:, start:block, start:block], atol))
        start = block
    return numpy.vstack(
        (diagonalization[0].diagonal(axis1=-2, axis2=-1),
         numpy.hstack(other_part)))
If you know something about the size of the eigenvalues of the two matrices in advance, you can diagonalize a linear combination of the two matrices, with coefficients chosen to break the degeneracy. For example, if the eigenvalues of both lie between -10 and 10, you could diagonalize 100*M1 + M2. There's a slight loss of precision, but for many purposes it's good enough--and quick and easy!