I want to write simple toy code for the implicitly restarted Lanczos method.
Without implicit restarting the code works perfectly, but when I turn on restarting I cannot get a proper solution.
To my knowledge, the newly constructed w should be orthogonal to all of the new Lanczos vectors. For the first restart the orthogonality is well preserved, but from the second restart on the orthogonality breaks down significantly and the program does not find proper eigenvalues.
I have already spent several tens of hours trying to fix it, and I have almost given up. Here is my Python code:
"""
Author: Sunghwan Choi
Date Created: June 19, 2017
Python Version: 2.7 or 3.5
Reference for Lanczos algorithm
http://www.netlib.org/utk/people/JackDongarra/etemplates/node104.html
Reference for implicit restart
http://www.netlib.org/utk/people/JackDongarra/etemplates/node118.html
"""
import numpy as np
from scipy.sparse.linalg import eigsh
#from scipy.sparse import eye
from scipy.sparse import coo_matrix
from numpy import eye
def clustering(eigvals,eigvecs,tol=1e-2):
ret_eigvals=[]
ret_eigvecs=[]
for i in range(len(eigvals)):
for ret_eigval, ret_eigvec in zip (ret_eigvals,ret_eigvecs):
if (np.abs(eigvals[i]/ret_eigval-1.0)<tol ):
break
else:
ret_eigvals.append(eigvals[i])
ret_eigvecs.append(eigvecs[:,i])
ret_eigvals=np.array(ret_eigvals)
ret_eigvecs=np.array(ret_eigvecs).T
return ret_eigvals,ret_eigvecs
def check_conv(matrix, cal_eigval, cal_eigvec, tol):
indices=[]
for i in range(len(cal_eigval)):
if(np.linalg.norm(matrix.dot(cal_eigvec[:,i]) - cal_eigval[i]*cal_eigvec[:,i])< tol):
indices.append(i)
return indices
################ input
size=1600
max_step=20000
which='SA'
#implicit=False
implicit=True
energy_range=[0.0,6.0]
tol = 1e-5
n_eig=6
n_tol_check=40 # n_tol_check>n_eig ==0
######################
# generate 1D harmonic oscillator
h=0.1
matrix=-5/2*eye(size)
matrix+=4/3*(eye(size,k=1)+eye(size,k=-1))
matrix+=-1/12*(eye(size,k=2)+eye(size,k=-2))
matrix=-0.5*matrix/(h*h)
distance =lambda index: (index-size/2)*h
matrix+=np.diagflat( list(map( lambda i: 0.5*distance(i)**2, range(size))))
# solve eigenvalue problem to check validity
true_eigval,true_eigvec = eigsh(matrix,k=50,which=which)
indices = np.all([true_eigval>energy_range[0], true_eigval<energy_range[1]],axis=0)
true_eigval = true_eigval[indices]
true_eigvec = true_eigvec[:,indices]
#initialize variables
alpha=[]; beta=[]
index_v=0
restart_interval = n_tol_check+n_eig if implicit is not False else max_step
T = np.zeros((restart_interval,restart_interval))
v = np.zeros((size,restart_interval))
#Q=np.eye(restart_interval)
#generate initial vector
np.random.seed(1)
initial_vec = np.random.random(size)
#initial_vec = np.loadtxt("tmp")
w = v[:,index_v] = initial_vec/np.linalg.norm(initial_vec)
init_beta = np.linalg.norm(w)
# start Lanczos i_step
for i_step in range(max_step):
if (i_step is 0):
v[:,index_v] = w/init_beta
else:
v[:,index_v] = w/T[index_v,index_v-1]
w=matrix.dot(v[:,index_v])
if (i_step is 0):
w=w-init_beta*v[:,index_v-1]
else:
w=w-T[index_v,index_v-1]*v[:,index_v-1]
T[index_v,index_v]=np.dot(w,v[:,index_v])
w -=T[index_v,index_v]*v[:,index_v]
#check convergence
if ((i_step+1)%n_tol_check==n_eig and i_step>n_eig):
# calculate eigenval of T matrix
cal_eigval, cal_eigvec_= np.linalg.eigh(T[:index_v+1,:index_v+1])
cal_eigvec = np.dot(v[:,:index_v+1],cal_eigvec_)
#check tolerance
conv_indices = check_conv(matrix, cal_eigval, cal_eigvec,tol)
#filter energy_range
indices = np.all([cal_eigval[conv_indices]>energy_range[0], cal_eigval[conv_indices]<energy_range[1]],axis=0)
#check clustering
conv_cal_eigval,conv_cal_eigvec = clustering((cal_eigval[conv_indices])[indices], (cal_eigvec[conv_indices])[indices])
if (len(conv_cal_eigval)>=n_eig):
break
# implicit restarting
if (implicit is True):
Q=np.eye(restart_interval)
# do shift & QR decomposition
indices = np.argsort(np.abs(cal_eigval-np.mean(energy_range)))
for index in indices[n_eig:]:
new_Q,new_R = np.linalg.qr(T-cal_eigval[index]*np.eye(len(T)))
T = np.dot(new_Q.T,np.dot(T,new_Q))
v = np.dot(v,new_Q)
Q = np.dot(Q,new_Q)
w=v[:,n_eig]*T[n_eig,n_eig-1]+w*Q[-1,n_eig-1]
v[:,n_eig:]=0.0
T[:,n_eig:] = 0.0
T[n_eig:,:] = 0.0
#for debug
#print(np.dot(w.T, v))
# reset index
index_v=n_eig-1
index_v+=1
T[index_v,index_v-1]=np.linalg.norm(w)
T[index_v-1,index_v]=np.linalg.norm(w)
else:
print("not converged")
exit(-1)
print ("energy window: (", energy_range[0],",",energy_range[1],")")
print ("true eigenvalue")
print(true_eigval)
print ("eigenvalue from Lanczos w/ implicit restart (",i_step+1,")")
print(conv_cal_eigval)
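For reference, this is how I check the orthogonality that breaks down from the second restart onward (a small debugging sketch, not part of the algorithm itself; v, w, and n_eig are the variables from the code above):

# Debug check only: right after the restart block, w should be orthogonal
# to the retained Lanczos vectors v[:, :n_eig]. The maximum overlap should
# stay near machine precision; from the second restart on it grows large.
overlap = np.dot(v[:, :n_eig].T, w)
print("max |v^T w| after restart:", np.max(np.abs(overlap)))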
Consider the following problem: given a set of n intervals and a set of m floating-point numbers, determine, for each floating-point number, the subset of intervals that contain it.
This problem is usually addressed by constructing an interval tree (also called a range tree or segment tree). Implementations exist for the one-dimensional case, e.g. Python's intervaltree package. Usually, these implementations handle only one or a few floating-point numbers at a time, i.e. a small "m" above.
In my problem setting, both n and m are extremely large (they come from an image-processing problem). Furthermore, I need to handle N-dimensional intervals (called cuboids when N = 3; I was modeling human brains with the Finite Element Method). I have implemented a simple N-dimensional interval tree in Python, but it runs in a loop and can only process one floating-point number at a time. Can anyone help improve the implementation in terms of efficiency? You can change the data structure freely.
import sys
import time
import numpy as np

# find the indices of a satisfying x > a in one dimension
def find_index_smaller(a, x):
    idx = np.argsort(a)
    ss = np.searchsorted(a, x, sorter=idx)
    res = idx[0:ss]
    return res

# find the indices of a satisfying x < a in one dimension
def find_index_larger(a, x):
    return find_index_smaller(-a, -x)

# find the indices of a satisfying amin < x < amax in one dimension
def find_intv_at(amin, amax, x):
    idx = find_index_smaller(amin, x)
    idx2 = find_index_larger(amax[idx], x)
    res = idx[idx2]
    return res

# find the indices of a satisfying amin < x < amax in N dimensions
def find_intv_at_nd(amin, amax, x):
    dim = amin.shape[0]
    res = np.arange(amin.shape[-1])
    for i in range(dim):
        idx = find_intv_at(amin[i, res], amax[i, res], x[i])
        res = res[idx]
    return res
I also have two test examples, one for a sanity check and one for performance testing:
def demo1():
    print("By default, we do a correctness test")
    n_intv = 2
    n_point = 2
    # generate the test data
    point = np.random.rand(3, n_point)
    intv_min = np.random.rand(3, n_intv)
    intv_max = intv_min + np.random.rand(3, n_intv)*8
    print("point ")
    print(point)
    print("intv_min")
    print(intv_min)
    print("intv_max")
    print(intv_max)
    print("===Indexes of intervals that contain the point===")
    for i in range(n_point):
        print(find_intv_at_nd(intv_min, intv_max, point[:, i]))

def demo2():
    print("Performance:")
    n_points = 100
    n_intv = 1000000
    # generate the test data
    points = np.random.rand(n_points, 3)*512
    intv_min = np.random.rand(3, n_intv)*512
    intv_max = intv_min + np.random.rand(3, n_intv)*8
    print("point.shape = " + str(points.shape))
    print("intv_min.shape = " + str(intv_min.shape))
    print("intv_max.shape = " + str(intv_max.shape))
    starttime = time.time()
    for point in points:
        tmp = find_intv_at_nd(intv_min, intv_max, point)
    print("it took this long to run {} points, with {} intervals: {}".format(n_points, n_intv, time.time()-starttime))
My ideas would be:
1. Remove np.argsort() from the algorithm: the interval tree does not change, so the sorting could be done once in pre-processing (see the sketch after this list).
2. Vectorize over x. The algorithm runs a loop for each x; it would be nice if we could get rid of that loop.
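For idea 1, this is roughly what I have in mind (a sketch only, assuming the bounds arrays are fixed across queries; the class name SortedBounds is just for illustration):

import numpy as np

class SortedBounds:
    # Pre-sort the lower bounds once, so each query is a binary search
    # instead of a fresh argsort per call.
    def __init__(self, amin, amax):
        self.min_order = np.argsort(amin)       # done once, in pre-processing
        self.amin_sorted = amin[self.min_order]
        self.amax = amax

    def query(self, x):
        # indices with amin < x, found by binary search on the sorted bounds
        ss = np.searchsorted(self.amin_sorted, x)
        idx = self.min_order[:ss]
        # keep only those whose upper bound also satisfies x < amax
        return idx[self.amax[idx] > x]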
Any contribution would be appreciated.
My code using multiprocessing gets stuck in an infinite loop.
I'm building an iris recognition system. This is the matching function. Everything works fine until the multiprocessing part.
I'm attaching the error output below so that you get a better idea.
RuntimeError:
        An attempt has been made to start a new process before the
        current process has finished its bootstrapping phase.

        This probably means that you are not using fork to start your
        child processes and you have forgotten to use the proper idiom
        in the main module:

            if __name__ == '__main__':
                freeze_support()
                ...

        The "freeze_support()" line can be omitted if the program
        is not going to be frozen to produce an executable.
Code:
##-----------------------------------------------------------------------------
##  Import
##-----------------------------------------------------------------------------
import numpy as np
from os import listdir
from fnmatch import filter
import scipy.io as sio
from multiprocessing import Pool, cpu_count
from itertools import repeat
import warnings
warnings.filterwarnings("ignore")


##-----------------------------------------------------------------------------
##  Function
##-----------------------------------------------------------------------------
def matching(template_extr, mask_extr, temp_dir, threshold=0.38):
    """
    Description:
        Match the extracted template with the database.

    Input:
        template_extr - Extracted template.
        mask_extr     - Extracted mask.
        threshold     - Threshold of distance.
        temp_dir      - Directory containing the templates.

    Output:
        List of strings of matched files, 0 if none, -1 if no registered sample.
    """
    # Get the number of accounts in the database
    n_files = len(filter(listdir(temp_dir), '*.mat'))
    if n_files == 0:
        return -1

    # Use all cores to calculate Hamming distances
    args = zip(
        sorted(listdir(temp_dir)),
        repeat(template_extr),
        repeat(mask_extr),
        repeat(temp_dir),
    )
    with Pool(processes=cpu_count()) as pools:
        result_list = pools.starmap(matchingPool, args)

    filenames = [result_list[i][0] for i in range(len(result_list))]
    hm_dists = np.array([result_list[i][1] for i in range(len(result_list))])

    # Remove NaN elements
    ind_valid = np.where(hm_dists > 0)[0]
    hm_dists = hm_dists[ind_valid]
    filenames = [filenames[idx] for idx in ind_valid]

    # Threshold and give the result ID
    ind_thres = np.where(hm_dists <= threshold)[0]

    # Return
    if len(ind_thres) == 0:
        return 0
    else:
        hm_dists = hm_dists[ind_thres]
        filenames = [filenames[idx] for idx in ind_thres]
        ind_sort = np.argsort(hm_dists)
        return [filenames[idx] for idx in ind_sort]


#------------------------------------------------------------------------------
def calHammingDist(template1, mask1, template2, mask2):
    """
    Description:
        Calculate the Hamming distance between two iris templates.

    Input:
        template1 - The first template.
        mask1     - The first noise mask.
        template2 - The second template.
        mask2     - The second noise mask.

    Output:
        hd        - The Hamming distance as a ratio.
    """
    # Initialize
    hd = np.nan

    # Shift template left and right, use the lowest Hamming distance
    for shifts in range(-8, 9):
        template1s = shiftbits(template1, shifts)
        mask1s = shiftbits(mask1, shifts)

        mask = np.logical_or(mask1s, mask2)
        nummaskbits = np.sum(mask == 1)
        totalbits = template1s.size - nummaskbits

        C = np.logical_xor(template1s, template2)
        C = np.logical_and(C, np.logical_not(mask))
        bitsdiff = np.sum(C == 1)

        if totalbits == 0:
            hd = np.nan
        else:
            hd1 = bitsdiff / totalbits
            if hd1 < hd or np.isnan(hd):
                hd = hd1

    # Return
    return hd


#------------------------------------------------------------------------------
def shiftbits(template, noshifts):
    """
    Description:
        Shift the bit-wise iris patterns.

    Input:
        template  - The template to be shifted.
        noshifts  - The number of shift operators, positive for the right
                    direction and negative for the left direction.

    Output:
        templatenew - The shifted template.
    """
    # Initialize
    templatenew = np.zeros(template.shape)
    width = template.shape[1]
    s = 2 * np.abs(noshifts)
    p = width - s

    # Shift
    if noshifts == 0:
        templatenew = template
    elif noshifts < 0:
        x = np.arange(p)
        templatenew[:, x] = template[:, s + x]
        x = np.arange(p, width)
        templatenew[:, x] = template[:, x - p]
    else:
        x = np.arange(s, width)
        templatenew[:, x] = template[:, x - s]
        x = np.arange(s)
        templatenew[:, x] = template[:, p + x]

    # Return
    return templatenew


#------------------------------------------------------------------------------
def matchingPool(file_temp_name, template_extr, mask_extr, temp_dir):
    """
    Description:
        Perform a matching session within a Pool of parallel computation.

    Input:
        file_temp_name - File name of the examined template.
        template_extr  - Extracted template.
        mask_extr      - Extracted mask of noise.

    Output:
        hm_dist        - Hamming distance.
    """
    # Load each account
    data_template = sio.loadmat('%s%s' % (temp_dir, file_temp_name))
    template = data_template['template']
    mask = data_template['mask']

    # Calculate the Hamming distance
    hm_dist = calHammingDist(template_extr, mask_extr, template, mask)

    return (file_temp_name, hm_dist)
How can I remove multiprocessing and keep the code working?
screenshots dropbox link
Use Python's itertools.starmap(). It has the same calling convention as Pool.starmap() but runs serially in the current process.
Hope it helps.
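For example, the Pool block in matching() could be replaced like this (a sketch; matchingPool and args are the names from your code):

from itertools import starmap

# Serial drop-in for Pool.starmap: evaluates matchingPool(*a) for each
# argument tuple a in args, one at a time, in the current process.
result_list = list(starmap(matchingPool, args))

Alternatively, if you do want to keep multiprocessing, the RuntimeError above is telling you to guard the entry point of the script that calls matching(), e.g. (a minimal sketch; the call arguments here are placeholders for whatever your script passes in):

if __name__ == '__main__':
    # Top-level calls go here so child processes can re-import this module
    # without re-running them (required with the spawn start method).
    matches = matching(template_extr, mask_extr, temp_dir)  # placeholder args
    print(matches)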
I have this code that generates all 2**40 possible binary vectors, and from these I am trying to keep every vector that satisfies my objective function's conditions, which are:
1- each vector must contain exactly 20 ones;
2- the sum s = s + (index of the one + 1) * (rank of the one), taken over all ones, must equal 4970.
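As a small worked example of condition 2 (my own illustration, using the definitions above): for v = [1, 0, 1], the first one is at index 0 with rank 1 and contributes (0+1)*1 = 1, and the second one is at index 2 with rank 2 and contributes (2+1)*2 = 6, so s = 7.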
I wrote this code, but it would take a very long time, maybe months, to produce the results. Now I am looking for an alternative approach, or an optimization of this code if possible.
import time
from multiprocessing import Process
from multiprocessing import Pool
import numpy as np
import itertools
import numpy

CC = 20

# test whether the vector contains exactly 20 ones
def test1numebers(v, x=1, x_l=CC):
    c = 0
    for i in range(len(v)):
        if v[i] == x:
            c += 1
    if c == x_l:
        return True
    else:
        return False

# s = s + (index + 1) * (rank of the one)
def objectif_function(v, x=1):
    s = 0
    for i in range(len(v)):
        if v[i] == x:
            s = s + (i+1)*nthi(v, i)
    return s

# calculate the rank of the one at position i (number of ones up to i)
def nthi(v, i):
    c = 0
    for j in range(0, i+1):
        if v[j] == 1:
            c += 1
    return c

# generate all 2**N possible binary vectors
def generateMatrix(N):
    l = itertools.product([0, 1], repeat=N)
    return l

# count the valid vectors that match our objective function
def main_algo(N=40, S=4970):
    #N = 40
    m = generateMatrix(N)
    #S = 4970
    c = 0
    ii = 0
    t_start = time.time()    # added: t_start was used below but never defined
    for i in m:
        ii += 1
        print("\n count:", ii)
        xx = i
        if test1numebers(xx):
            if objectif_function(xx) == S:
                c += 1
                print('found one')
                print('\n', xx, '\n')
        if ii >= 1000000:
            break
    t_end = time.time()
    print('time taken for 10**6 is: ', t_end-t_start)
    print(c)

#main_algo()
if __name__ == '__main__':
    '''p = Process(target=main_algo, args=(40,4970,))
    p.start()
    p.join()'''
    p = Pool(150)
    # note: map() calls main_algo(40) and main_algo(4970), one int per call
    print(p.map(main_algo, [40, 4970]))
While you could make a lot of improvements in readability and make your code more Pythonic, my main recommendation is that you use numpy, which is the fastest way of working with matrices.
Avoid working with matrices in an element-by-element loop. With numpy you can do those calculations faster and on all the data at once.
numpy also supports generating matrices very quickly. You could build a random 0/1 matrix in fewer lines of code and considerably faster.
I also recommend installing OpenBLAS, ATLAS, and LAPACK, which make linear algebra calculations much faster.
I hope this helps you.
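For instance, both of your conditions can be checked for a whole batch of random 0/1 vectors at once (a sketch only, not tested against your full problem; the constants 20 and 4970 come from your question, everything else is illustrative):

import numpy as np

# One candidate vector of length 40 per row, generated in a single call.
rng = np.random.default_rng(0)
batch = rng.integers(0, 2, size=(100000, 40))     # random 0/1 matrix

ones_per_row = batch.sum(axis=1)                  # condition 1: count of ones
ranks = batch.cumsum(axis=1) * batch              # rank of each one within its row
scores = (ranks * np.arange(1, 41)).sum(axis=1)   # condition 2: sum of (index+1)*rank

valid = batch[(ones_per_row == 20) & (scores == 4970)]
print(len(valid), "matching vectors in this batch")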
Energy calculations in molecular simulation are inherently full of "for" loops. Traditionally, coordinates for each atom/molecule were stored in arrays. Arrays are fairly straightforward to vectorize, but structures are nice to code with. Treating molecules as individual objects, each with their own coordinates and other properties, is very convenient and much clearer as far as book-keeping goes.
I am using Python version 3.6.
My problem is that I cannot figure out how to vectorize calculations when I am using an array of objects; it seems that a for loop cannot be avoided. Is it necessary for me to use plain arrays in order to take advantage of numpy and vectorize my code?
Here is a Python example which utilizes arrays (line 121 of the linked code), and shows a fast (numpy) and a slow ('normal') Python energy calculation:
https://github.com/Allen-Tildesley/examples/blob/master/python_examples/mc_lj_module.py
The calculation is much faster using the numpy-accelerated method because it is vectorized.
How would I vectorize an energy calculation if I were not using arrays, but an array of objects, each with its own coordinates? This seems to necessitate using the slower for loop.
Here is a simple example with a working (slow) version of the for loop, and an attempted vectorization that doesn't work:
import numpy as np
import time

class Mol:
    num = 0
    def __init__(self, r):
        Mol.num += 1
        self.r = np.empty((3), dtype=np.float_)
        self.r[0] = r[0]
        self.r[1] = r[1]
        self.r[2] = r[2]
        """ A lot more useful things go in here in practice"""

################################################
#                                              #
#                 Main Program                 #
#                                              #
################################################

L = 5.0              # Length of simulation box (arbitrary)
r_cut_box_sq = L/2   # arbitrary cutoff - required

mol_list = []
nmol = 1000          # number of molecules
part = 1             # arbitrary molecule to interact with rest of molecules

""" make 1000 molecules (1 atom per molecule), give random coordinates """
for i in range(nmol):
    r = np.random.rand(3) * L
    mol_list.append(Mol(r))

energy = 0.0
start = time.time()

################################################
#                                              #
#          Slow but functioning loop           #
#                                              #
################################################

for i in range(nmol):
    if i == part:
        continue
    rij = mol_list[part].r - mol_list[i].r
    rij = rij - np.rint(rij/L)*L        # apply periodic boundary conditions
    rij_sq = np.sum(rij**2)             # Squared separations
    in_range = rij_sq < r_cut_box_sq
    sr2 = np.where(in_range, 1.0 / rij_sq, 0.0)
    sr6 = sr2 ** 3
    sr12 = sr6 ** 2
    energy += sr12 - sr6

end = time.time()
print('slow: ', end-start)
print('energy: ', energy)

start = time.time()

################################################
#                                              #
#         Failed vectorization attempt         #
#                                              #
################################################

""" The next line is my problem, how do I vectorize this so I can avoid the for loop altogether?
    Leads to error AttributeError: 'list' object has no attribute 'r' """
""" I also must add that part cannot interact with itself in mol_list"""
rij = mol_list[part].r - mol_list[:].r
rij = rij - np.rint(rij/L)*L            # apply periodic boundary conditions
rij_sq = np.sum(rij**2)
in_range = rij_sq < r_cut_box_sq
sr2 = np.where(in_range, 1.0 / rij_sq, 0.0)
sr6 = sr2 ** 3
sr12 = sr6 ** 2
energy = sr12 - sr6
energy = sum(energy)

end = time.time()
print('faster??: ', end-start)
print('energy: ', energy)
Lastly: would any possible solution be affected if, inside the energy calculation, it were necessary to loop over each atom in each molecule, where there is now more than one atom per molecule and not all molecules have the same number of atoms? That would mean a double for loop for molecule-molecule interactions rather than the simple pair-pair interactions currently employed.
Making use of the itertools library might be the way forward here. Suppose you wrap the energy calculation of a pair of molecules in a function (note: the original tuple-parameter syntax def f((a, b)) is Python 2 only, so the tuple is unpacked inside the function here):

def calc_pairwise_energy(pair):
    # function takes a 2-item tuple of molecules
    mol_a, mol_b = pair
    # energy calculating code here
    return pairwise_energy
Then you can use itertools.combinations to get all the pairs of molecules, together with Python's built-in list comprehension (the code inside [ ] on the last line below):

from itertools import combinations

pairs = combinations(mol_list, 2)
energy = sum([calc_pairwise_energy(pair) for pair in pairs])
I've come back to this answer because I realised I hadn't properly answered your question. With what I've posted so far, the pairwise energy calculation function looked like this (I've made a few optimisations to your code):
def calc_pairwise_energy(molecules):
    rij = molecules[0].r - molecules[1].r
    rij = rij - np.rint(rij/L)*L
    rij_sq = np.sum(rij**2)    # Squared separations
    if rij_sq < r_cut_box_sq:
        return (rij_sq ** -6) - (rij_sq ** -3)
    else:
        return 0.0
Whereas a vectorised implementation that does all the pairwise calculations in a single call might look like this:
def calc_all_energies(molecules):
    energy = 0
    for i in range(len(molecules)-1):
        mol_a = molecules[i]
        other_mols = molecules[i+1:]
        coords = np.array([mol.r for mol in other_mols])
        rijs = coords - mol_a.r
        # np.apply_along_axis replaced as per @hpaulj's comment (see below)
        #rijs = np.apply_along_axis(lambda x: x - np.rint(x/L)*L, 0, rijs)
        rijs = rijs - np.rint(rijs/L)*L
        rijs_sq = np.sum(rijs**2, axis=1)
        rijs_in_range = rijs_sq[rijs_sq < r_cut_box_sq]
        energy += sum(rijs_in_range ** -6 - rijs_in_range ** -3)
    return energy
This is much faster but there is still plenty to optimise here.
If you want to calculate energies with coordinates as inputs, I'm assuming you're looking for pairwise distances. For this, you should look into the SciPy library; specifically, scipy.spatial.distance.pdist. The documentation can be found here.
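For instance (a sketch reusing mol_list and r_cut_box_sq from your question; note that pdist knows nothing about your periodic boundary conditions, so that correction is omitted here):

import numpy as np
from scipy.spatial.distance import pdist

# Gather all coordinates into one (nmol, 3) array, then compute every
# pairwise squared distance in a single vectorized call.
coords = np.array([mol.r for mol in mol_list])
rij_sq = pdist(coords, metric='sqeuclidean')

in_range = rij_sq[rij_sq < r_cut_box_sq]
energy = np.sum(in_range ** -6 - in_range ** -3)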
I am trying to implement Dijkstra's algorithm (on an undirected graph) to find the shortest paths, and my code is below.
Note: I am not using a heap/priority queue or anything like that, just an adjacency list, a dictionary to store weights, and a bool list to avoid cycling forever in the loops/recursion. The algorithm works for most test cases but fails for this particular one: https://ideone.com/iBAT0q
Important: the graph can have multiple edges from v1 to v2 (or vice versa); you have to use the minimum weight.
import sys
sys.setrecursionlimit(10000)

def findMin(n):
    for i in x[n]:
        cost[n] = min(cost[n], cost[i]+w[(n,i)])

def dik(s):
    for i in x[s]:
        if done[i]:
            findMin(i)
            done[i] = False
            dik(i)
    return

q = int(input())
for _ in range(q):
    n, e = map(int, input().split())
    x = [[] for _ in range(n)]
    done = [True]*n
    w = {}
    cost = [1000000000000000000]*n
    for k in range(e):
        i, j, c = map(int, input().split())
        x[i-1].append(j-1)
        x[j-1].append(i-1)
        try:    # Avoiding multiple edges
            w[(i-1,j-1)] = min(c, w[(i-1,j-1)])
            w[(j-1,i-1)] = w[(i-1,j-1)]
        except:
            try:
                w[(i-1,j-1)] = min(c, w[(j-1,i-1)])
                w[(j-1,i-1)] = w[(i-1,j-1)]
            except:
                w[(j-1,i-1)] = c
                w[(i-1,j-1)] = c
    src = int(input())-1
    #for i in sorted(w.keys()):
    #    print(i, w[i])
    done[src] = False
    cost[src] = 0
    dik(src)    # First iteration assigns possible minimum to all nodes
    done = [True]*n
    dik(src)    # Second iteration to ensure they are minimum
    for val in cost:
        if val == 1000000000000000000:
            print(-1, end=' ')
            continue
        if val != 0:
            print(val, end=' ')
    print()
The optimum isn't always found in the second pass. If you add a third pass to your example you get closer to the expected result, and after the fourth iteration you're there. (Each pass is essentially a sweep of edge relaxations, so in the worst case it can take on the order of n passes to converge, much like the Bellman-Ford algorithm.)
You could iterate until no more changes are made to the cost array:
done[src] = False
cost[src] = 0
dik(src)
while True:
    ocost = list(cost)    # copy for comparison
    done = [True]*n
    dik(src)
    if cost == ocost:
        break