TL;DR: if I were to explain the problem in short:
I have signals:
np.random.seed(42)
x = np.random.randn(1000)
y = np.random.randn(1000)
z = np.random.randn(1000)
and human-readable string-tuple logic like:
entry_sig_ = ((x,y,'crossup',False),)
exit_sig_ = ((x,z,'crossup',False), 'or_',(x,y,'crossdown',False))
where:
'entry_sig_' means the output becomes 1 when the time series, unfolding from left to right, hits 'entry_sig_'. (x,y,'crossup',False) means: x crossed y upward at a particular time i; False means the signal has no "memory", otherwise the number of hits accumulates.
'exit_sig_' means the output becomes '0' again when 'exit_sig_' is hit.
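For a concrete picture (hand-made numbers, my addition, not the random series above): suppose the entry condition first holds at i = 3 and the exit condition first holds afterwards at i = 6. The intended output is then:
# index:    0  1  2  3  4  5  6  7  8
expected = [0, 0, 0, 1, 1, 1, 1, 0, 0]  # entry at i=3 opens, the exit hit at i=6 closes from i=7 on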
The output is generated through:
@njit
def run(x, entry_sig, exit_sig):
'''
x: np.array
entry_sig, exit_sig: homogeneous tuples of tuple signals
Returns: sequence of 0 and 1 satisfying entry and exit sigs
'''
L = x.shape[0]
out = np.empty(L)
out[0] = 0.0
out[-1] = 0.0
i = 1
trade = True
while i < L-1:
out[i] = 0.0
if reduce_sig(entry_sig,i) and i<L-1:
out[i] = 1.0
trade = True
while trade and i<L-2:
i += 1
out[i] = 1.0
if reduce_sig(exit_sig,i):
trade = False
i+= 1
return out
reduce_sig(sig, i) is a function (see definition below) that parses the tuple and returns the resulting output at a given point in time.
Question:
As of now, an object of the SingleSig class is instantiated from scratch in the for loop at every point in time; it therefore has no "memory", which cancels the merits of having a class, since a bare function would do. Does there exist a workaround (a different class template, a different approach, etc.) so that:
the combined tuple signal can be queried for its value at a particular point in time i;
"memory" can be reset, i.e. MultiSig(sig_tuple).memory_field can be set to 0 at the constituent-signal level.
The following code adds memory to the signals, which can be wiped using MultiSig.reset() to reset the count of all signals to 0. The memory can be queried using MultiSig.query_memory(key), which returns the number of hits for that signal up to that time.
For the memory to work, I had to add unique keys to the signals to identify them.
from numba import njit, int64, float64, types
from numba.types import Array, string, boolean
from numba import jitclass
import numpy as np
np.random.seed(42)
x = np.random.randn(1000000)
y = np.random.randn(1000000)
z = np.random.randn(1000000)
# Example of "human-readable" signals
entry_sig_ = ((x,y,'crossup',False),)
exit_sig_ = ((x,z,'crossup',False), 'or_',(x,y,'crossdown',False))
# Turn signals into homogeneous tuple
#entry_sig_
entry_sig = (((x,y,'crossup',False),'NOP','1'),)
#exit_sig_
exit_sig = (((x,z,'crossup',False),'or_','2'),((x,y,'crossdown',False),'NOP','3'))
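For reference, a small plain-Python helper (my addition, not part of the original code) that mechanizes this conversion by attaching the connecting logic string and a unique key to each constituent signal:
def to_homogeneous(sig, start_key=1):
    '''
    Turn ((t1), 'or_', (t2), ...) into (((t1), 'or_', key1), ((t2), 'NOP', key2), ...)
    sig: alternating signal tuples and logic strings, as in entry_sig_/exit_sig_
    '''
    parts = [s for s in sig if isinstance(s, tuple)]  # the (x, y, how, acc) tuples
    logics = [s for s in sig if isinstance(s, str)]   # the 'or_'/'and_' connectors
    out = []
    for n, part in enumerate(parts):
        logic = logics[n] if n < len(logics) else 'NOP'  # last signal gets 'NOP'
        out.append((part, logic, str(start_key + n)))
    return tuple(out)
# entry_sig = to_homogeneous(entry_sig_)               # key '1'
# exit_sig  = to_homogeneous(exit_sig_, start_key=2)   # keys '2' and '3'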
@njit
def cross(x, y, i):
'''
x,y: np.array
i: int - point in time
Returns: 1 or 0 when condition is met
'''
if (x[i - 1] - y[i - 1])*(x[i] - y[i]) < 0:
out = 1
else:
out = 0
return out
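As a quick sanity check (hand-made arrays, my addition), cross() fires exactly when the two series swap order between i-1 and i:
a = np.array([0.0, 1.0, 2.0])
b = np.array([1.5, 1.5, 1.5])
assert cross(a, b, 2) == 1  # a moves from below b to above b between i=1 and i=2
assert cross(a, b, 1) == 0  # no crossing between i=0 and i=1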
kv_ty = (types.string,types.int64)
spec = [
('memory', types.DictType(*kv_ty)),
]
@njit
def single_signal(x, y, how, acc, i):
'''
i: int - point in time
Returns either signal or accumulator
'''
if cross(x, y, i):
if x[i] < y[i] and how == 'crossdown':
out = 1
elif x[i] > y[i] and how == "crossup":
out = 1
else:
out = 0
else:
out = 0
return out
@jitclass(spec)
class MultiSig:
def __init__(self,entry,exit):
'''
initialize memory at single signal level
'''
memory_dict = {}
for i in entry:
memory_dict[str(i[2])] = 0
for i in exit:
memory_dict[str(i[2])] = 0
self.memory = memory_dict
def reduce_sig(self, sig, i):
'''
Parses multisignal
sig: homogeneous tuple of tuples ("human-readable" signal definition)
i: int - point in time
Returns: resulting value of multisignal
'''
L = len(sig)
out = single_signal(*sig[0][0],i)
logic = sig[0][1]
if out:
self.update_memory(sig[0][2])
for cnt in range(1, L):
s = single_signal(*sig[cnt][0],i)
if s:
self.update_memory(sig[cnt][2])
out = out | s if logic == 'or_' else out & s
logic = sig[cnt][1]
return out
def update_memory(self, key):
'''
update memory
'''
self.memory[str(key)] += 1
def reset(self):
'''
reset memory
'''
dicti = {}
for i in self.memory:
dicti[i] = 0
self.memory = dicti
def query_memory(self, key):
'''
return number of hits on signal
'''
return self.memory[str(key)]
@njit
def run(x, entry_sig, exit_sig):
'''
x: np.array
entry_sig, exit_sig: homogeneous tuples of tuples
Returns: sequence of 0 and 1 satisfying entry and exit sigs
'''
L = x.shape[0]
out = np.empty(L)
out[0] = 0.0
out[-1] = 0.0
i = 1
multi = MultiSig(entry_sig,exit_sig)
while i < L-1:
out[i] = 0.0
if multi.reduce_sig(entry_sig,i) and i<L-1:
out[i] = 1.0
trade = True
while trade and i<L-2:
i += 1
out[i] = 1.0
if multi.reduce_sig(exit_sig,i):
trade = False
i+= 1
return out
run(x, entry_sig, exit_sig)
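For completeness, a small usage sketch of the memory API outside of run (the keys '1', '2', '3' are the ones assigned in entry_sig and exit_sig above):
multi = MultiSig(entry_sig, exit_sig)
multi.reduce_sig(exit_sig, 10)   # evaluate the combined exit signal at i=10
print(multi.query_memory('2'))   # number of hits recorded so far for signal '2'
multi.reset()                    # wipe all counters
print(multi.query_memory('2'))   # 0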
To reiterate what I said in the comments: | and & are bitwise operators, not logical operators. 1 & 2 evaluates to 0, i.e. False, which is not what I believe you want, so I made sure out and s can only be 0/1 in order for this to produce the expected output.
You are aware that, because of
out = out | s if logic == 'or_' else out & s
the order of the time series inside entry_sig and exit_sig matters?
Let (output, logic) be tuples where output is 0 or 1 according to how crossup and crossdown would evaluate the passed information of the tuple, and logic is 'or_' or 'and_'.
tuples = ((0,'or_'),(1,'or_'),(0,'and_'))
out = tuples[0][0]
logic = tuples[0][1]
for i in range(1, len(tuples)):
    s = tuples[i][0]
    out = out | s if logic == 'or_' else out & s
    logic = tuples[i][1]
print(out)
1
changing the order of the tuple yields the other signal:
tuples = ((0,'or_'),(0,'and_'),(1,'or_'))
out = tuples[0][0]
logic = tuples[0][1]
for i in range(1, len(tuples)):
    s = tuples[i][0]
    out = out | s if logic == 'or_' else out & s
    logic = tuples[i][1]
print(out)
0
The performance hinges on how many times the count needs to be updated. Using n = 1,000,000 for all three time series, your code had a mean run-time of 0.6 s on my machine; my code had 0.63 s.
I then changed the crossing logic a bit to save some if/else branches, so that the nested if/else is only triggered if the time series actually crossed, which can be checked by one comparison only. This further halved the difference in run-time, so the above code now sits at a 2.5% longer run-time than your original code.
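For reference, the reworked crossing check looks roughly like this (a sketch of the idea, not necessarily the exact code I timed; it reuses the njit import from above, and acc is kept unused exactly as in the original signature): the single multiply-and-compare that detects a crossing guards the direction logic, so the string comparisons only run when a crossing actually occurred.
@njit
def single_signal_fast(x, y, how, acc, i):
    # one comparison decides whether anything crossed between i-1 and i
    if (x[i - 1] - y[i - 1]) * (x[i] - y[i]) < 0:
        if how == 'crossup':
            return 1 if x[i] > y[i] else 0
        else:  # 'crossdown'
            return 1 if x[i] < y[i] else 0
    return 0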
I am currently writing a decision tree classifier using Gini impurity and information gain, splitting the tree on the attribute with the highest gain each time. However, it keeps picking the same attribute every time and simply adjusting the value for its question. This results in a very low accuracy, usually around 30%, as it only ever takes the very first attribute into account.
Finding the best split
# Used to find the best split for data among all attributes
def split(r):
max_ig = 0
max_att = 0
max_att_val = 0
i = 0
curr_gini = gini_index(r)
n_att = len(att)
for c in range(n_att):
if c == 3:
continue
c_vals = get_column(r, c)
while i < len(c_vals):
# Value of the current attribute that is being tested
curr_att_val = r[i][c]
true, false = fork(r, c, curr_att_val)
ig = gain(true, false, curr_gini)
if ig > max_ig:
max_ig = ig
max_att = c
max_att_val = r[i][c]
i += 1
return max_ig, max_att, max_att_val
Comparing rows to split the data down the tree into true and false branches
# Used to compare and test if the current row is greater than or equal to the test value
# in order to split up the data
def compare(r, test_c, test_val):
if r[test_c].isdigit():
return r[test_c] == test_val
elif float(r[test_c]) >= float(test_val):
return True
else:
return False
# Splits the data into two lists for the true/false results of the compare test
def fork(r, c, test_val):
true = []
false = []
for row in r:
if compare(row, c, test_val):
true.append(row)
else:
false.append(row)
return true, false
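Note what compare() does: an all-digit string is treated as categorical and tested for equality, anything else is parsed as a float and tested with >=. A quick illustration with hypothetical row values:
row = ['12', '4.5', 'ale']
print(compare(row, 0, '12'))   # True: '12'.isdigit(), so an equality test
print(compare(row, 1, '4.0'))  # True: numeric branch, 4.5 >= 4.0
print(compare(row, 1, '5.0'))  # False: 4.5 < 5.0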
Iterate through tree
def rec_tree(r):
ig, att, curr_att_val = split(r)
if ig == 0:
return Leaf(r)
true_rows, false_rows = fork(r, att, curr_att_val)
true_branch = rec_tree(true_rows)
false_branch = rec_tree(false_rows)
return Node(att, curr_att_val, true_branch, false_branch)
The working solution I have was to change the split function as follows. To be completely honest, I am not able to see what's wrong with the original, but it might be obvious.
The working function is as follows:
def split(r):
max_ig = 0
max_att = 0
max_att_val = 0
# calculates gini for the rows provided
curr_gini = gini_index(r)
no_att = len(r[0])
# Goes through the different attributes
for c in range(no_att):
# Skip the label column (beer style)
if c == 3:
continue
column_vals = get_column(r, c)
i = 0
while i < len(column_vals):
# value we want to check
att_val = r[i][c]
# Use the attribute value to fork the data to true and false streams
true, false = fork(r, c, att_val)
# Calculate the information gain
ig = gain(true, false, curr_gini)
# If this gain is the highest found then mark this as the best choice
if ig > max_ig:
max_ig = ig
max_att = c
max_att_val = r[i][c]
i += 1
return max_ig, max_att, max_att_val
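The difference that matters is that i = 0 now happens inside the attribute loop. In the original split(), i was initialized once before the for loop, so after the first attribute exhausted it the while condition never held again and no other attribute was ever scored, which matches the symptom of always splitting on the same attribute. A minimal illustration of that pattern:
vals = ['a', 'b', 'c']
i = 0
for c in range(3):
    count = 0
    while i < len(vals):  # after c == 0 finishes, i == 3, so this never re-enters
        count += 1
        i += 1
    print(c, count)  # prints: 0 3, then 1 0, then 2 0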
I use the SymPy solve() function to solve a large number of equations. All variables in the equations are defined as symbols. Variables can start with the letter P or F. I use solve() to express one specific P variable (the one that I observe) with only F variables, i.e. solve() substitutes all the other P variables with F variables. The sum of the coefficients in front of the F variables is ideally 1, or almost 1 (e.g. 0.99).
This produces good results up to a certain point, where the number of equations and their length become quite big. There the SymPy solve() function starts to give me wrong results: the sum of the coefficients becomes negative (e.g. -7). It looks like solve() has problems substituting and carrying over all the variables and their coefficients.
Is there a way to correct this problem?
Dictionary of equations under link: https://drive.google.com/open?id=1VBQucrDU-o1diCd6i4rR3MlRh95qycmK
import json
from sympy import Symbol, Add, Eq, solve
# Get data
# data from link above
with open("C:\\\\Test\\dict.json") as f:
equations = json.load(f)
comp =[]
expressions = []
for p, equation_components in equations.items():
p = Symbol(p)
comp.append(p)
expression = []
for name, multiplier in equation_components.items():
if type(multiplier) == float or type(multiplier) == int:
expression.append(Symbol(name) * multiplier)
else:
expression.append(Symbol(name) * Symbol(multiplier))
expressions.append(Eq(p, Add(*expression)))
# Solution for variable P137807
print("Solving...")
# Works for slice :364 !!!!!
solutions = solve(expressions[:364], comp[:364], simplify=False, rational=False)
# Gives wrong results for slice :366 and above !!!!!
# solutions = solve(expressions[:366], comp[:366], simplify=False, rational=False)
vm_symbol = Symbol("P137807")
solution_1 = solutions[vm_symbol]
print("\n")
print("Solution_1:")
print(solution_1)
print("\n")
#Sum of coefficients
list_sum = []
for i in solution_1.args:
if str(i.args[1]) != "ANaN":
list_sum.append(i.args[0])
coeff_sum = sum(list_sum)
print("Sum:")
print(coeff_sum)
...
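For reference, the linked dict.json maps each P-variable name to a dict of {symbol name: coefficient}; a tiny hypothetical sample in the same shape (not the real data), which the loop above would turn into Eq(P1, 0.4*F1 + 0.6*F2) and Eq(P2, 0.5*P1 + 0.5*F3):
equations = {
    "P1": {"F1": 0.4, "F2": 0.6},
    "P2": {"P1": 0.5, "F3": 0.5},
}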
I just wanted to mark the problem as solved and provide a reference to the solution. Please look at the sympy issue "numerical instability when solving n=385 linear equations with Float coefficients" (#17136).
The solution that worked for me was to use the following solver instead of the SymPy solve() function:
def ssolve(eqs, syms):
"""return the solution of linear system of equations
with symbolic coefficients and a unique solution.
Examples
========
>>> eqs=[x-1,x+2*y-z-2,x+z+w-6,2*y+z+x-2]
>>> v=[x,y,z,w]
>>> ssolve(eqs, v)
{x: 1, z: 0, w: 5, y: 1/2}
"""
    from sympy.solvers.solveset import linear_coeffs
    from sympy import ordered  # ordered() is used below and must be imported
v = list(syms)
N = len(v)
# convert equations to coefficient dictionaries
print('checking linearity')
d = []
v0 = v + [0]
for e in [i.rewrite(Add) for i in eqs]:
co = linear_coeffs(e, *v)
di = dict([(i, c) for i, c in zip(v0, co) if c or not i])
d.append(di)
print('forward solving')
sol = {}
impl = {}
done = False
while not done:
# check for those that are done
more = set([i for i, di in enumerate(d) if len(di) == 2])
did = 0
while more:
di = d[more.pop()]
c = di.pop(0)
x = list(di)[0]
a = di.pop(x)
K = sol[x] = -c/a
v.remove(x)
changed = True
did += 1
# update everyone else
for j, dj in enumerate(d):
if x not in dj:
continue
dj[0] += dj.pop(x)*K
if len(dj) == 2:
more.add(j)
if did: print('found',did,'definitions')
# solve implicitly for the next variable
dcan = [i for i in d if len(i) > 2]
if not dcan:
done = True
else:
# take shortest first
di = next(ordered(dcan, lambda i: len(i)))
done = False
x = next(ordered(i for i in di if i))
c = di.pop(x)
for k in di:
di[k] /= -c
impl[x] = di.copy()
di.clear()
v.remove(x)
# update everyone else
for j, dj in enumerate(d):
if x not in dj:
continue
done = False
c = dj.pop(x)
for k in impl[x]:
dj[k] = dj.get(k, 0) + impl[x][k]*c
have = set(sol)
sol[0] = 1
while N - len(have):
print(N - len(have), 'to backsub')
for k in impl:
if impl[k] and not set(impl[k]) - have - {0}:
sol[k] = sum(impl[k][vi]*sol[vi] for vi in impl[k])
impl[k].clear()
have.add(k)
sol.pop(0)
return sol
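A usage sketch mirroring the docstring:
from sympy import symbols
x, y, z, w = symbols('x y z w')
eqs = [x - 1, x + 2*y - z - 2, x + z + w - 6, 2*y + z + x - 2]
print(ssolve(eqs, [x, y, z, w]))  # -> {x: 1, z: 0, w: 5, y: 1/2}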
I have a fairly long code that processes spectra, and along the way I need an interpolation of some points. I used to have all this code written line-by-line without any functions, and it all worked properly, but now I'm converting it to two large functions so that I can call it on other models more easily in the future. Below is my code. (I have more code after the last line here that plots some things, but that's not relevant to my issue: I've tested this with a bunch of print lines and learned that my issue arises when I call the interpolation function inside my process function.)
import re
import numpy as np
import scipy.interpolate
# Required files and lists
filename = 'bpass_spectra.txt' # number of columns = 4
extinctionfile = 'ExtinctionLawPoints.txt' # R_V = 4.0
datalist = []
if filename == 'bpass_spectra.txt':
filetype = 4
else:
filetype = 1
if extinctionfile == 'ExtinctionLawPoints.txt':
R_V = 4.0
else:
R_V = 1.0 #to be determined
# Constants
h = 4.1357e-15 # Planck's constant [eV s]
c = float(3e8) # speed of light [m/s]
# Inputs
beta = 2.0 # power used in extinction law
R = 1.0 # star formation rate [Msun/yr]
z = 1.0 # redshift
M_gas = 1.0 # mass of gas
M_halo = 2e41 # mass of dark matter halo
# Read spectra file
f = open(filename, 'r')
rawlines = f.readlines()
met = re.findall('Z\s=\s(\d*\.\d+)', rawlines[0])
del rawlines[0]
for i in range(len(rawlines)):
newlist = rawlines[i].split(' ')
datalist.append(newlist)
# Read extinction curve data file
rawpoints = open(extinctionfile, 'r').readlines()
def interpolate(R_V, rawpoints, Elist, i):
pointslist = []
if R_V == 4.0:
for i in range(len(rawpoints)):
newlst = re.split('(?!\S)\s(?=\S)|(?!\S)\s+(?=\S)', rawpoints[i])
pointslist.append(newlst)
pointslist = pointslist[3:]
lambdalist = [float(item[0]) for item in pointslist]
k_abslist = [float(item[4]) for item in pointslist]
xvallist = [(c*h)/(lamb*1e-6) for lamb in lambdalist]
k_interp = scipy.interpolate.interp1d(xvallist, k_abslist)
return k_interp(Elist[i])
# Processing function
def process(interpolate, filetype, datalist, beta, R, z, M_gas, M_halo, met):
speclist = []
if filetype == 4:
metallicity = float(met[0])
Elist = [float(item[0]) for item in datalist]
speclambdalist = [h*c*1e9/E for E in Elist]
met1list = [float(item[1]) for item in datalist]
speclist.extend(met1list)
klist, Tlist = [None]*len(speclist), [None]*len(speclist)
if metallicity > 0.0052:
DGRlist = [50.0*np.exp(-2.21)*metallicity]*len(speclist) # dust to gas ratio
elif metallicity <= 0.0052:
DGRlist = [((50.0*metallicity)**3.15)*np.exp(-0.96)]*len(speclist)
for i in range(len(speclist)):
if Elist[i] <= 4.1357e-3: # frequencies <= 10^12 Hz
klist[i] = 0.1*(float(Elist[i])/(1000.0*h))**beta # extinction law [cm^2/g]
elif Elist[i] > 4.1357e-3: # frequencies > 10^12 Hz
klist[i] = interpolate(R_V, rawpoints, Elist, i) # interpolated function's value at Elist[i]
print "KLIST (INTERPOLATION) ELEMENTS 0 AND 1000:", klist[0], klist[1000]
return
The output from the print line is KLIST (INTERPOLATION) ELEMENTS 0 AND 1000: 52167.31734159269 52167.31734159269.
When I run my old code without functions, I print klist[0] and klist[1000] like I do here and get different values for each. In this new code, I get back two identical values from this line. This shouldn't be the case, so it must not be interpolating correctly inside my function (maybe it's not performing it on each point in the loop?). Does anyone have any insight? It would be unreasonable to post my entire code with all the used text files here (they're very large), so I'm not expecting anyone to run it, but rather to examine how I use and call my functions.
Edit: Below is the original version of my code up to the interpolation point without the functions (which works).
import re
import numpy as np
import scipy.interpolate
filename = 'bpass_spectra.txt'
extinctionfile = 'ExtinctionLawPoints.txt' # from R_V = 4.0
pointslist = []
datalist = []
speclist = []
# Constants
h = 4.1357e-15 # Planck's constant [eV s]
c = float(3e8) # speed of light [m/s]
# Read spectra file
f = open(filename, 'r')
rawspectra = f.readlines()
met = re.findall('Z\s=\s(\d*\.\d+)', rawspectra[0])
del rawspectra[0]
for i in range(len(rawspectra)):
newlist = rawspectra[i].split(' ')
datalist.append(newlist)
# Read extinction curve data file
rawpoints = open(extinctionfile, 'r').readlines()
for i in range(len(rawpoints)):
newlst = re.split('(?!\S)\s(?=\S)|(?!\S)\s+(?=\S)', rawpoints[i])
pointslist.append(newlst)
pointslist = pointslist[3:]
lambdalist = [float(item[0]) for item in pointslist]
k_abslist = [float(item[4]) for item in pointslist]
xvallist = [(c*h)/(lamb*1e-6) for lamb in lambdalist]
k_interp = scipy.interpolate.interp1d(xvallist, k_abslist)
# Create new lists
Elist = [float(item[0]) for item in datalist]
speclambdalist = [h*c*1e9/E for E in Elist]
z1list = [float(item[1]) for item in datalist]
speclist.extend(z1list)
met = met[0]
klist = [None]*len(speclist)
Loutlist = [None]*len(speclist)
Tlist = [None]*len(speclist)
# Define parameters
b = 2.0 # power used in extinction law (beta)
R = 1.0 # star formation ratw [Msun/yr]
z = 1.0 # redshift
Mgas = 1.0 # mass of gas
Mhalo = 2e41 # mass of dark matter halo
if float(met) > 0.0052:
DGRlist = [50.0*np.exp(-2.21)*float(met)]*len(speclist)
elif float(met) <= 0.0052:
DGRlist = [((50.0*float(met))**3.15)*np.exp(-0.96)]*len(speclist)
for i in range(len(speclist)):
if float(Elist[i]) <= 4.1357e-3: # frequencies <= 10^12 Hz
klist[i] = 0.1*(float(Elist[i])/(1000.0*h))**b # extinction law [cm^2/g]
elif float(Elist[i]) > 4.1357e-3: # frequencies > 10^12 Hz
klist[i] = k_interp(Elist[i]) # interpolated function's value at Elist[i]
print "KLIST (INTERPOLATION) ELEMENTS 0 AND 1000:", klist[0], klist[1000]
The output from this print line is KLIST (INTERPOLATION) ELEMENTS 0 AND 1000: 7779.275435560996 58253.589270674354.
You are passing i as an argument to interpolate, and then also using i as the loop variable within interpolate. Once i has been used in the for i in range(len(rawpoints)) loop, it is left at the final value len(rawpoints)-1. The interpolate function will then always return the same value k_interp(Elist[i]), which is equivalent to k_interp(Elist[len(rawpoints)-1]). You will need to either use a new name for the loop variable (e.g. for not_i in range(len(rawpoints))), or use a different name for the Elist index argument. Consider the following change to interpolate:
def interpolate(R_V, rawpoints, Elist, j):
pointslist = []
if R_V == 4.0:
for i in range(len(rawpoints)):
newlst = re.split('(?!\S)\s(?=\S)|(?!\S)\s+(?=\S)', rawpoints[i])
pointslist.append(newlst)
pointslist = pointslist[3:]
lambdalist = [float(item[0]) for item in pointslist]
k_abslist = [float(item[4]) for item in pointslist]
xvallist = [(c*h)/(lamb*1e-6) for lamb in lambdalist]
k_interp = scipy.interpolate.interp1d(xvallist, k_abslist)
return k_interp(Elist[j])
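Independently of the naming fix, note that interpolate() re-parses rawpoints and rebuilds the interpolant on every call inside the loop over speclist. A possible refactor (a sketch under the same R_V == 4.0 assumption, reusing re, scipy.interpolate and the constants c and h already defined in the script) builds k_interp once and reuses it:
def build_interpolant(rawpoints):
    pointslist = []
    for line in rawpoints:
        pointslist.append(re.split('(?!\S)\s(?=\S)|(?!\S)\s+(?=\S)', line))
    pointslist = pointslist[3:]
    lambdalist = [float(item[0]) for item in pointslist]
    k_abslist = [float(item[4]) for item in pointslist]
    xvallist = [(c*h)/(lamb*1e-6) for lamb in lambdalist]
    return scipy.interpolate.interp1d(xvallist, k_abslist)
# k_interp = build_interpolant(rawpoints)  # once, before the loop
# klist[i] = k_interp(Elist[i])            # per point, inside the loop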
I want to write simple toy code for the implicitly restarted Lanczos method.
Without implicit restarting the code works perfectly, but when I turn restarting on I cannot get a proper solution.
To my knowledge, the newly constructed w should be orthogonal to all of the new Lanczos vectors. For the first restart the orthogonality is well preserved, but from the second restart on it breaks down significantly and the program does not find proper eigenvalues.
I have already spent several tens of hours trying to fix it. I almost gave up... Here is my Python code.
"""
Author: Sunghwan Choi
Date Created: June 19, 2017
Python Version: 2.7 or 3.5
Reference for Lanczos algorithm
http://www.netlib.org/utk/people/JackDongarra/etemplates/node104.html
Reference for implicit restart
http://www.netlib.org/utk/people/JackDongarra/etemplates/node118.html
"""
import numpy as np
from scipy.sparse.linalg import eigsh
#from scipy.sparse import eye
from scipy.sparse import coo_matrix
from numpy import eye
def clustering(eigvals,eigvecs,tol=1e-2):
ret_eigvals=[]
ret_eigvecs=[]
for i in range(len(eigvals)):
for ret_eigval, ret_eigvec in zip (ret_eigvals,ret_eigvecs):
if (np.abs(eigvals[i]/ret_eigval-1.0)<tol ):
break
else:
ret_eigvals.append(eigvals[i])
ret_eigvecs.append(eigvecs[:,i])
ret_eigvals=np.array(ret_eigvals)
ret_eigvecs=np.array(ret_eigvecs).T
return ret_eigvals,ret_eigvecs
def check_conv(matrix, cal_eigval, cal_eigvec, tol):
indices=[]
for i in range(len(cal_eigval)):
if(np.linalg.norm(matrix.dot(cal_eigvec[:,i]) - cal_eigval[i]*cal_eigvec[:,i])< tol):
indices.append(i)
return indices
################ input
size=1600
max_step=20000
which='SA'
#implicit=False
implicit=True
energy_range=[0.0,6.0]
tol = 1e-5
n_eig=6
n_tol_check=40 # n_tol_check>n_eig ==0
######################
# generate 1D harmonic oscillator
h=0.1
matrix=-5/2*eye(size)
matrix+=4/3*(eye(size,k=1)+eye(size,k=-1))
matrix+=-1/12*(eye(size,k=2)+eye(size,k=-2))
matrix=-0.5*matrix/(h*h)
distance =lambda index: (index-size/2)*h
matrix+=np.diagflat( list(map( lambda i: 0.5*distance(i)**2, range(size))))
# solve eigenvalue problem to check validity
true_eigval,true_eigvec = eigsh(matrix,k=50,which=which)
indices = np.all([true_eigval>energy_range[0], true_eigval<energy_range[1]],axis=0)
true_eigval = true_eigval[indices]
true_eigvec = true_eigvec[:,indices]
#initialize variables
alpha=[]; beta=[]
index_v=0
restart_interval = n_tol_check+n_eig if implicit is not False else max_step
T = np.zeros((restart_interval,restart_interval))
v = np.zeros((size,restart_interval))
#Q=np.eye(restart_interval)
#generate initial vector
np.random.seed(1)
initial_vec = np.random.random(size)
#initial_vec = np.loadtxt("tmp")
w = v[:,index_v] = initial_vec/np.linalg.norm(initial_vec)
init_beta = np.linalg.norm(w)
# start Lanczos i_step
for i_step in range(max_step):
    if (i_step == 0):
v[:,index_v] = w/init_beta
else:
v[:,index_v] = w/T[index_v,index_v-1]
w=matrix.dot(v[:,index_v])
    if (i_step == 0):
w=w-init_beta*v[:,index_v-1]
else:
w=w-T[index_v,index_v-1]*v[:,index_v-1]
T[index_v,index_v]=np.dot(w,v[:,index_v])
w -=T[index_v,index_v]*v[:,index_v]
#check convergence
if ((i_step+1)%n_tol_check==n_eig and i_step>n_eig):
# calculate eigenval of T matrix
cal_eigval, cal_eigvec_= np.linalg.eigh(T[:index_v+1,:index_v+1])
cal_eigvec = np.dot(v[:,:index_v+1],cal_eigvec_)
#check tolerance
conv_indices = check_conv(matrix, cal_eigval, cal_eigvec,tol)
#filter energy_range
indices = np.all([cal_eigval[conv_indices]>energy_range[0], cal_eigval[conv_indices]<energy_range[1]],axis=0)
#check clustering
conv_cal_eigval,conv_cal_eigvec = clustering((cal_eigval[conv_indices])[indices], (cal_eigvec[conv_indices])[indices])
if (len(conv_cal_eigval)>=n_eig):
break
# implicit restarting
if (implicit is True):
Q=np.eye(restart_interval)
# do shift & QR decomposition
indices = np.argsort(np.abs(cal_eigval-np.mean(energy_range)))
for index in indices[n_eig:]:
new_Q,new_R = np.linalg.qr(T-cal_eigval[index]*np.eye(len(T)))
T = np.dot(new_Q.T,np.dot(T,new_Q))
v = np.dot(v,new_Q)
Q = np.dot(Q,new_Q)
w=v[:,n_eig]*T[n_eig,n_eig-1]+w*Q[-1,n_eig-1]
v[:,n_eig:]=0.0
T[:,n_eig:] = 0.0
T[n_eig:,:] = 0.0
#for debug
#print(np.dot(w.T, v))
# reset index
index_v=n_eig-1
index_v+=1
T[index_v,index_v-1]=np.linalg.norm(w)
T[index_v-1,index_v]=np.linalg.norm(w)
else:
print("not converged")
exit(-1)
print ("energy window: (", energy_range[0],",",energy_range[1],")")
print ("true eigenvalue")
print(true_eigval)
print ("eigenvalue from Lanczos w/ implicit restart (",i_step+1,")")
print(conv_cal_eigval)
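For reference, the orthogonality of w against the kept Lanczos vectors can be monitored with a two-line check dropped in right after the restart block (same v, w and index_v as in the code above); its growth across restarts is exactly the breakdown described in the question:
ortho_err = np.abs(v[:, :index_v + 1].T.dot(w)).max()
print('max |<v_j, w>| =', ortho_err)  # should stay near machine precision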