I am currently working on a project that requires me to run a complete Python code base. For research purposes, I need the code to run as fast as possible, but I am fairly new to programming and have no idea how to reduce run time, so I hope someone can help me with that. Any advice would be appreciated. Here is part of my code base; it uses a lot of nested for loops, which I suspect significantly increases run time.
import numpy as np

def a_j(r, a, A): # the Clausius-Mossotti factor, determined by a symmetric (3 × 3) matrix such that (A_i)^T = A_i
    alph = np.array([[0,0,0],[0,0,0],[0,0,0]], complex)
    for i in range(3):
        for j in range(3):
            alph[i,j] = (r * a * A[i,j])
    return alph

def W_ext(x, k, rho, alpha, A): # particle–particle interaction term
    n = x.shape[0] # the number of x vectors
    result = np.zeros([3*n,3*n], complex)
    u = np.zeros((n, 3)) # u = x - x'
    for i in range(n):
        for j in range(n):
            if i != j:
                u[i] = x[i] - x[j]
                block_result = a_j(rho[i], alpha, A) * G((u[i]), k) * a_j(rho[j], alpha, A)
                for m in range(3):
                    for l in range(3):
                        result[3*i + m, 3*j + l] = block_result[m,l]
    return result.imag

def A_ext(rho, a, A): # single-particle term
    n = rho.shape[0]
    result = np.zeros([3*n,3*n], complex)
    for i in range(n):
        for j in range(n):
            if i == j:
                block_result = a_j(rho[i], a, A).imag
                for m in range(3):
                    for l in range(3):
                        result[3*i + m, 3*j + l] = block_result[m,l]
    return result # (3n x 3n) matrix

def P_ext(e, A, W, omega):
    eT = np.matrix.getH(e)
    mm1 = np.matmul(A, e)
    mm2 = np.matmul(W, e)
    extinction = (np.dot(eT, mm1) + np.dot(eT, mm2)) * (omega/2.0)
    return extinction

#ABSORPTION
def W_abs(x, k, rho, alpha, A, chi): # particle–particle interaction term
    n = x.shape[0]
    result = np.zeros([3*n,3*n], complex)
    u = np.zeros((n, 3))
    for i in range(n):
        for j in range(n):
            if i != j:
                u[i] = x[i] - x[j]
                block_result = np.matrix.getH(a_j(rho[i], alpha, A)) * (1.0 / np.conjugate(chi)).imag * a_j(rho[i], alpha, A) * G((u[i]), k) * a_j(rho[j], alpha, A)
                for m in range(3):
                    for l in range(3):
                        result[3*i + m, 3*j + l] = block_result[m,l]
    return 2.0 * result.real # (3n x 3n) matrix

def A_abs(rho, a, A, chi): # single-particle term
    n = rho.shape[0]
    result = np.zeros([3*n,3*n], complex)
    for i in range(n):
        for j in range(n):
            if i == j:
                block_result = np.matrix.getH(a_j(rho[i], a, A)) * (1.0 / np.conjugate(chi)).imag * a_j(rho[i], a, A)
                for m in range(3):
                    for l in range(3):
                        result[3*i + m, 3*j + l] = block_result[m,l]
    return result # (3n x 3n) matrix
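Edit: after reading around a bit, I think one first step is to avoid the innermost element-by-element work: a_j is only a scalar factor times A, and each 3x3 block can be written with one slice assignment. Would a sketch like this (it assumes G, defined elsewhere in my code base, returns a 3x3 complex array, and it keeps the original elementwise products) be a valid replacement for a_j and W_ext? The other functions could presumably be changed the same way.

def a_j(r, a, A):
    # r * a is just a scalar factor, so the 3x3 block needs no loops
    return r * a * np.asarray(A, dtype=complex)

def W_ext(x, k, rho, alpha, A):
    n = x.shape[0]
    result = np.zeros((3*n, 3*n), complex)
    for i in range(n):
        for j in range(n):
            if i != j:
                block = a_j(rho[i], alpha, A) * G(x[i] - x[j], k) * a_j(rho[j], alpha, A)
                # write the whole 3x3 block at once instead of looping over m and l
                result[3*i:3*i+3, 3*j:3*j+3] = block
    return result.imag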
I would like to make a function that calculates something depending on the input. I have tried to make it so that if an input is not given, it is set to 0 by default. I will provide either a and d or b and c, but no crossover.
import numpy as np
data_t = np.random.uniform(2.25, 2.75, size=10)
data_a = np.random.uniform(9.5, 10.5, size=(20,10)); data_a = list(data_a)
data_b = np.random.uniform(6.5, 7.5, size=(20,10)); data_b = list(data_b)
data_c = np.random.uniform(14.5, 15.5, size=(20,10)); data_c = list(data_c)
data_d = np.random.uniform(18.5, 19.5, size=(20,10)); data_d = list(data_d)
def TEST(t, a=0, b=0, c=0, d=0):
    t = np.array(t)
    if a and d == 0:
        m, n = np.array(a.shape)
        x1 = np.array([[b[jx, ix] + t[jx] for ix, i in enumerate(range(n))] for jx, _ in enumerate(range(m))])
        y1 = np.array([[c[jx, ix] + t[jx] for ix, i in enumerate(range(n))] for jx, _ in enumerate(range(m))])
    elif b and c == 0:
        m, n = np.array(a.shape)
        x2 = np.array([[a[jx, ix] - t[jx] for ix, i in enumerate(range(n))] for jx, _ in enumerate(range(m))])
        y2 = np.array([[d[jx, ix] - t[jx] for ix, i in enumerate(range(n))] for jx, _ in enumerate(range(m))])
    else:
        raise ValueError('Something is wrong.')
test = TEST(t=data_t, a=data_a, b=0, c=0, d=data_d)
I am obviously doing something wrong related to how Python interprets if combined with and, but I have no idea what the correct way is. I end up getting the ValueError I created.
What is the correct way to do what I am trying to do?
Thank you, @Chris Doyle, for setting me straight. As he said in his comment above, a and d == 0 is parsed as a and (d == 0), so the conditional should be if a == 0 and d == 0. I also improved the rest:
def TEST(t, a=0, b=0, c=0, d=0):
    if a == 0 and d == 0:
        x1 = np.array([[i + j for i in k] for j, k in zip(t, b)])
        y1 = np.array([[i + j for i in k] for j, k in zip(t, c)])
        return x1, y1
    elif b == 0 and c == 0:
        x2 = np.array([[i - j for i in k] for j, k in zip(t, a)])
        y2 = np.array([[i - j for i in k] for j, k in zip(t, d)])
        return x2, y2
    else:
        raise ValueError('Something is wrong.')
Everything works fine now and it is all thanks to @Chris Doyle. Thank you again.
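A further tweak I am considering, just a sketch I have not folded into my real code: using None instead of 0 as the "not given" default, with is None checks, so the test stays unambiguous even if I later pass numpy arrays directly (where == 0 is evaluated element-wise).

def TEST(t, a=None, b=None, c=None, d=None):
    # None marks "argument not provided"; comparing arrays with == 0 would be ambiguous
    if a is None and d is None:
        x1 = np.array([[i + j for i in k] for j, k in zip(t, b)])
        y1 = np.array([[i + j for i in k] for j, k in zip(t, c)])
        return x1, y1
    elif b is None and c is None:
        x2 = np.array([[i - j for i in k] for j, k in zip(t, a)])
        y2 = np.array([[i - j for i in k] for j, k in zip(t, d)])
        return x2, y2
    else:
        raise ValueError('Provide either (a, d) or (b, c), not a mixture.')

test = TEST(t=data_t, a=data_a, d=data_d)  # b and c are simply left out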
I have to convert the following MATLAB code:
% calculate ECDF from D at n points
function X = ECDF_representation(D, n)
    m = mean(D); X = [];
    for d=1:size(D,2),
        [f, x] = ecdf(D(:,d) + randn(size(D(:,d)))*0.01);
        ll = interp1(f, x, linspace(0,1,n), 'cubic');
        X = [X ll];
    end
    X = [X m];
end
into Python. Here's what I've tried so far:
import numpy as np
from scipy import interpolate
from statsmodels.distributions.empirical_distribution import ECDF
def ecdf_representation(D, n):
    """calculate ECDF from D at n points"""
    m = np.mean(D)
    X = []
    for d in xrange(D.shape[1] + 1):
        f, x = ECDF([D[:, d] + np.random.randn(np.shape(D[:, d])) * 0.01])
        ll = interpolate.interp1d(f, x, np.linspace(0, 1, n), 'cubic')
        X = [X, ll]
    X = [X, m]
    return X
I get the error:
in ecdf_representation
for d in xrange(D.shape[1] + 1):
IndexError: tuple index out of range
If I switch the line for d in xrange(D.shape[1] + 1): to for d in xrange(D.shape[0] + 1):, I resolve the original error but get a new one:
line 25, in ecdf_representation
f, x = ECDF([D[:, d] + np.random.randn(np.shape(D[:, d])) * 0.01])
IndexError: too many indices for array
EDIT:
D is a 1-d array because of the value returned by the following function, where data is a pandas data frame.
I think I should be passing something else into the ecdf_representation function, but I'm not sure what.
def segment_energy(data, th):
    mag = np.linalg.norm(data.loc[:, ['x', 'y', 'z']], axis=1)
    mag = np.array(mag)
    mag -= np.mean(mag)
    above = np.where(mag >= th * np.std(mag))
    indicator = np.zeros(mag.shape)
    indicator[above] = 1
    plt.plot(mag)
    plt.plot(indicator * 1000, 'r')
    plt.show()
    return indicator
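EDIT 2: for anyone who finds this later, here is the direction I am trying now. It is only a sketch: it treats a 1-d D as a single column, uses statsmodels' ECDF object (sorted values in .x, step probabilities in .y, with a leading (-inf, 0) point) and scipy's interp1d, which returns a callable. I have not verified that it matches the MATLAB output exactly.

import numpy as np
from scipy import interpolate
from statsmodels.distributions.empirical_distribution import ECDF

def ecdf_representation(D, n):
    """Sketch: sample each column's ECDF at n evenly spaced probabilities."""
    D = np.asarray(D, dtype=float)
    if D.ndim == 1:
        D = D[:, None]                   # treat a 1-d input (e.g. segment_energy output) as one column
    m = D.mean(axis=0)                   # column means, like MATLAB's mean(D)
    X = []
    for d in range(D.shape[1]):
        col = D[:, d] + np.random.randn(D.shape[0]) * 0.01  # small jitter, as in the MATLAB code
        e = ECDF(col)
        f, x = e.y[1:], e.x[1:]          # drop the (-inf, 0) point statsmodels prepends
        g = interpolate.interp1d(f, x, kind='cubic',
                                 bounds_error=False, fill_value=(x[0], x[-1]))
        X.extend(g(np.linspace(0, 1, n)))
    X.extend(np.atleast_1d(m))           # append the column means, like X = [X m]
    return np.asarray(X)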
Update 1.0 start
It seems that when I call

for i, Wi in enumerate(W.T):
    idx.append(i)
    result.append(pool.apply_async(ALS_Y, (X, Wi, Q, lambda_, n_factors, i,)))

the arguments passed into the functions ALS_Y/ALS_X are not references; the arguments are copied. So when X or Y is a very large matrix (in my case about 6000*40, and since this sits inside a for loop, assume the number of iterations is around 50,000), it exceeds the memory limit.
I then tried using a global variable instead, passing only the indices as parameters into the function:
import multiprocessing
import time
import numpy as np

def func(idx):
    global a
    a[idx] += 1

if __name__ == "__main__":
    a = range(10)
    for j in xrange(2):
        pool = multiprocessing.Pool(processes=8)
        result = []
        for i in xrange(10):
            result.append(pool.apply_async(func, (i, )))
        pool.close()
        pool.join()
        print a
        print "Sub-process(es) done."
it outputs:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Sub-process(es) done.
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Sub-process(es) done.
So this means it still copied a!
Now I wonder: is there any way to handle this issue? I would appreciate any help!
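(For reference, the closest thing I have found so far is multiprocessing.Array, which puts the buffer in shared memory, so workers write to the same storage instead of a per-process copy. This is only a sketch of the toy example above, not my real code; the init_worker helper is my own addition:)

import multiprocessing

def init_worker(shared_arr):
    # runs once in every worker; keeps a reference to the shared buffer
    global a
    a = shared_arr

def func(idx):
    a[idx] += 1  # writes go to the shared buffer, not a per-process copy

if __name__ == "__main__":
    a = multiprocessing.Array('d', range(10))  # 'd' = C double, backed by shared memory
    pool = multiprocessing.Pool(processes=8, initializer=init_worker, initargs=(a,))
    for i in range(10):
        pool.apply_async(func, (i,))
    pool.close()
    pool.join()
    print(list(a))  # expected: [1.0, 2.0, ..., 10.0]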
Update 1.0 end
Below is my Python code for solving the matrix factorization problem Q ≈ XY, with weight matrix W. However, the code below is not efficient, and I hope it can be converted into a parallel version; using a GPU would be best, but CPU is also fine. I have no experience with parallel programming, so can anybody give me some advice?
Below is the code that factorizes the matrix using ALS (alternating least squares, details here):
for ii in range(n_iterations):
    for u, Wu in enumerate(W):
        X[u] = np.linalg.solve(np.dot(Y, np.dot(np.diag(Wu), Y.T)) + lambda_ * np.eye(n_factors),
                               np.dot(Y, np.dot(np.diag(Wu), Q[u].T))).T #X_inner loop
    for i, Wi in enumerate(W.T):
        Y[:,i] = np.linalg.solve(np.dot(X.T, np.dot(np.diag(Wi), X)) + lambda_ * np.eye(n_factors), #Y_inner loop
                                 np.dot(X.T, np.dot(np.diag(Wi), Q[:, i]))) #Y_inner loop
    error = get_error(Q, X, Y, W)
    weighted_errors.append(error)
    print '{}th iteration is completed'.format(ii)
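(An aside I noticed while trying to speed this up: np.diag(Wu) allocates a dense n x n matrix on every call. If I am not mistaken, broadcasting gives the same products without it; a sketch of the X update, keeping my function signature:)

def ALS_X(Y, Wu, Q, lambda_, n_factors, u):
    # Wu[:, None] * Y.T scales each row of Y.T by its weight, the same as np.dot(np.diag(Wu), Y.T)
    A = np.dot(Y, Wu[:, None] * Y.T) + lambda_ * np.eye(n_factors)
    b = np.dot(Y, Wu * Q[u])  # np.dot(np.diag(Wu), Q[u]) is just an elementwise product
    return np.linalg.solve(A, b).T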
After using the multiprocessing lib, my code is now:
def ALS_X(Y, Wu, Q, lambda_, n_factors, u):
    return np.linalg.solve(np.dot(Y, np.dot(np.diag(Wu), Y.T)) + lambda_ * np.eye(n_factors),
                           np.dot(Y, np.dot(np.diag(Wu), Q[u].T))).T

for ii in range(n_iterations):
    pool = multiprocessing.Pool(processes=12) #create pool
    result = [] #store each row for X
    idx = [] #store the row number
    for u, Wu in enumerate(W):
        idx.append(u)
        result.append(pool.apply_async(ALS_X, (Y, Wu, Q, lambda_, n_factors, u,)))
    pool.close()
    pool.join()
    for u, vector in zip(idx, result):
        X[u] = vector.get() #assign the result to X
    ######################################
    pool = multiprocessing.Pool(processes=12) #for Y, much similar to X
    result = []
    idx = []
    for i, Wi in enumerate(W.T):
        idx.append(i)
        result.append(pool.apply_async(ALS_Y, (X, Wi, Q, lambda_, n_factors, i,)))
    pool.close()
    pool.join()
    for i, vector in zip(idx, result):
        Y[:,i] = vector.get()
    error = get_error(Q, X, Y, W)
    weighted_errors.append(error)
    print '{}th iteration is completed'.format(ii), 'error: ', error
But, to my misery, the program always crashes silently...
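(One thing I still plan to check, to at least rule out worker exceptions being swallowed: apply_async only re-raises a worker's exception when .get() is called, so wrapping those calls should print which row fails, if any. A sketch:)

for u, vector in zip(idx, result):
    try:
        X[u] = vector.get(timeout=600)  # .get() re-raises any exception raised inside the worker
    except Exception as exc:
        print('row {} failed: {!r}'.format(u, exc))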
Below is my whole code; it's all a bit messy. Just ignore load_data, get_error and vec2str, since here I generate the matrices randomly.
import pandas as pd
import numpy as np
import multiprocessing

def vec2str(vec):
    res = ''
    for dim in range(len(vec)):
        res += str(vec[dim]) + ','
    return res
def load_data(heads, filename, sep, header=None):
    data = pd.read_table(filename, sep=sep, header=header, names=heads)
    rp = data.pivot_table(columns=['sid'], index=['uid'], values=['rating']) #not generally...
    Q = rp.fillna(0)
    Q = Q.values
    W = Q > 0.5
    W[W == True] = 1
    W[W == False] = 0
    W = W.astype(np.float64, copy=False)
    return Q, W, rp

def get_error(Q, X, Y, W):
    return np.sum((W * (Q - np.dot(X, Y)))**2)
'''
X[u] = np.linalg.solve(np.dot(, np.dot(np.diag(), .T)) + * np.eye(),
                       np.dot(, np.dot(np.diag(), Q[u].T))).T
'''
def ALS_X(Y, Wu, Q, lambda_, n_factors, u):
    return np.linalg.solve(np.dot(Y, np.dot(np.diag(Wu), Y.T)) + lambda_ * np.eye(n_factors),
                           np.dot(Y, np.dot(np.diag(Wu), Q[u].T))).T

'''
Y[:,i] = np.linalg.solve(np.dot(X.T, np.dot(np.diag(Wi), X)) + lambda_ * np.eye(n_factors),
                         np.dot(X.T, np.dot(np.diag(Wi), Q[:, i])))
'''
def ALS_Y(X, Wi, Q, lambda_, n_factors, i):
    return np.linalg.solve(np.dot(X.T, np.dot(np.diag(Wi), X)) + lambda_ * np.eye(n_factors),
                           np.dot(X.T, np.dot(np.diag(Wi), Q[:, i])))
if __name__ == "__main__":
    lambda_ = 0.1
    n_factors = 40
    filename = 'data_songID'
    n_iterations = 20
    #Q, W, rp = load_data(['uid', 'sid', 'rating'], filename, ',')
    Q = np.random.rand(1000, 1000)
    m, n = Q.shape
    W = np.eye(1000)
    print 'Loading data finished, ', 'size: ', Q.shape
    print 'Settings ', 'lambda = {}'.format(lambda_), 'n_factors = {}'.format(n_factors)
    X = 5 * np.random.rand(m, n_factors)
    Y = 5 * np.random.rand(n_factors, n)
    errors = []
    for ii in range(n_iterations):
        X = np.linalg.solve(np.dot(Y, Y.T) + lambda_ * np.eye(n_factors),
                            np.dot(Y, Q.T)).T
        Y = np.linalg.solve(np.dot(X.T, X) + lambda_ * np.eye(n_factors),
                            np.dot(X.T, Q))
        if ii % 100 == 0:
            print('{}th iteration is completed'.format(ii))
        errors.append(get_error(Q, X, Y, W))
    Q_hat = np.dot(X, Y)
    print('Error of rated movies: {}'.format(get_error(Q, X, Y, W)))
    print errors
    #####ALS start....#####
    print '*'*100
    weighted_errors = []
    for ii in range(n_iterations):
        pool = multiprocessing.Pool(processes=12)
        result = []
        idx = []
        for u, Wu in enumerate(W):
            idx.append(u)
            result.append(pool.apply_async(ALS_X, (Y, Wu, Q, lambda_, n_factors, u,)))
        pool.close()
        pool.join()
        for u, vector in zip(idx, result):
            X[u] = vector.get()
        ######################################
        pool = multiprocessing.Pool(processes=12)
        result = []
        idx = []
        for i, Wi in enumerate(W.T):
            idx.append(i)
            result.append(pool.apply_async(ALS_Y, (X, Wi, Q, lambda_, n_factors, i,)))
        pool.close()
        pool.join()
        for i, vector in zip(idx, result):
            Y[:,i] = vector.get()
        error = get_error(Q, X, Y, W)
        weighted_errors.append(error)
        print '{}th iteration is completed'.format(ii), 'error: ', error
    weighted_Q_hat = np.dot(X, Y)
    print weighted_errors
    X.tofile('X.bin')
    Y.tofile('Y.bin')
    latent_user_file = open('user_latent', 'w')
    for idx in range(len(rp.axes[0])):
        latent_user_file.write(str(rp.axes[0][idx]) + '\t' + vec2str(X[idx,:]) + '\n')
    latent_mid_file = open('mid_latent', 'w')
    for idx in range(len(rp.axes[1])):
        latent_mid_file.write(str(rp.axes[1][idx]) + '\t' + vec2str(Y.T[idx,:]) + '\n')
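In case it is useful: the direction I am experimenting with now, following the copy problem from Update 1.0, is to hand the large read-only arrays to each worker once through the Pool initializer, so only an index and one weight row travel with each task. This is only a sketch of the X update (the helper names init_X_worker and solve_row are mine); the Y update would be analogous, and a new pool is still needed in every iteration because Y changes.

def init_X_worker(Y0, Q0, lam, k):
    # runs once in each worker: keep the big read-only arrays as worker globals
    global Y_g, Q_g, lam_g, k_g
    Y_g, Q_g, lam_g, k_g = Y0, Q0, lam, k

def solve_row(args):
    u, Wu = args  # only an index and one weight row are pickled per task
    A = np.dot(Y_g, Wu[:, None] * Y_g.T) + lam_g * np.eye(k_g)
    b = np.dot(Y_g, Wu * Q_g[u])
    return u, np.linalg.solve(A, b)

pool = multiprocessing.Pool(processes=12, initializer=init_X_worker,
                            initargs=(Y, Q, lambda_, n_factors))
for u, row in pool.map(solve_row, list(enumerate(W))):
    X[u] = row
pool.close()
pool.join()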