How to parallel for-loop in python? [closed] - python

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 6 years ago.
Improve this question
Update 1.0 start
It seems when call
for i, Wi in enumerate(W.T):
idx.append(i)
result.append(pool.apply_async(ALS_Y, (X, Wi, Q, lambda_, n_factors, i,)))
the arguments passed into the function ALS_Y/ALS_X are not references, it copied the arguments..So, when X or Yis very large matrixes, such as, in my case, it's 6000*40 or so(And it's a for-loop, let's assume the number iterations is 50 000, so ...), it exceeds the limit of memory.
And then I tried using global arguments, just passing the indices as parameters into the functions,
import multiprocessing
import time
import numpy as np
def func(idx):
global a
a[idx] += 1
if __name__ == "__main__":
a=range(10)
for j in xrange(2):
pool = multiprocessing.Pool(processes=8)
result = []
for i in xrange(10):
result.append(pool.apply_async(func, (i, )))
pool.close()
pool.join()
print a
print "Sub-process(es) done."
it outputs: `
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Sub-process(es) done.
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Sub-process(es) done.
So, this means it still copieda`!
Now, I wonder is there any way to handler this issue? Appreciate!
Update 1.0 end
Below is my code in python to solve the matrix factorization problem.
W = XY. However, the codes below is not efficient, and I hope that it can be converted into parallel version, using GPU is the best, CPU is also fine. I am have no experience about parallel programming, so is there anybody can give me some advice?
Below is the code to factorize the matrix using ALS (alternating least square, details here)
for ii in range(n_iterations):
for u, Wu in enumerate(W):
X[u] = np.linalg.solve(np.dot(Y, np.dot(np.diag(Wu), Y.T)) + lambda_ * np.eye(n_factors),
np.dot(Y, np.dot(np.diag(Wu), Q[u].T))).T #X_inner loop
for i, Wi in enumerate(W.T):
Y[:,i] = np.linalg.solve(np.dot(X.T, np.dot(np.diag(Wi), X)) + lambda_ * np.eye(n_factors), #Y_inner loop
np.dot(X.T, np.dot(np.diag(Wi), Q[:, i])))#Y_inner loop
error = get_error(Q, X, Y, W)
weighted_errors.append(error)
print '{}th iteration is completed'.format(ii)
After used multiprocessing lib, my code now:
def ALS_X(Y, Wu, Q, lambda_, n_factors, u):
return np.linalg.solve(np.dot(Y, np.dot(np.diag(Wu), Y.T)) + lambda_ * np.eye(n_factors),
np.dot(Y, np.dot(np.diag(Wu), Q[u].T))).T
for ii in range(n_iterations):
pool = multiprocessing.Pool(processes=12)#create pool
result = []#store each row for X
idx = []#store the row number
for u, Wu in enumerate(W):
idx.append(u)
result.append(pool.apply_async(ALS_X, (Y, Wu, Q, lambda_, n_factors, u,)))
pool.close()
pool.join()
for u, vector in zip(idx, result):
X[u] = vector.get()#assign the result to X
######################################
pool = multiprocessing.Pool(processes=12)#for Y, much similar to X
result = []
idx = []
for i, Wi in enumerate(W.T):
idx.append(i)
result.append(pool.apply_async(ALS_Y, (X, Wi, Q, lambda_, n_factors, i,)))
pool.close()
pool.join()
for i, vector in zip(idx, result):
Y[:,i] = vector.get()
error = get_error(Q, X, Y, W)
weighted_errors.append(error)
print '{}th iteration is completed'.format(ii), 'error: ',error
But somewhat misery, the program always crashed silently...
Below is the whole bunch of my code.. it's all in messy. JUST ignore load_data get_error and vec2str, since here I generate the matrix randomly..
import pandas as pd
import numpy as np
import multiprocessing
def vec2str(vec):
res = ''
for dim in len(vec):
res += str(vec[dim]) + ','
return res
def load_data(heads, filename, sep,header=None):
data = pd.read_table(filename, sep=sep, header=header, names=heads)
rp = data.pivot_table(columns=['sid'],index=['uid'],values=['rating'])#not generally...
Q = rp.fillna(0)
Q = Q.values
W = Q >0.5
W[W == True] = 1
W[W == False] = 0
W = W.astype(np.float64, copy=False)
return Q, W, rp
def get_error(Q, X, Y, W):
return np.sum((W * (Q - np.dot(X, Y)))**2)
'''
X[u] = np.linalg.solve(np.dot(, np.dot(np.diag(), .T)) + * np.eye(),
np.dot(, np.dot(np.diag(), Q[u].T))).T
'''
def ALS_X(Y, Wu, Q, lambda_, n_factors, u):
return np.linalg.solve(np.dot(Y, np.dot(np.diag(Wu), Y.T)) + lambda_ * np.eye(n_factors),
np.dot(Y, np.dot(np.diag(Wu), Q[u].T))).T
'''
Y[:,i] = np.linalg.solve(np.dot(X.T, np.dot(np.diag(Wi), X)) + lambda_ * np.eye(n_factors),
np.dot(X.T, np.dot(np.diag(Wi), Q[:, i])))
'''
def ALS_Y(X, Wi, Q, lambda_, n_factors, i):
return np.linalg.solve(np.dot(X.T, np.dot(np.diag(Wi), X)) + lambda_ * np.eye(n_factors),
np.dot(X.T, np.dot(np.diag(Wi), Q[:, i])))
if __name__ == "__main__":
lambda_ = 0.1
n_factors = 40
filename = 'data_songID'
n_iterations = 20
#Q, W, rp = load_data(['uid', 'sid', 'rating'], filename, ',')
Q = np.random.rand(1000,1000)
m, n = Q.shape
W = np.eye(1000)
print 'Loading data finished, ', 'size: ', Q.shape
print 'Settings ', 'lambda = {}'.format(lambda_), 'n_factors = {}'.format(n_factors)
X = 5 * np.random.rand(m, n_factors)
Y = 5 * np.random.rand(n_factors, n)
errors = []
for ii in range(n_iterations):
X = np.linalg.solve(np.dot(Y, Y.T) + lambda_ * np.eye(n_factors),
np.dot(Y, Q.T)).T
Y = np.linalg.solve(np.dot(X.T, X) + lambda_ * np.eye(n_factors),
np.dot(X.T, Q))
if ii % 100 == 0:
print('{}th iteration is completed'.format(ii))
errors.append(get_error(Q, X, Y, W))
Q_hat = np.dot(X, Y)
print('Error of rated movies: {}'.format(get_error(Q, X, Y, W)))
print errors
#####ALS start....#####
print '*'*100
weighted_errors = []
for ii in range(n_iterations):
pool = multiprocessing.Pool(processes=12)
result = []
idx = []
for u, Wu in enumerate(W):
idx.append(u)
result.append(pool.apply_async(ALS_X, (Y, Wu, Q, lambda_, n_factors, u,)))
pool.close()
pool.join()
for u, vector in zip(idx, result):
X[u] = vector.get()
######################################
pool = multiprocessing.Pool(processes=12)
result = []
idx = []
for i, Wi in enumerate(W.T):
idx.append(i)
result.append(pool.apply_async(ALS_Y, (X, Wi, Q, lambda_, n_factors, i,)))
pool.close()
pool.join()
for i, vector in zip(idx, result):
Y[:,i] = vector.get()
error = get_error(Q, X, Y, W)
weighted_errors.append(error)
print '{}th iteration is completed'.format(ii), 'error: ',error
weighted_Q_hat = np.dot(X,Y)
print weighted_errors
X.tofile('X.bin')
Y.tofile('Y.bin')
latent_user_file = open('user_latent','w')
for idx in len(rp.axes[0]):
latent_user_file.write(str(rp.axes[0][idx]) + '\t' + vec2str(X[idx,:]) + '\n')
latent_mid_file = open('mid_latent', 'w')
for idx in len(rp.axes[1]):
latent_mid_file.write(str(rp.axes[1][idx]) + '\t' + vec2str(Y.T[idx,:]) + '\n')

Related

Implementation of an algorithm for simultaneous diagonalization

I am trying to write an implementation of an algorithm for the simultaneous diagonalization of two matrices (which are assumed to be simultaneously diagonalizable). However, the algorithm does not seem to converge. The algorithm is described in SIAM J. Matrix Anal. Appl. 14, 927 (1993).
Here is the first part of my code to set up a test case:
import numpy as np
import numpy.linalg as lin
from scipy.optimize import minimize
N = 3
# Unitary example matrix
X = np.array([
[-0.54717736-0.43779416j, 0.26046313+0.11082439j, 0.56151027-0.33692186j],
[-0.33452046-0.37890784j, -0.40907097-0.70730291j, -0.15344477+0.23100467j],
[-0.31253864-0.39468687j, 0.05342909+0.49940543j, -0.70062586+0.05835082j]
])
# Generate eigenvalues
LA = np.diag(np.arange(0, N))
LB = np.diag(np.arange(N, 2*N))
# Generate simultaneously diagonalizable matrices
A = X # LA # np.conj(X).T
B = X # LB # np.conj(X).T
This should generate two 3x3 matrices which are simultaneously diagonalizable, since they are constructed this way via X. The following code block then defines a few helper functions:
def off2(A, B):
"""Defines the distance from the matrices from
their diagonal form.
"""
C = np.abs(A) ** 2 + np.abs(B) ** 2
diag_idx = np.diag_indices(N)
C[diag_idx] = 0
return np.sum(C)
def Rijcs(i, j, c, s):
"""Function R(i, j, c, s) from the paper, see
Eq. (1) therein. Used for plane rotations in
the plane ij.
"""
res = np.eye(N, dtype=complex)
res[i, i] = c
res[i, j] = -np.conj(s)
res[j, i] = s
res[j, j] = np.conj(c)
return res
def cs(theta, phi):
"""Parametrization for c and s."""
c = np.cos(theta)
s = np.exp(1j * phi) * np.sin(theta)
return c, s
With these definitions, the algorithm can be implemented:
tol = 1e-10
Q = np.eye(N, dtype=complex)
while True:
off = off2(A, B)
# Print statement for debugging purposes
print(off)
# Terminate if the result is converged
if off <= tol * (lin.norm(A, "fro") + lin.norm(B, "fro")):
break
for i in range(N):
for j in range(i + 1, N):
def fij(c, s):
aij = A[i, j]
aji = A[j, i]
aii = A[i, i]
ajj = A[j, j]
bij = B[i, j]
bji = B[j, i]
bii = B[i, i]
bjj = B[j, j]
x = np.array(
[
[np.conj(aij), np.conj(aii - ajj), -np.conj(aji)],
[aji, (aii - ajj), -aij ],
[np.conj(bij), np.conj(bii - bjj), -np.conj(bji)],
[bji, (bii - bjj), -bij ]
]
)
y = np.array(
[
[c ** 2],
[c * s],
[s ** 2]
]
)
return lin.norm(x # y, 2)
# 5
result = minimize(
lambda x: fij(*cs(x[0], x[1])),
x0=(0, 0),
bounds=(
(-0.25 * np.pi, 0.25 * np.pi),
(-np.pi, np.pi)
),
)
theta, phi = result['x']
c, s = cs(theta, phi)
# 6
R = Rijcs(i, j, c, s)
# 7
Q = Q # R
A = np.conj(R).T # A # R
B = np.conj(R).T # B # R
As you can observe from the print statement, the "distance" of A and B from diagonal form does not really converge. Instead, the values printed range from 0.5 up to 3 and oscillate up and down. Is there a bug in this code and if so, where exactly is it?

Vectorizing theano/aesara variable operations

I am trying to compute the function below with theano/aesara in an preferably vectorized manner:
![image|620x182](upload://9Px5wAGjZdkBXVBg4fqmuSPorPr.png)
The solution i have is not vectorized and therefore way too slow:
def apply_adstock_with_lag(x, L, P, D):
"""
params:
x: original array
L: length
P: peak, delay in effect
D: decay, retain
"""
x = np.append(np.zeros(L - 1), x)
weights = [0 for _ in range(L)]
for l in range(L):
weight = D ** ((l - P) ** 2)
weights[L - 1 - l] = weight
weights = np.array(weights)
adstocked_x = []
for i in range(L - 1, len(x)):
x_array = x[i - L + 1:i + 1]
xi = sum(x_array * weights) / sum(weights)
adstocked_x.append(xi)
adstocked_x = tt.as_tensor_variable(adstocked_x)
return adstocked_x
An similar function although simplier and its vectorized solution can be found below, note that this is much much quicker probably due to the vectorized operations:
![image|252x39](upload://ucZeqCmCXcBRAHLdA7lJ0crs1Oz.png)
def adstock_geometric_theano_pymc3(x, theta):
x = tt.as_tensor_variable(x)
def adstock_geometric_recurrence_theano(index, input_x, decay_x, theta):
return tt.set_subtensor(decay_x[index], tt.sum(input_x + theta * decay_x[index - 1]))
len_observed = x.shape[0]
x_decayed = tt.zeros_like(x)
x_decayed = tt.set_subtensor(x_decayed[0], x[0])
output, _ = theano.scan(
fn=adstock_geometric_recurrence_theano,
sequences=[tt.arange(1, len_observed), x[1:len_observed]],
outputs_info=x_decayed,
non_sequences=theta,
n_steps=len_observed - 1
)
return output[-1]
I cant come up with the vectorized solution to my adstock-function, can anyone give it a go?
Have you tried:
def apply_adstock_with_lag(x, L, P, D):
adstocked_x = np.convolve(x, D**((np.arange(0, L, 1) - P)**2))[:-(L-1)] / sum(D**((np.arange(0, L, 1) - P)**2))
adstocked_x = at.as_tensor_variable(adstocked_x)
return adstocked_x
This should work

How to reduce run time in my python code?

I am currently working on a project that requires me to run a complete python code base. For research purpose, I need to run the code as fast as possible. Yet I am fairly new to programming and have no idea how to reduce run time. So I hope someone can help me on that. Any advice would be appreciated. Here's part of my code base, which used a lot of nested for loops, so it might significantly increase run time.
def a_j(r, a, A): # the Claussius-Mossotti factor, determined by a symmetric (3 × 3) matrix such that (A_i)^T = A_i
alph = np.array([[0,0,0],[0,0,0],[0,0,0]],complex)
for i in range(3):
for j in range(3):
alph[i,j] = (r * a * A[i,j])
return alph
def W_ext(x, k, rho, alpha, A): # particle–particle interaction term
n = x.shape[0] # the number of x vextors
result = np.zeros([3*n,3*n],complex)
u = np.zeros((n, 3)) # u = x - x'
for i in range(n):
for j in range(n):
if i != j:
u[i] = x[i] - x[j]
block_result = a_j(rho[i], alpha, A) * G((u[i]), k) * a_j(rho[j], alpha, A)
for m in range(3):
for l in range(3):
result[3*i + m, 3*j + l] = block_result[m,l]
return result.imag
def A_ext(rho, a, A): # single-particle term
n = rho.shape[0]
result = np.zeros([3*n,3*n],complex)
for i in range(n):
for j in range(n):
if i == j:
block_result = a_j(rho[i], a, A).imag
for m in range(3):
for l in range(3):
result[3*i + m, 3*j + l] = block_result[m,l]
return result # (3 x 3) matrix
def P_ext(e, A, W, omega):
eT = np.matrix.getH(e)
mm1 = np.matmul(A, e)
mm2 = np.matmul(W, e)
extinction = (np.dot(eT, mm1) + np.dot(eT, mm2)) * (omega/2.0)
return extinction
#ABSORPTION
def W_abs(x, k, rho, alpha, A, chi): # particle–particle interaction term
n = x.shape[0]
result = np.zeros([3*n,3*n],complex)
u = np.zeros((n, 3))
for i in range(n):
for j in range(n):
if i != j:
u[i] = x[i] - x[j]
block_result = np.matrix.getH(a_j(rho[i], alpha, A)) * (1.0 / np.conjugate(chi)).imag * a_j(rho[i], alpha, A) * G((u[i]), k) * a_j(rho[j], alpha, A)
for m in range(3):
for l in range(3):
result[3*i + m, 3*j + l] = block_result[m,l]
return 2.0 * result.real # (3 x 3) matrix
def A_abs(rho, a, A, chi): # single-particle term
n = rho.shape[0]
result = np.zeros([3*n,3*n],complex)
for i in range(n):
for j in range(n):
if i == j:
block_result = np.matrix.getH(a_j(rho[i], a, A)) * (1.0 / np.conjugate(chi)).imag * a_j(rho[i], a, A)
for m in range(3):
for l in range(3):
result[3*i + m, 3*j + l] = block_result[m,l]
return result # (3 x 3) matrix

Full algorithm (math) of natural cubic splines computation in Python?

I'm interested in full Python code (with math formulas) with all computations needed to calculate natural Cubic Splines from scratch. If possible, fast (e.g. Numpy-based).
I created this question only to share my code (as answer) that I programmed recently from scratch (based on Wikipedia) when learning cubic splines.
I programmed the following code based on Russian Wikipedia Article, as I see almost the same description and formulas are located in English Article.
To speed-up computation I used both Numpy and Numba.
To check the correctness of code I made tests with comparison to reference implementation of the natural cubic spline of scipy.interpolate.CubicSpline, you can see np.allclose(...) assertion in my code that proves my formulas are correct.
Also, I did timings:
calc (spline_scipy): Timed best=2.712 ms, mean=2.792 +- 0.1 ms
calc (spline_numba): Timed best=916.000 us, mean=938.868 +- 17.9 us
speedup: 2.973
use (spline_scipy): Timed best=5.262 ms, mean=5.320 +- 0.1 ms
use (spline_numba): Timed best=4.745 ms, mean=5.420 +- 0.3 ms
speedup: 0.981
which shows that my spline-params computation is around 3x times faster than the Scipy version and usage of spline (computation for given x) is the same speed as Scipy.
Running code below needs one-time installing following packages python -m pip install numpy numba scipy timerit, here scipy and timerit are only needed for testing purposes and not needed for actual algorithm.
Code draws plots showing original multi-line and both spline approximation for Scipy and Numba versions, as one can see Scipy and Numba lines are the same (meaning that spline computation is same):
Code:
Try it online!
import numpy as np, numba
# Solves linear system given by Tridiagonal Matrix
# Helper for calculating cubic splines
#numba.njit(
[f'f{ii}[:](f{ii}[:], f{ii}[:], f{ii}[:], f{ii}[:])' for ii in (4, 8)],
cache = True, fastmath = True, inline = 'always')
def tri_diag_solve(A, B, C, F):
n = B.size
assert A.ndim == B.ndim == C.ndim == F.ndim == 1 and (
A.size == B.size == C.size == F.size == n
) #, (A.shape, B.shape, C.shape, F.shape)
Bs, Fs = np.zeros_like(B), np.zeros_like(F)
Bs[0], Fs[0] = B[0], F[0]
for i in range(1, n):
Bs[i] = B[i] - A[i] / Bs[i - 1] * C[i - 1]
Fs[i] = F[i] - A[i] / Bs[i - 1] * Fs[i - 1]
x = np.zeros_like(B)
x[-1] = Fs[-1] / Bs[-1]
for i in range(n - 2, -1, -1):
x[i] = (Fs[i] - C[i] * x[i + 1]) / Bs[i]
return x
# Calculate cubic spline params
#numba.njit(
#[f'(f{ii}, f{ii}, f{ii}, f{ii})(f{ii}[:], f{ii}[:])' for ii in (4, 8)],
cache = True, fastmath = True, inline = 'always')
def calc_spline_params(x, y):
a = y
h = np.diff(x)
c = np.concatenate((np.zeros((1,), dtype = y.dtype),
np.append(tri_diag_solve(h[:-1], (h[:-1] + h[1:]) * 2, h[1:],
((a[2:] - a[1:-1]) / h[1:] - (a[1:-1] - a[:-2]) / h[:-1]) * 3), 0)))
d = np.diff(c) / (3 * h)
b = (a[1:] - a[:-1]) / h + (2 * c[1:] + c[:-1]) / 3 * h
return a[1:], b, c[1:], d
# Spline value calculating function, given params and "x"
#numba.njit(
[f'f{ii}[:](f{ii}[:], i8[:], f{ii}[:], f{ii}[:], f{ii}[:], f{ii}[:], f{ii}[:])' for ii in (4, 8)],
cache = True, fastmath = True, inline = 'always')
def func_spline(x, ix, x0, a, b, c, d):
dx = x - x0[1:][ix]
return a[ix] + (b[ix] + (c[ix] + d[ix] * dx) * dx) * dx
#numba.njit(
[f'i8[:](f{ii}[:], f{ii}[:], b1)' for ii in (4, 8)],
cache = True, fastmath = True, inline = 'always')
def searchsorted_merge(a, b, sort_b):
ix = np.zeros((len(b),), dtype = np.int64)
if sort_b:
ib = np.argsort(b)
pa, pb = 0, 0
while pb < len(b):
if pa < len(a) and a[pa] < (b[ib[pb]] if sort_b else b[pb]):
pa += 1
else:
ix[pb] = pa
pb += 1
return ix
# Compute piece-wise spline function for "x" out of sorted "x0" points
#numba.njit([f'f{ii}[:](f{ii}[:], f{ii}[:], f{ii}[:], f{ii}[:], f{ii}[:], f{ii}[:])' for ii in (4, 8)],
cache = True, fastmath = True, inline = 'always')
def piece_wise_spline(x, x0, a, b, c, d):
xsh = x.shape
x = x.ravel()
#ix = np.searchsorted(x0[1 : -1], x)
ix = searchsorted_merge(x0[1 : -1], x, False)
y = func_spline(x, ix, x0, a, b, c, d)
y = y.reshape(xsh)
return y
def test():
import matplotlib.pyplot as plt, scipy.interpolate
from timerit import Timerit
Timerit._default_asciimode = True
np.random.seed(0)
def f(n):
x = np.sort(np.random.uniform(0., n / 5 * np.pi, (n,))).astype(np.float64)
return x, (np.sin(x) * 5 + np.sin(1 + 2.5 * x) * 3 + np.sin(2 + 0.5 * x) * 2).astype(np.float64)
def spline_numba(x0, y0):
a, b, c, d = calc_spline_params(x0, y0)
return lambda x: piece_wise_spline(x, x0, a, b, c, d)
def spline_scipy(x0, y0):
f = scipy.interpolate.CubicSpline(x0, y0, bc_type = 'natural')
return lambda x: f(x)
def timings():
x0, y0 = f(10000)
s, t = {}, []
gs = [spline_scipy, spline_numba]
spline_numba(np.copy(x0[::3]), np.copy(y0[::3])) # pre-compile numba
for g in gs:
print('calc (', g.__name__, '): ', sep = '', end = '', flush = True)
tim = Timerit(num = 150, verbose = 1)
for _ in tim:
s_ = g(x0, y0)
s[g.__name__] = s_
t.append(tim.mean())
if len(t) >= 2:
print('speedup:', round(t[-2] / t[-1], 3))
print()
x = np.linspace(x0[0], x0[-1], 50000, dtype = np.float64)
t = []
s['spline_numba'](np.copy(x[::3])) # pre-compile numba
for i in range(len(s)):
print('use (', gs[i].__name__, '): ', sep = '', end = '', flush = True)
tim = Timerit(num = 100, verbose = 1)
sg = s[gs[i].__name__]
for _ in tim:
sg(x)
t.append(tim.mean())
if len(t) >= 2:
print('speedup:', round(t[-2] / t[-1], 3))
x0, y0 = f(50)
timings()
shift = 3
x = np.linspace(x0[0], x0[-1], 1000, dtype = np.float64)
ys = spline_scipy(x0, y0)(x)
yn = spline_numba(x0, y0)(x)
assert np.allclose(ys, yn), np.absolute(ys - yn).max()
plt.plot(x0, y0, label = 'orig')
plt.plot(x, ys, label = 'spline_scipy')
plt.plot(x, yn, '-.', label = 'spline_numba')
plt.legend()
plt.show()
if __name__ == '__main__':
test()

how can I convert this code (theano) to a simple python lign

how can I convert this code (theano) to a simple python lign
[h_vals, _, y_vals] = theano.scan(fn=lstm_Step,
sequences=[dict(input=inputs, taps=[0])],
outputs_info=[h0, c0, None],
non_sequences=[Whx, Whh, Wcx, Wch, Wyh, bh, bc, by],
strict=True)[0]
this is an example of what I means,
import theano
import theano.tensor as tt
def add_multiply(a, b, k):
return a + b + k, a * b * k
def python_main():
x = 1
y = 2
k = 1
tuples = []
for i in range(5):
x, y = add_multiply(x, y, k)
tuples.append((x, y, k))
return tuples
def theano_main():
x = tt.constant(1, dtype='uint32')
y = tt.constant(2, dtype='uint32')
k = tt.scalar(dtype='uint32')
outputs, _ = theano.scan(add_multiply, outputs_info=[x, y], non_sequences=[k], n_steps=5)
g = theano.grad(tt.sum(outputs), k)
f = theano.function(inputs=[k], outputs=outputs + [g])
tuples = []
xvs, yvs, _ = f(1)
for xv, yv in zip(xvs, yvs):
tuples.append((xv, yv, 1))
return tuples
print 'Python:', python_main()
print 'Theano:', theano_main()
So as you say #Nurzhan I need to know what this library does especially what's the means of theano.scan.

Categories