tf.while_loop only makes one loop - python

After days of trying to apply tf.while_loop, I still fail to understand how it works (or rather, why it does not). The documentation and various questions here on StackOverflow haven't helped so far.
The main idea is to train the different columns of a tensor trueY separately using a while_loop. The problem is that when I trace this code, I see that the while_loop body gets called only once.
I'd like to dynamically assign names to the variables created in the while_loop so that I can access them outside the loop after they have been created (hence the gen_name function, which tries to generate a name for the dense layer created in each iteration), and to make tf.while_loop run n times this way.
Here is a sample of my code showing the issue (not the full code, and modified to demonstrate the problem):
...................
config['dim_y'] = 10
Xl = tf.placeholder( self.dtype, shape=(batchsize, config['dim_x']) )
Yl = tf.placeholder( self.dtype, shape=(batchsize, config['dim_y']) )
Gl = tf.placeholder( self.dtype, shape=(batchsize, config['dim_g']) )
costl, cost_m, self.cost_b = self.__cost( Xl, Yl, Gl, False )

def __eval_cost( self, A, X, Y, G, reuse ):
    AGXY = tf.concat( [A, G, X, Y], -1 )
    Z, mu_phi3, ls_phi3 = build_nn( AGXY, ...., reuse )
    _cost = -tf.reduce_sum( ls_phi3, -1 )
    _cost += .5 * tf.reduce_sum( tf.pow( mu_phi3, 2 ), -1 )
    _cost += .5 * tf.reduce_sum( tf.exp( 2*ls_phi3 ), -1 )
    return _cost

def __cost( self, trueX, trueY, trueG, reuse ):
    ........
    columns = tf.unstack(trueY, axis=-1)
    AGX = tf.concat( [ AX, G ], -1 )
    pre_Y = self.build_nn( AGX, ....., reuse )
    index_loop = (tf.constant(0), _cost, _cost_bl)

    def condition(index, _cost, _cost_supervised_bi_label):
        return tf.less(index, self.config['dim_y'])

    def bodylabeled(index, _cost, _cost_bl):
        def gen_name(var_name):
            # split eg 'cost/while/strided_slice_5:0' => '5'
            # split eg 'cost/while/strided_slice:0' => 'slice'
            iter = var_name.split('/')[-1].split(':')[0].split('_')[-1]
            if iter == "slice":
                return '0phi2y'
            else:
                return '{}phi2y'.format(int(iter) % self.config['dim_y'])

        y_i = tf.gather(columns, index)
        y = tf.expand_dims( tf.one_hot(tf.to_int32(y_i, name='ToInt32'), depth, dtype=self.dtype ), 0 )
        Y = tf.tile( y, [self.config['L'],1,1] )
        c = tf.constant(0, name='test')
        log_pred_Y = tf.layers.dense( pre_Y, 2, name=gen_name(iter[index].name), reuse=reuse )
        log_pred_Y = log_pred_Y - tf.reduce_logsumexp( log_pred_Y, -1, keep_dims=True )
        _cost += self.__eval_cost_given_axgy( A, X, Y, G, reuse=tf.AUTO_REUSE )
        _cost_bl += -tf.reduce_sum( tf.multiply( Y, log_pred_Y ), -1 )
        return tf.add(index, 1), _cost, _cost_supervised_bi_label

    _cost, _bl = tf.while_loop(condition, bodylabeled, index_loop, parallel_iterations=1,
                               shape_invariants=(index_loop[0].get_shape(),
                                                 tf.TensorShape([None, 100]),
                                                 tf.TensorShape([None, 100])))[1:]

op = costl + cost_m + cost_b
with tf.Session(config=config) as sess:
    sess.run( tf.global_variables_initializer() )
    sess.run( tf.local_variables_initializer() )
    for batchl in batches:
        sess.run( op,
                  feed_dict={ Xl: Xl[batchl,:],
                              Yl: Yl[batchl,:].toarray(),
                              Gl: Gl[batchl,:].toarray(),
                              is_training: True } )
    for n in tf.get_default_graph().as_graph_def().node:
        print(n.name)
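Note that tracing only one call is actually expected behaviour in TF 1.x: tf.while_loop invokes the Python body function exactly once, to build the graph for a single iteration, and the resulting ops are then executed n times at session run time. That is also why a scheme that generates a new layer name per iteration from inside the body cannot work as written: gen_name runs only during that single trace. A minimal self-contained sketch of the trace-once/run-many behaviour (my own illustration, not the question's code):

import tensorflow as tf

i0 = tf.constant(0)
acc0 = tf.constant(0.0)

def cond(i, acc):
    return tf.less(i, 10)

def body(i, acc):
    print('body traced')  # fires exactly once, at graph-construction time
    return i + 1, acc + tf.cast(i, tf.float32)

_, total = tf.while_loop(cond, body, [i0, acc0])

with tf.Session() as sess:
    print(sess.run(total))  # 45.0: the loop ops ran 10 times at run time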

Related

Represent multiple normal priors with a single call in Bayesian logistic regression with Tensorflow

I am trying to properly represent multiple normal priors with a single function call in the joint distribution and run mcmc.sample_chain with the NUTS kernel. I used the excellent blog post here as a sketch, with the following MCMC implementation.
def trace_fn(_, pkr):
    return (
        pkr.inner_results.inner_results.target_log_prob,
        pkr.inner_results.inner_results.leapfrogs_taken,
        pkr.inner_results.inner_results.has_divergence,
        pkr.inner_results.inner_results.energy,
        pkr.inner_results.inner_results.log_accept_ratio
    )

def run_nuts(
        target_log_prob_fn,
        inits,
        trace_fn=trace_fn,
        bijectors_list=None,
        num_steps=5000,
        num_burnin=500,
        n_chains=n_chains):
    step_size = np.random.rand(n_chains, 1)*.5 + 1.
    if not isinstance(inits, list):
        inits = [inits]
    if bijectors_list is None:
        bijectors_list = [tfb.Identity()]*len(inits)
    kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
        tfp.mcmc.TransformedTransitionKernel(
            inner_kernel=tfp.mcmc.NoUTurnSampler(
                target_log_prob_fn,
                step_size=[step_size]*len(inits)
            ),
            bijector=bijectors_list
        ),
        target_accept_prob=.8,
        num_adaptation_steps=int(0.8*num_burnin),
        step_size_setter_fn=lambda pkr, new_step_size: pkr._replace(
            inner_results=pkr.inner_results._replace(step_size=new_step_size)
        ),
        step_size_getter_fn=lambda pkr: pkr.inner_results.step_size,
        log_accept_prob_getter_fn=lambda pkr: pkr.inner_results.log_accept_ratio,
    )
    res = tfp.mcmc.sample_chain(
        num_results=num_steps,
        num_burnin_steps=num_burnin,
        current_state=inits,
        kernel=kernel,
        trace_fn=trace_fn
    )
    return res
I can get the MCMC working when individually specifying the priors but not when declaring them as a batch.
This works
dtype = tf.float32
root = tfd.JointDistributionCoroutine.Root

def basic_logistic(data_df):
    def _generator():
        a = yield root(tfd.Sample(tfd.Normal(0, 10), 1, name='a'))
        b = yield root(tfd.Sample(tfd.Normal(0, 10), 1, name='b'))
        c = yield root(tfd.Sample(tfd.Normal(0, 10), 1, name='c'))
        l = a + tf.cast(data_df['x1'], dtype)*b + tf.cast(data_df['x2'], dtype)*c
        print(l)
        y = yield tfd.Independent(
            tfd.Bernoulli(
                logits=l,
                name='success'
            ),
            reinterpreted_batch_ndims=1
        )
    return tfd.JointDistributionCoroutine(_generator)

arm_0_test = basic_logistic(arm_0_test_df)
arm_0_log_prob = lambda *args: arm_0_test.log_prob(args + (tf.cast(arm_0_test_df['y'], dtype),))
n_chains = 3
arm0_res = run_nuts(arm_0_log_prob, [tf.ones((n_chains,1)), tf.ones((n_chains,1)), tf.ones((n_chains,1))])
This does not
dtype = tf.float32
root = tfd.JointDistributionCoroutine.Root

def basic_logistic_multiple(X_df):
    X_df_copy = X_df.copy()
    n_features = X_df_copy.shape[1] + 1  # have to include intercept term
    prior_means = [0 for i in range(n_features)]  # list of prior means
    print(prior_means)
    prior_sds = [10 for i in range(n_features)]  # list of prior sds
    X_df_copy.insert(0, 'intercept', np.ones(X_df_copy.shape[0]))  # add column of 1s for intercept
    X = tf.convert_to_tensor(X_df_copy, dtype=dtype)

    def _generator():
        beta = yield root(tfd.Sample(
            tfd.Normal(prior_means, prior_sds, name='beta')
        ))
        print(beta)
        l = tf.tensordot(X, beta, axes=1)
        # l = tf.reshape(l, (l.shape[0], ))
        print(l)
        y = yield tfd.Independent(
            tfd.Bernoulli(
                logits=l,
                name='success'
            ),
            reinterpreted_batch_ndims=1
        )
    return tfd.JointDistributionCoroutine(_generator)

arm_0_test = basic_logistic_multiple(arm_0_test_df)
arm_0_log_prob = lambda *args: arm_0_test.log_prob(args + (tf.cast(arm_0_test_df['y'], dtype),))
n_chains = 3
init_beta, _ = arm_0_test.sample(n_chains)
init_beta = tf.zeros_like(init_beta)
arm0_res = run_nuts(arm_0_log_prob, [init_beta,])
I get the following error:
ValueError: Dimensions must be equal, but are 3 and 1000000 for '{{node mcmc_sample_chain/dual_averaging_step_size_adaptation___init__/_bootstrap_results/transformed_kernel_bootstrap_results/NoUTurnSampler/.bootstrap_results/process_args/maybe_call_fn_and_grads/value_and_gradients/value_and_gradient/JointDistributionCoroutine_CONSTRUCTED_AT_top_level/log_prob/add_1}} = AddV2[T=DT_FLOAT](mcmc_sample_chain/dual_averaging_step_size_adaptation___init__/_bootstrap_results/transformed_kernel_bootstrap_results/NoUTurnSampler/.bootstrap_results/process_args/maybe_call_fn_and_grads/value_and_gradients/value_and_gradient/JointDistributionCoroutine_CONSTRUCTED_AT_top_level/log_prob/add, mcmc_sample_chain/dual_averaging_step_size_adaptation___init__/_bootstrap_results/transformed_kernel_bootstrap_results/NoUTurnSampler/.bootstrap_results/process_args/maybe_call_fn_and_grads/value_and_gradients/value_and_gradient/JointDistributionCoroutine_CONSTRUCTED_AT_top_level/log_prob/Independentsuccess/log_prob/Sum)' with input shapes: [3,3], [1000000].
I can sample from both JointDistributions fine, so I believe something is clashing in the sample_chain function. Possibly my initial-state declaration?
Thanks for any help!
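For what it's worth, the [3,3]-versus-[1000000] shapes in the error suggest the batched prior contributes a per-feature log-prob of shape [n_chains, n_features] rather than a scalar per chain. A hedged sketch of one possible fix, untested against the original data: wrap the batched Normal in tfd.Independent so the feature axis is folded into the event, and contract X against beta with einsum so the chain batch dimension broadcasts.

def _generator():
    # Hypothetical rewrite of the generator in basic_logistic_multiple.
    beta = yield root(tfd.Independent(
        tfd.Normal(loc=tf.cast(prior_means, dtype),
                   scale=tf.cast(prior_sds, dtype), name='beta'),
        reinterpreted_batch_ndims=1))  # log_prob -> shape [n_chains], not [n_chains, n_features]
    # 'np,...p->...n' contracts the feature axis and broadcasts any leading
    # chain dimensions of beta, giving logits of shape [..., n_obs].
    l = tf.einsum('np,...p->...n', X, beta)
    yield tfd.Independent(
        tfd.Bernoulli(logits=l, name='success'),
        reinterpreted_batch_ndims=1)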

Numba compilation fails in parallel for loop

I am trying to use prange in Numba to parallelize a routine that shifts arrays over an irregularly defined grid. In this example the grid is just equidistant, for simplicity.
Here's the code:
import numpy as np
from numba import jit, prange

@jit( nopython=True, parallel=True )
def roll_arrays( a, shift_values, x_grid ):
    x_max = np.amax(x_grid)
    total_items = a.shape[0]
    the_ddtype = a.dtype
    result = np.zeros( (a.shape[0], a.shape[1]), dtype=the_ddtype )
    for k in prange( total_items ):
        edge_val_left = a[k,0]
        edge_val_right = a[k,-1]
        # extend grid to edges with boundary values (flat extrapolation)
        extended_boundary = np.abs( shift_values[k] )  # positive or negative depending on shift
        if( shift_values[k] != 0.0 ):
            x0_right = np.linspace( x_max, x_max + extended_boundary, 10 )
            x0_left = np.linspace( -x_max-extended_boundary, -x_max, 10 )
            if( shift_values[k] > 0.0 ):
                # we fill left values
                x_dense_grid = np.concatenate( ( x0_left, x_grid + shift_values[k] ) )
                x_dense_grid = np.unique( x_dense_grid )
                ynew = np.concatenate( ( edge_val_left*np.ones( 10 ), a[k,:] ) )
            elif( shift_values[k] < 0.0 ):
                x_dense_grid = np.concatenate( ( x_grid + shift_values[k], x0_right ) )
                x_dense_grid = np.unique( x_dense_grid )
                ynew = np.concatenate( ( a[k,:], edge_val_right*np.ones( 10 ) ) )
            max_index = x_dense_grid.shape[0] - 1
            mid_x_index = np.int( x_dense_grid.shape[0]/2 )
            shift_index = -( np.argmin( np.abs( x_dense_grid - shift_values[k] ) ) - mid_x_index )
            if ( shift_index < 0 ):
                ynew[ max_index+shift_index: ] = edge_val_right*np.ones( np.int(1 + np.abs(shift_index)) )
            elif ( shift_index > 0 ):
                ynew[ 0:shift_index ] = edge_val_left*np.ones( np.int(np.abs(shift_index)) )
            if( shift_index == 0 ):
                print("I'm zero")
            # return on the original grid
            f_interp = np.interp( x_grid, x_dense_grid, ynew )
            result[k,:] = f_interp
        else:
            # no shift
            result[k,:] = a[k,:]
    return result

x = np.linspace(-10, 10, 2001)
shifts = np.array([-1.0, -2.0, 1.0])
f = np.array( [ k/( x**2 + k**2 ) for k in range(1, shifts.shape[0]) ] )
fs = roll_arrays( f, shifts, x )
print("done")
The problem is the use of prange in the k loop; the error displayed is:
TypeError: Failed in nopython mode pipeline (step: convert to parfors)
object of type 'NoneType' has no len()
So I'm guessing the convert-to-parfors pass fails due to some race condition or equivalent. Is there a way to overcome this issue in the above routine? The error does not occur if I drop parallel=True from the decorator.
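One workaround worth trying, under the assumption that convert-to-parfors chokes on the branch-dependent definitions of ynew and x_dense_grid inside the prange body: give every array a single definition site per iteration. Since np.interp already clamps to fp[0] and fp[-1] outside the sample range, which is exactly the flat extrapolation the manual edge padding emulates, the body can shrink to one interp call. A minimal sketch, not a verified fix for this specific Numba version:

import numpy as np
from numba import njit, prange

@njit(parallel=True)
def roll_arrays_simple(a, shift_values, x_grid):
    result = np.empty_like(a)
    for k in prange(a.shape[0]):
        s = shift_values[k]
        if s == 0.0:
            result[k, :] = a[k, :]
        else:
            # np.interp clamps to the edge values outside the shifted grid,
            # reproducing the flat boundary extrapolation.
            result[k, :] = np.interp(x_grid, x_grid + s, a[k, :])
    return result

x = np.linspace(-10, 10, 2001)
shifts = np.array([-1.0, -2.0, 1.0])
f = np.array([k/(x**2 + k**2) for k in range(1, shifts.shape[0] + 1)])  # one row per shift
fs = roll_arrays_simple(f, shifts, x)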

Model Overfits after adding bias

This is an implementation of a 4-bit up counter. First I implemented the model without the bias term, and it seemed to work correctly; after adding the bias term, the model overfits at a very early stage and the loss becomes zero. Even for unseen data, the model predicts the same outputs as for the training data. Below is the implementation. What is the problem?
import numpy as np
import matplotlib.pyplot as plt

# Batch training
# input & output
x = np.array([[0,0,1,0],[0,0,0,0],[0,0,0,1],[0,0,1,1],[0,1,0,0],[0,1,0,1],[0,1,1,0],[0,1,1,1],[1,0,0,0],[1,0,0,1]])  # 10*4
y = np.array([[0,0,1,1],[0,0,0,1],[0,0,1,0],[0,1,0,0],[0,1,0,1],[0,1,1,0],[0,1,1,1],[1,0,0,0],[1,0,0,1],[1,0,1,0]])  # 10*4

def sigmoid(x):
    return 1/(1+np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x)*(1-sigmoid(x))

Input_Size = 4
Output_Size = 4
Hidden_Layer_Neurons = 8
Learning_Rate = 0.01
weight1 = np.random.uniform( size = ( Input_Size, Hidden_Layer_Neurons ) )  # 4*8
weight2 = np.random.uniform( size = ( Hidden_Layer_Neurons, Output_Size ) )  # 8*4
loss = []
iteration = []
bias1 = np.random.uniform( size = ( x.shape[0], Hidden_Layer_Neurons ) )
bias2 = np.random.uniform( size = ( x.shape[0], Output_Size ) )

for i in range(30000):
    a1 = x  # 10*4
    z2 = np.dot( a1, weight1 ) + bias1  # 10*4 ** 4*8 = 10*8
    a2 = sigmoid(z2)  # 10*8
    z3 = np.dot( a2, weight2 ) + bias2  # 10*8 ** 8*4 = 10*4
    val = 0
    err1 = 0
    if i > 100:
        for j in range(10):
            for k in range(4):
                val += (y[j][k]-z3[j][k])*(y[j][k]-z3[j][k])
        val = val/(2*10)
        loss.append(val)
        iteration.append(i)
    del_out = ( z3 - y )  # 10*4 - 10*4 = 10*4
    weight2 -= Learning_Rate*np.dot( a2.T, del_out )  # 8*10 ** 10*4 = 8*4
    bias2 -= Learning_Rate*del_out
    err = np.dot(del_out, weight2.T)*sigmoid_prime(z2)  # 10*4 ** 4*8 = 10*8 * 10*8 = 10*8
    weight1 -= Learning_Rate*np.dot( a1.T, err )  # 4*10 ** 10*8 = 4*8
    bias1 -= Learning_Rate*err

print(z3)
plt.plot( iteration, loss )
plt.show()

def model():
    q = np.array([[1,0,1,0],[1,0,1,1],[1,1,0,0],[1,1,0,1],[1,1,1,0],[1,0,0,0],[1,1,1,1],[0,0,1,1],[0,0,0,1],[0,0,1,0]])
    z = np.dot(q, weight1) + bias1
    act_hidden = sigmoid(z)
    output = np.dot(act_hidden, weight2) + bias2
    print(output)

model()
Why does adding the bias cause a problem here, and when should a bias be added?
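One observation (my reading of the code, not a confirmed diagnosis): bias1 and bias2 are created with shape (batch_size, units), i.e. a separate bias row per training example. That gives the network enough per-example capacity to memorize each training row outright, and model() then reuses those training-row biases on the unseen inputs, so the training outputs are reproduced. The conventional formulation is a single bias vector per layer, broadcast over the batch, with its gradient summed over the batch, as in this sketch:

bias1 = np.random.uniform( size = ( 1, Hidden_Layer_Neurons ) )  # not ( x.shape[0], ... )
bias2 = np.random.uniform( size = ( 1, Output_Size ) )

# inside the training loop, accumulate the bias gradients over the batch:
bias2 -= Learning_Rate * del_out.sum( axis=0, keepdims=True )
bias1 -= Learning_Rate * err.sum( axis=0, keepdims=True )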

Gradients error using TensorArray Tensorflow

I am trying to implement a multidimensional LSTM in TensorFlow. I am using a TensorArray to remember previous states, and a somewhat complicated scheme to fetch the two neighbour states (from above and from the left). tf.cond wants both possible branches to exist and to take the same number of inputs, which is why I added one more cell.zero_state at (last index + 1) of the states and a function that resolves the correct state indexes. When I try to use an optimizer to minimize the cost, I get this error:
InvalidArgumentError (see above for traceback): TensorArray MultiDimentionalLSTMCell-l1-multi-l1/state_ta_262@gradients: Could not read from TensorArray index 809 because it has not yet been written to.
Can someone tell me how to fix it?
PS: without the optimizer it works!
class MultiDimentionalLSTMCell(tf.nn.rnn_cell.RNNCell):
    """
    Note that state_is_tuple is always True.
    """
    def __init__(self, num_units, forget_bias=1.0, activation=tf.nn.tanh):
        self._num_units = num_units
        self._forget_bias = forget_bias
        self._activation = activation

    @property
    def state_size(self):
        return tf.nn.rnn_cell.LSTMStateTuple(self._num_units, self._num_units)

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM).
        @param inputs: (batch, n)
        @param state: the states and hidden units of the two cells
        """
        with tf.variable_scope(scope or type(self).__name__):
            c1, c2, h1, h2 = state
            # change bias argument to False since LN will add bias via shift
            concat = tf.nn.rnn_cell._linear([inputs, h1, h2], 5 * self._num_units, False)
            i, j, f1, f2, o = tf.split(1, 5, concat)
            new_c = (c1 * tf.nn.sigmoid(f1 + self._forget_bias) +
                     c2 * tf.nn.sigmoid(f2 + self._forget_bias) +
                     tf.nn.sigmoid(i) * self._activation(j))
            new_h = self._activation(new_c) * tf.nn.sigmoid(o)
            new_state = tf.nn.rnn_cell.LSTMStateTuple(new_c, new_h)
            return new_h, new_state

def multiDimentionalRNN_whileLoop(rnn_size, input_data, sh, dims=None, scopeN="layer1"):
    """Implements naive multidimensional recurrent neural networks
    @param rnn_size: the hidden units
    @param input_data: the data to process, of shape [batch, h, w, channels]
    @param sh: [height, width] of the windows
    @param dims: dimensions along which to reverse the input data, e.g.
        dims=[False, True, True, False] => True means reverse that dimension
    @param scopeN: the scope
    returns [batch, h/sh[0], w/sh[1], channels*sh[0]*sh[1]], the output of the lstm
    """
    with tf.variable_scope("MultiDimentionalLSTMCell-" + scopeN):
        cell = MultiDimentionalLSTMCell(rnn_size)
        shape = input_data.get_shape().as_list()
        if shape[1] % sh[0] != 0:
            offset = tf.zeros([shape[0], sh[0]-(shape[1]%sh[0]), shape[2], shape[3]])
            input_data = tf.concat(1, [input_data, offset])
            shape = input_data.get_shape().as_list()
        if shape[2] % sh[1] != 0:
            offset = tf.zeros([shape[0], shape[1], sh[1]-(shape[2]%sh[1]), shape[3]])
            input_data = tf.concat(2, [input_data, offset])
            shape = input_data.get_shape().as_list()
        h, w = int(shape[1]/sh[0]), int(shape[2]/sh[1])
        features = sh[1]*sh[0]*shape[3]
        batch_size = shape[0]
        x = tf.reshape(input_data, [batch_size, h, w, features])
        if dims is not None:
            x = tf.reverse(x, dims)
        x = tf.transpose(x, [1, 2, 0, 3])
        x = tf.reshape(x, [-1, features])
        x = tf.split(0, h*w, x)
        sequence_length = tf.ones(shape=(batch_size,), dtype=tf.int32)*shape[0]
        inputs_ta = tf.TensorArray(dtype=tf.float32, size=h*w, name='input_ta')
        inputs_ta = inputs_ta.unpack(x)
        states_ta = tf.TensorArray(dtype=tf.float32, size=h*w+1, name='state_ta', clear_after_read=False)
        outputs_ta = tf.TensorArray(dtype=tf.float32, size=h*w, name='output_ta')
        states_ta = states_ta.write(h*w, tf.nn.rnn_cell.LSTMStateTuple(
            tf.zeros([batch_size, rnn_size], tf.float32),
            tf.zeros([batch_size, rnn_size], tf.float32)))

        def getindex1(t, w):
            return tf.cond(tf.less_equal(tf.constant(w), t),
                           lambda: t - tf.constant(w),
                           lambda: tf.constant(h*w))

        def getindex2(t, w):
            return tf.cond(tf.less(tf.constant(0), tf.mod(t, tf.constant(w))),
                           lambda: t - tf.constant(1),
                           lambda: tf.constant(h*w))

        time = tf.constant(0)

        def body(time, outputs_ta, states_ta):
            constant_val = tf.constant(0)
            stateUp = tf.cond(tf.less_equal(tf.constant(w), time),
                              lambda: states_ta.read(getindex1(time, w)),
                              lambda: states_ta.read(h*w))
            stateLast = tf.cond(tf.less(constant_val, tf.mod(time, tf.constant(w))),
                                lambda: states_ta.read(getindex2(time, w)),
                                lambda: states_ta.read(h*w))
            currentState = stateUp[0], stateLast[0], stateUp[1], stateLast[1]
            out, state = cell(inputs_ta.read(time), currentState)
            outputs_ta = outputs_ta.write(time, out)
            states_ta = states_ta.write(time, state)
            return time + 1, outputs_ta, states_ta

        def condition(time, outputs_ta, states_ta):
            return tf.less(time, tf.constant(h*w))

        result, outputs_ta, states_ta = tf.while_loop(condition, body, [time, outputs_ta, states_ta])
        outputs = outputs_ta.pack()
        states = states_ta.pack()
        y = tf.reshape(outputs, [h, w, batch_size, rnn_size])
        y = tf.transpose(y, [2, 0, 1, 3])
        if dims is not None:
            y = tf.reverse(y, dims)
        return y

def tanAndSum(rnn_size, input_data, scope):
    outs = []
    for i in range(2):
        for j in range(2):
            dims = [False]*4
            if i != 0:
                dims[1] = True
            if j != 0:
                dims[2] = True
            outputs = multiDimentionalRNN_whileLoop(rnn_size, input_data, [2, 2],
                                                    dims, scope+"-multi-l{0}".format(i*2+j))
            outs.append(outputs)
    outs = tf.pack(outs, axis=0)
    mean = tf.reduce_mean(outs, 0)
    return tf.nn.tanh(mean)

graph = tf.Graph()
with graph.as_default():
    input_data = tf.placeholder(tf.float32, [20, 36, 90, 1])
    #input_data = tf.ones([20,36,90,1], dtype=tf.float32)
    sh = [2, 2]
    out1 = tanAndSum(20, input_data, 'l1')
    out = tanAndSum(25, out1, 'l2')
    cost = tf.reduce_mean(out)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
    #out = multiDimentionalRNN_raw_rnn(2, input_data, sh, dims=[False,True,True,False], scopeN="layer1")
    #cell = MultiDimentionalLSTMCell(10)
    #out = cell.zero_state(2, tf.float32).c
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    ou, k, _ = session.run([out, cost, optimizer], {input_data: np.ones([20, 36, 90, 1], dtype=np.float32)})
    print(ou.shape)
    print(k)
You should add the parameter parallel_iterations=1 to your while_loop call.
Such as:
result, outputs_ta, states_ta = tf.while_loop(
    condition, body, [time, outputs_ta, states_ta], parallel_iterations=1)
This is required because inside the body you perform read and write operations on the same TensorArray (states_ta). With parallel loop execution (parallel_iterations > 1), one thread may try to read an entry of the TensorArray that another thread has not yet written.
I've tested your code snippet with parallel_iterations=1 on TensorFlow 0.12.1 and it works as expected.

Faster Method Of Calculating Portfolio Volatility

I am writing a numba function to calculate a portfolio's volatility:
Here are some of the functions I am using to do this:
import numba as nb
import numpy as np

def portfolio_s2( cv, weights ):
    """ Calculate the variance of a portfolio """
    return weights.dot( cv ).dot( weights )

@nb.jit( nopython=True )
def portfolio_s2c( cv, weights ):
    s0 = 0.0
    for i in range( weights.shape[0]-1 ):
        wi = weights[i]
        s0 += cv[i,i]*wi*wi
        s1 = 0.0
        for j in range( i+1, weights.shape[0] ):
            s1 += wi*weights[j]*cv[i,j]
        s0 += 2.0*s1
    i = weights.shape[0]-1
    wi = weights[ i ]
    s0 += cv[i,i]*wi**2
    return s0

@nb.jit( nopython=True )
def portfolio_s2b( cv, weights ):
    s0 = 0.0
    for i in range( weights.shape[0] ):
        s0 += weights[i]*weights[i]*cv[i,i]
    s1 = 0.0
    for i in range( weights.shape[0]-1 ):
        s2 = 0.0
        for j in range( i+1, weights.shape[0] ):
            s2 += weights[j]*cv[i,j]
        s1 += weights[i]*s2
    return s0 + 2.0*s1
I am testing the performance of the functions using this code:
N = 1000
num_tests = 10000
times_2b = []
times_2c = []
times_np = []
matrix_sizes = [ 2, 4, 8, 10, 20, 40, 80, 160 ]  #, 320, 640, 1280, 2560 ]
for m in matrix_sizes:
    X = np.random.randn( N, m )
    cv = np.cov( X, rowvar=0 )
    w = np.ones( cv.shape[0] ) / cv.shape[0]
    s2 = helpers.portfolio_s2( cv, w )
    s2b = helpers.portfolio_s2b( cv, w )
    s2c = helpers.portfolio_s2c( cv, w )
    np.testing.assert_almost_equal( s2, s2b )
    np.testing.assert_almost_equal( s2, s2c )
    with Timer( 'nb2b' ) as t2b:
        for _ in range(num_tests):
            helpers.portfolio_s2b( cv, w )
    with Timer( 'nb2c' ) as t2c:
        for _ in range(num_tests):
            helpers.portfolio_s2c( cv, w )
    with Timer( 'np' ) as tnp:
        for _ in range(num_tests):
            helpers.portfolio_s2( cv, w )
    times_2b.append( t2b.timetaken )
    times_2c.append( t2c.timetaken )
    times_np.append( tnp.timetaken )
plt.figure()
plt.plot( matrix_sizes, times_2b, label='2b' )
plt.plot( matrix_sizes, times_2c, label='2c' )
plt.plot( matrix_sizes, times_np, label='np' )
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
This is the Timer class:
import time

class Timer( object ):
    def __init__(self, name=''):
        self._name = name

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, a, b, c):
        self.end = time.time()
        self.timetaken = self.end - self.start
        print( '{0} Took {1} seconds'.format( self._name, self.timetaken ) )
The results (a plot of run time against matrix size, omitted here) show that:
The numba versions of the function outperform the numpy version for matrix sizes under 80.
The numba versions seem to scale worse than the numpy function.
Why is this? Is there some sort of overhead associated with calling numpy, as opposed to calling numba?
Why does the numpy function scale better? Is it doing something fancy with BLAS in the background, or is it using a superior algorithm for the calculation?
Can I make the numba function scale as well as the numpy function?
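A hedged note on the scaling questions: weights.dot(cv).dot(weights) hands the O(n^2) matrix-vector product to whatever BLAS library NumPy is linked against, whose vectorized, cache-blocked kernels eventually beat straightforward compiled scalar loops as n grows, while per-call Python dispatch overhead dominates for small n. One thing worth trying, assuming a Numba build that supports np.dot in nopython mode (it does for contiguous float arrays when SciPy/BLAS is available):

@nb.jit( nopython=True )
def portfolio_s2d( cv, weights ):
    # delegate the O(n^2) work to BLAS from inside the jitted function
    return np.dot( weights, np.dot( cv, weights ) )

This keeps Numba's low call overhead for small matrices while inheriting BLAS scaling for large ones; the actual crossover points would need to be re-measured.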
