I am trying to properly represent multiple normal priors with a single function call in the joint distribution and run mcmc.sample_chain with a NUTS kernel. I used the excellent blog post here as a sketch for the following MCMC implementation.
def trace_fn(_, pkr):
    return (
        pkr.inner_results.inner_results.target_log_prob,
        pkr.inner_results.inner_results.leapfrogs_taken,
        pkr.inner_results.inner_results.has_divergence,
        pkr.inner_results.inner_results.energy,
        pkr.inner_results.inner_results.log_accept_ratio
    )
def run_nuts(
        target_log_prob_fn,
        inits,
        trace_fn=trace_fn,
        bijectors_list=None,
        num_steps=5000,
        num_burnin=500,
        n_chains=n_chains):
    step_size = np.random.rand(n_chains, 1)*.5 + 1.
    if not isinstance(inits, list):
        inits = [inits]
    if bijectors_list is None:
        bijectors_list = [tfb.Identity()]*len(inits)
    kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
        tfp.mcmc.TransformedTransitionKernel(
            inner_kernel=tfp.mcmc.NoUTurnSampler(
                target_log_prob_fn,
                step_size=[step_size]*len(inits)
            ),
            bijector=bijectors_list
        ),
        target_accept_prob=.8,
        num_adaptation_steps=int(0.8*num_burnin),
        step_size_setter_fn=lambda pkr, new_step_size: pkr._replace(
            inner_results=pkr.inner_results._replace(step_size=new_step_size)
        ),
        step_size_getter_fn=lambda pkr: pkr.inner_results.step_size,
        log_accept_prob_getter_fn=lambda pkr: pkr.inner_results.log_accept_ratio,
    )
    res = tfp.mcmc.sample_chain(
        num_results=num_steps,
        num_burnin_steps=num_burnin,
        current_state=inits,
        kernel=kernel,
        trace_fn=trace_fn
    )
    return res
I can get the MCMC working when individually specifying the priors but not when declaring them as a batch.
This works:
dtype=tf.float32
root = tfd.JointDistributionCoroutine.Root
def basic_logistic(data_df):
    def _generator():
        a = yield root(tfd.Sample(tfd.Normal(0, 10), 1, name='a'))
        b = yield root(tfd.Sample(tfd.Normal(0, 10), 1, name='b'))
        c = yield root(tfd.Sample(tfd.Normal(0, 10), 1, name='c'))
        l = a + tf.cast(data_df['x1'], dtype)*b + tf.cast(data_df['x2'], dtype)*c
        print(l)
        y = yield tfd.Independent(
            tfd.Bernoulli(
                logits=l,
                name='success'
            ),
            reinterpreted_batch_ndims=1
        )
    return tfd.JointDistributionCoroutine(_generator)
arm_0_test = basic_logistic(arm_0_test_df)
arm_0_log_prob = lambda *args: arm_0_test.log_prob(args + (tf.cast(arm_0_test_df['y'],dtype),))
n_chains=3
arm0_res = run_nuts(arm_0_log_prob, [tf.ones((n_chains,1)), tf.ones((n_chains,1)), tf.ones((n_chains,1))])
This does not:
dtype=tf.float32
root = tfd.JointDistributionCoroutine.Root
def basic_logistic_multiple(X_df):
    X_df_copy = X_df.copy()
    n_features = X_df_copy.shape[1] + 1  # have to include intercept term
    prior_means = [0 for i in range(n_features)]  # list of prior means
    print(prior_means)
    prior_sds = [10 for i in range(n_features)]  # list of prior sds
    X_df_copy.insert(0, 'intercept', np.ones(X_df_copy.shape[0]))  # Add column of 1s for intercept
    X = tf.convert_to_tensor(X_df_copy, dtype=dtype)
    def _generator():
        beta = yield root(tfd.Sample(
            tfd.Normal(prior_means, prior_sds, name='beta')
        ))
        print(beta)
        l = tf.tensordot(X, beta, axes=1)
        # l = tf.reshape(l, (l.shape[0], ))
        print(l)
        y = yield tfd.Independent(
            tfd.Bernoulli(
                logits=l,
                name='success'
            ),
            reinterpreted_batch_ndims=1
        )
    return tfd.JointDistributionCoroutine(_generator)
arm_0_test = basic_logistic_multiple(arm_0_test_df)
arm_0_log_prob = lambda *args: arm_0_test.log_prob(args + (tf.cast(arm_0_test_df['y'],dtype),))
n_chains=3
init_beta, _ = arm_0_test.sample(n_chains)
init_beta = tf.zeros_like(init_beta)
arm0_res = run_nuts(arm_0_log_prob, [init_beta,])
I get the following error
ValueError: Dimensions must be equal, but are 3 and 1000000 for '{{node mcmc_sample_chain/dual_averaging_step_size_adaptation___init__/_bootstrap_results/transformed_kernel_bootstrap_results/NoUTurnSampler/.bootstrap_results/process_args/maybe_call_fn_and_grads/value_and_gradients/value_and_gradient/JointDistributionCoroutine_CONSTRUCTED_AT_top_level/log_prob/add_1}} = AddV2[T=DT_FLOAT](mcmc_sample_chain/dual_averaging_step_size_adaptation___init__/_bootstrap_results/transformed_kernel_bootstrap_results/NoUTurnSampler/.bootstrap_results/process_args/maybe_call_fn_and_grads/value_and_gradients/value_and_gradient/JointDistributionCoroutine_CONSTRUCTED_AT_top_level/log_prob/add, mcmc_sample_chain/dual_averaging_step_size_adaptation___init__/_bootstrap_results/transformed_kernel_bootstrap_results/NoUTurnSampler/.bootstrap_results/process_args/maybe_call_fn_and_grads/value_and_gradients/value_and_gradient/JointDistributionCoroutine_CONSTRUCTED_AT_top_level/log_prob/Independentsuccess/log_prob/Sum)' with input shapes: [3,3], [1000000].
I can sample from both JointDistributions fine, so I believe something is clashing inside the sample_chain call. Possibly my initial state declaration?
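For what it's worth, here is a minimal shape check I can run right after building the batched model (a sketch using only the objects defined above); the error message above already shows the beta term contributing a [3, 3] log-prob against the [1000000] data term:

# Sketch: inspect the batched model right after constructing it.
print(arm_0_test.event_shape)   # per-component event shapes of the joint
print(arm_0_test.batch_shape)   # I suspect the non-empty batch shape on beta is related

init_beta, _ = arm_0_test.sample(n_chains)
print(init_beta.shape)          # (n_chains, n_features) -- (3, 3) in my case

# Calling the target log-prob directly (outside sample_chain) should hit the
# same shape clash, so the issue seems to be in the joint's shapes rather than
# in the NUTS/step-size-adaptation setup:
# arm_0_log_prob(tf.zeros_like(init_beta))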
Thanks for any help!
I have some data such as:
I would like to fit a differentiable monotone curve to it. I tried the PchipInterpolator class, but on a very similar graph it resulted in:
This is not monotone.
How can I fit a monotone curve to data like this?
Here is a sample set of y values for another similar graph:
[0.1109157119023644, 0.20187393816931934, 0.14466318670239758, 0.16535159414166822, 0.05452708697483864, 0.2153046237959556, 0.2200300476272603, 0.21012762463269324, 0.15947100322395022, 0.2819691842129948, 0.15567770052985092, 0.24850595803020692, 0.1329341593280457, 0.15595107081606913, 0.3232021121832229, 0.23707961921686588, 0.2415887076540357, 0.32363506549779797, 0.3584089204036798, 0.29232772580068433, 0.22145994836140775, 0.22797587985241133, 0.2717787840603025, 0.3245255944762287, 0.29301098282789195, 0.32417076823344143, 0.3450906550996232, 0.34272097408024904, 0.3868714875012437, 0.41876692320045755, 0.3544198724867363, 0.33073960954801895, 0.3921033666371904, 0.33349050060172974, 0.3608862044547096, 0.37375822841635425, 0.5396399750708429, 0.4209201143798284, 0.42004773793166883, 0.5217725632679073, 0.5911731474218788, 0.43389609315065386, 0.4287288396176006, 0.43007525393257007, 0.5687062142675405, 0.6030811498722173, 0.5292225577714743, 0.47710974351051355, 0.6182720730381119, 0.6241033581931327, 0.6236788197617511, 0.6643161356364049, 0.5577616524049582, 0.6888440258481371, 0.6867893120660341, 0.6685257606057502, 0.599481675493677, 0.7309075091448749, 0.7644365338580481, 0.6176797601816733, 0.6751467827192018, 0.6452178017908761, 0.6684778262246701, 0.7003380077556168, 0.667035916425416, 0.8434451759113093, 0.8419343615815968, 0.8657695361433773, 0.7392487161484605, 0.8773282098364621, 0.8265679895117846, 0.7246599961191632, 0.7251899061730714, 0.9271640780410231, 0.9180581424305536, 0.8099033021701689, 0.8268585329594615, 0.8519967080830176, 0.8711231413093845, 0.8689802343798663, 0.8299523829217353, 1.0057741699770046, 0.8538130788729608, 0.9662784297225102, 1.023419780920539, 0.913146849759822, 0.9900885996579213, 0.8740638988529978, 0.8900285618419457, 0.9065474574434158, 1.0749522597307315, 1.0319120938258166, 1.0051369663172995, 0.9893558841613622, 1.051384986916457, 1.0327996870915341, 1.0945543972861898, 0.9716604944496021, 1.1490370559566179, 1.1379231481207432, 1.6836433783615088, 1.8162068766097395, 2.072155286917785, 2.0395966998366, 2.191064589600466, 2.1581974932543617, 2.163403843819597, 2.133441151300847, 2.1726053994136922, 2.1157865673629526, 2.2249636455682866, 2.2313062166802147, 2.1731708496472764, 2.315203950110816, 2.1601242661726827, 2.174940281421225, 2.2653635413275945, 2.337227057574145, 2.3645767548381618, 2.3084919291392527, 2.314014515926446, 2.25166717296155, 2.2621157708115778, 2.2644578546265586, 2.313504860292943, 2.398969190357051, 2.309443951779675, 2.278946047410807, 2.4080802287121146, 2.353652872018618, 2.35527529074088, 2.4233001060410784, 2.428767198055608, 2.35677123091093, 2.497135132404064, 2.3978099128437282, 2.3970802609341972, 2.4967434818740024, 2.511209192435555, 2.541001050440798, 2.5760248002036525, 2.5960512284192245, 2.4778408861721037, 2.5757724103530046, 2.631148267999664, 2.538327346218921, 2.4878734713248507, 2.6133797275761066, 2.6282561527857395, 2.6150327104952447, 3.102757164382848, 3.3318503012160905, 3.3907776288198193, 3.6065313558941936, 3.601180295875859, 3.560491539319038, 3.650095006265445, 3.574812155815713, 3.686227315374108, 3.6338261415040867, 3.5661194785086288, 3.5747332336054645, 3.560674343726918, 3.5678550481603635, 3.5342848534390967, 3.4929538312485913, 3.564544653619436, 3.6861775399566126, 3.6390300636595216, 3.6656336332413666, 3.5731185631923945, 3.5965520044069854, 3.537434489989021, 3.5590937423870144, 3.5331656424410083, 3.640652819618705, 3.5971240740252126, 3.641793843012055, 
3.6064014089254295, 3.530378938786505, 3.613631139461306, 3.519542268056021, 3.5416251524576, 3.524789618934195, 3.5519951806099512, 3.6435695455293975, 3.6825670484650863, 3.5993379768209217, 3.628367553897596, 3.633290480934276, 3.5772841681579535, 3.602326323397947, 3.518180278272883, 3.531054006706696, 3.5566645495066167, 3.5410992153240985, 3.630762839301216, 3.5924649123201053, 3.646230633817883, 3.568290612034935, 3.638356129262967, 3.566083243271712, 3.6064978645771797, 3.4942864293427633, 3.595438454812999, 3.681726879126678, 3.6501308156903463, 3.5490717955938593, 3.598535359345363, 3.6328331698421654, 3.595159538698094, 3.556715819008055, 3.6292942886764554, 3.6362895697392856, 3.5965220100874093, 3.6103542985016266, 3.5715010140382493, 3.658769915445062, 3.5939686395400416, 3.4974461928859917, 3.5232691556732267, 3.6145687814416614, 3.5682054018341005, 3.648937250575395, 3.4912089018613384, 3.522426560340423, 3.6757968409374637, 3.651348691084845, 3.5395070091675973, 3.5306275536360383, 3.6153498246329883, 3.599762785949876, 3.5351931286962333, 3.6488316987683054, 3.5198301490992963, 3.5696570079786687, 3.561553836008927, 3.5659475947331423, 3.553147100256108, 3.5475591872743664, 3.6097226797553317, 3.6849600324757934, 3.5264731043844413, 3.506658609738451, 3.5535775980874114, 3.5487291053913554, 3.570651383823912, 3.552993371839188, 3.5054297764661846, 3.5723024888238792]
Here's a monotone curve fitter in essentially 5 lines of python, with numpy
and a lowpass filter from scipy.signal:
#!/usr/bin/env python
"""https://stackoverflow.com/questions/56551114/fully-monotone-interpolation-in-python """
# see also
#   https://en.wikipedia.org/wiki/Monotone-spline  aka I-spline
#   https://scikit-learn.org/stable/modules/isotonic.html
# denis 2 March 2020

from __future__ import division, print_function
import numpy as np
from scipy import signal as sig
from matplotlib import pyplot as plt
import seaborn as sns

def butter_filtfilt( x, Wn=0.5, axis=0 ):
    """ butter( 2, Wn ), filtfilt
        axis 0 each col, -1 each row
    """
    b, a = sig.butter( N=2, Wn=Wn )
    return sig.filtfilt( b, a, x, axis=axis, method="gust" )  # twice, forward backward

def ints( x ):
    return x.round().astype(int)

def minavmax( x ):
    return "min av max %.3g %.3g %.3g" % (
            x.min(), x.mean(), x.max() )

def pvec( x ):
    n = len(x) // 25 * 25
    return "%s \n%s \n" % (
            minavmax( x ),
            ints( x[ - n : ]) .reshape( -1, 25 ))

#...............................................................................
def monofit( y, Wn=0.1 ):
    """ monotone-increasing curve fit """
    y = np.asarray(y).squeeze()
    print( "\n{ monofit: y %d %s Wn %.3g " % (
            len(y), minavmax( y ), Wn ))
    ygrad = np.gradient( y )
    print( "grad y:", pvec( ygrad ))

    # lowpass filter --
    gradsmooth = butter_filtfilt( ygrad, Wn=Wn )
    print( "gradsmooth:", pvec( gradsmooth ))

    ge0 = np.fmax( gradsmooth, 0 )

    ymono = np.cumsum( ge0 )  # integrate, sensitive to first few
    ymono += (y - ymono).mean()

    err = y - ymono
    print( "y - ymono:", pvec( err ))
    errstr = "average |y - monofit|: %.2g" % np.abs( err ).mean()
    print( errstr )
    print( "} \n" )

    return ymono, err, errstr

#...............................................................................
if __name__ == "__main__":
    import sys

    np.set_printoptions( threshold=20, edgeitems=15, linewidth=120,
            formatter = dict( float = lambda x: "%.2g" % x ))  # float arrays %.2g
    print( 80 * "=" )
    thispy = sys.argv[0]
    infile = sys.argv[1] if len(sys.argv) > 1 \
        else "so-mono.txt"

    Wn = 0.1  # ?
    params = "%s %s Wn %g " % (thispy, infile, Wn)
    print( params )

    y = np.loadtxt( infile ) * 100
    print( "y:", y )
    ymono, err, errstr = monofit( y, Wn=Wn )

    if 1:
        sns.set_style("whitegrid")
        fig, ax = plt.subplots( figsize=[10, 5] )
        plt.subplots_adjust( left=.05, right=.99, bottom=.05, top=.90 )
        fig.suptitle(
            "Easy monotone curve fit: np.gradient | lowpass filter | clip < 0 | integrate \n"
            + errstr, multialignment="left" )
        ax.plot( ymono, color="orangered" )
        j = np.where( ymono < y )[0]
        xax = np.arange( len(y) )
        plt.vlines( xax[j], ymono[j], y[j], color="blue", lw=1 )
        j = np.where( ymono > y )[0]
        plt.vlines( xax[j], y[j], ymono[j], color="blue", lw=1 )

        png = thispy.replace( ".py", ".png" )
        print( "writing", png )
        plt.savefig( png )
        plt.show()
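If the y values are already in a Python list (for example the ones posted in the question) rather than a file, monofit can be called directly. A minimal sketch, where y_values is a hypothetical name for that list and the * 100 just mirrors the scaling in the __main__ block above:

import numpy as np

y = np.asarray(y_values) * 100          # y_values: the list posted in the question
ymono, err, errstr = monofit(y, Wn=0.1)  # ymono is the monotone fit, err = y - ymono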
You can try preprocessing to make your input data monotone, for example using isotonic regression as described at
https://stats.stackexchange.com/questions/156327/fit-monotone-polynomial-to-data
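As a concrete example, here is a minimal sketch with scikit-learn's IsotonicRegression, assuming the posted y values are in a list called y:

import numpy as np
from sklearn.isotonic import IsotonicRegression

y = np.asarray(y)            # the noisy, roughly increasing values from the question
x = np.arange(len(y))        # use the sample index as the x coordinate

# Fit the best non-decreasing approximation to the data.
y_monotone = IsotonicRegression(increasing=True).fit_transform(x, y)

Note that the isotonic fit is piecewise constant, so if you need a differentiable curve you would still interpolate or smooth y_monotone afterwards.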
After days of trying to apply a tf.while_loop, I still fail to understand how it works (or rather why it does not). The documentation and various questions here on Stack Overflow haven't helped so far.
The main idea is to train the different columns of a tensor trueY separately using a while_loop. The problem is that when I trace this code, I see that the while_loop gets called only once.
I'd like to dynamically assign names to the variables created in the while_loop so that I can access them outside the loop after they have been created (hence the gen_name function, which tries to generate a distinct name for the dense layer created in each iteration), and make tf.while_loop run n times this way.
Here is a sample of my code with the issue (not the full code, and modified to demonstrate the problem):
...................
config['dim_y'] = 10

Xl = tf.placeholder( self.dtype, shape=(batchsize, config['dim_x']) )
Yl = tf.placeholder( self.dtype, shape=(batchsize, config['dim_y']) )
Gl = tf.placeholder( self.dtype, shape=(batchsize, config['dim_g']) )
costl, cost_m, self.cost_b = self.__cost( Xl, Yl, Gl, False )

def __eval_cost( self, A, X, Y, G, reuse ):
    AGXY = tf.concat( [A, G, X, Y], -1 )
    Z, mu_phi3, ls_phi3 = build_nn( AGXY, ...., reuse )
    _cost = -tf.reduce_sum( ls_phi3, -1 )
    _cost += .5 * tf.reduce_sum( tf.pow( mu_phi3, 2 ), -1 )
    _cost += .5 * tf.reduce_sum( tf.exp( 2*ls_phi3 ), -1 )
    return _cost

def __cost( self, trueX, trueY, trueG, reuse ):
    ........
    columns = tf.unstack(trueY, axis=-1)
    AGX = tf.concat( [ AX, G ], -1 )
    pre_Y = self.build_nn( AGX, ....., reuse )

    index_loop = (tf.constant(0), _cost, _cost_bl)

    def condition(index, _cost, _cost_supervised_bi_label):
        return tf.less(index, self.config['dim_y'])

    def bodylabeled(index, _cost, _cost_bl):
        def gen_name(var_name):
            # split eg 'cost/while/strided_slice_5:0' => '5'
            # split eg 'cost/while/strided_slice:0'   => 'slice'
            iter = var_name.split('/')[-1].split(':')[0].split('_')[-1]
            if iter == "slice":
                return '0phi2y'
            else:
                return '{}phi2y'.format(int(iter) % self.config['dim_y'])

        y_i = tf.gather(columns, index)
        y = tf.expand_dims( tf.one_hot(tf.to_int32(y_i, name='ToInt32'), depth, dtype=self.dtype ), 0 )
        Y = tf.tile( y, [self.config['L'],1,1] )
        c = tf.constant(0, name='test')

        log_pred_Y = tf.layers.dense( pre_Y, 2, name=gen_name(iter[index].name), reuse=reuse )
        log_pred_Y = log_pred_Y - tf.reduce_logsumexp( log_pred_Y, -1, keep_dims=True )

        _cost += self.__eval_cost_given_axgy( A, X, Y, G, reuse=tf.AUTO_REUSE )
        _cost_bl += -tf.reduce_sum( tf.multiply( Y, log_pred_Y ), -1 )
        return tf.add(index, 1), _cost, _cost_supervised_bi_label

    _cost, _bl = tf.while_loop(
        condition, bodylabeled, index_loop,
        parallel_iterations=1,
        shape_invariants=(index_loop[0].get_shape(),
                          tf.TensorShape([None, 100]),
                          tf.TensorShape([None, 100])))[1:]

op = costl + cost_m + cost_b

with tf.Session(config=config) as sess:
    sess.run( tf.global_variables_initializer() )
    sess.run( tf.local_variables_initializer() )
    for batchl in batches:
        sess.run( op,
                  feed_dict={Xl:Xl[batchl,:],
                             Yl:Yl[batchl,:].toarray(),
                             Gl:Gl[batchl,:].toarray(),
                             is_training:True } )

for n in tf.get_default_graph().as_graph_def().node:
    print(n.name)
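To illustrate what I mean by "called only once", here is a toy loop (a sketch independent of my model, assuming TF 1.x graph mode as above): the Python body executes a single time while the graph is being built, and only the ops it creates run on every iteration.

import tensorflow as tf

def body(i):
    print("python body traced")                 # fires once, at graph-construction time
    i = tf.Print(i, [i], "graph op ran: ")      # fires on every iteration at run time
    return i + 1

i_final = tf.while_loop(lambda i: i < 5, body, [tf.constant(0)])

with tf.Session() as sess:
    print(sess.run(i_final))  # prints 5; "python body traced" appeared only once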
I am writing a numba function to calculate a portfolio's volatility:
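For reference, the quantity all three functions below compute is the standard portfolio variance (volatility is its square root); the loop versions just split the double sum into the diagonal plus twice the upper triangle of the covariance matrix:

\sigma_p^2 \;=\; w^\top \Sigma\, w \;=\; \sum_i w_i^2\,\Sigma_{ii} \;+\; 2\sum_{i<j} w_i w_j\,\Sigma_{ij}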
Some functions that I am using to do this are here:
import numba as nb
import numpy as np

def portfolio_s2( cv, weights ):
    """ Calculate the variance of a portfolio """
    return weights.dot( cv ).dot( weights )

@nb.jit( nopython=True )
def portfolio_s2c( cv, weights ):
    s0 = 0.0
    for i in range( weights.shape[0]-1 ):
        wi = weights[i]
        s0 += cv[i,i]*wi*wi
        s1 = 0.0
        for j in range( i+1, weights.shape[0] ):
            s1 += wi*weights[j]*cv[i,j]
        s0 += 2.0*s1
    i = weights.shape[0]-1
    wi = weights[ i ]
    s0 += cv[i,i]*wi**2
    return s0

@nb.jit( nopython=True )
def portfolio_s2b( cv, weights ):
    s0 = 0.0
    for i in range( weights.shape[0] ):
        s0 += weights[i]*weights[i]*cv[i,i]
    s1 = 0.0
    for i in range( weights.shape[0]-1 ):
        s2 = 0.0
        for j in range( i+1, weights.shape[0] ):
            s2 += weights[j]*cv[i,j]
        s1 += weights[i]*s2
    return s0 + 2.0*s1
I am testing the performance of the functions using this code:
import numpy as np
import matplotlib.pyplot as plt
# portfolio_s2, portfolio_s2b and portfolio_s2c live in my helpers module;
# Timer is the context manager shown below

N = 1000
num_tests = 10000

times_2b = []
times_2c = []
times_np = []
matrix_sizes = [ 2, 4, 8, 10, 20, 40, 80, 160 ]  # , 320, 640, 1280, 2560 ]

for m in matrix_sizes:
    X = np.random.randn( N, m )
    cv = np.cov( X, rowvar=0 )
    w = np.ones( cv.shape[0] ) / cv.shape[0]

    s2 = helpers.portfolio_s2( cv, w )
    s2b = helpers.portfolio_s2b( cv, w )
    s2c = helpers.portfolio_s2c( cv, w )
    np.testing.assert_almost_equal( s2, s2b )
    np.testing.assert_almost_equal( s2, s2c )

    with Timer( 'nb2b' ) as t2b:
        for _ in range(num_tests):
            helpers.portfolio_s2b( cv, w )
    with Timer( 'nb2c' ) as t2c:
        for _ in range(num_tests):
            helpers.portfolio_s2c( cv, w )
    with Timer( 'np' ) as tnp:
        for _ in range(num_tests):
            helpers.portfolio_s2( cv, w )

    times_2b.append( t2b.timetaken )
    times_2c.append( t2c.timetaken )
    times_np.append( tnp.timetaken )

plt.figure()
plt.plot( matrix_sizes, times_2b, label='2b' )
plt.plot( matrix_sizes, times_2c, label='2c' )
plt.plot( matrix_sizes, times_np, label='np' )
plt.legend( loc='center left', bbox_to_anchor=(1, 0.5) )
plt.show()
This is the Timer class:
import time

class Timer( object ):
    def __init__(self, name=''):
        self._name = name

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, a, b, c):
        self.end = time.time()
        self.timetaken = self.end - self.start
        print( '{0} Took {1} seconds'.format( self._name, self.timetaken ) )
The results are plotted here:
The results show that:
The numba versions of the function outperform the numpy version for matrix sizes under 80
The numba versions seem to scale worse than the numpy function
Why is this? Is there some sort of overhead associated with calling numpy, as opposed to calling numba?
Why does the numpy function scale better? Is it doing something fancy with BLAS in the background, or is it using a superior algorithm for the calculation?
Can I make the numba function scale as well as the numpy function?