cost function outputs 'nan' in tensorflow - python

While studying TensorFlow, I ran into a problem: the cost function outputs 'nan'.
I am trying to track the cost value while training my model, but it's not working.
And if you find anything else wrong in the source code, please let me know.
tf.reset_default_graph()
tf.set_random_seed(777)

X = tf.placeholder(tf.float32, [None, 20, 20, 3])
Y = tf.placeholder(tf.float32, [None, 1])

with tf.variable_scope('conv1') as scope:
    W1 = tf.Variable(tf.random_normal([4, 4, 3, 32], stddev=0.01), name='weight1')
    L1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
    L1 = tf.nn.relu(L1)
    L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    L1 = tf.reshape(L1, [-1, 10 * 10 * 32])

    W1_hist = tf.summary.histogram('conv_weight1', W1)
    L1_hist = tf.summary.histogram('conv_layer1', L1)

with tf.name_scope('fully_connected_layer1') as scope:
    W2 = tf.get_variable('W2', shape=[10 * 10 * 32, 1], initializer=tf.contrib.layers.xavier_initializer())
    b = tf.Variable(tf.random_normal([1]))
    hypothesis = tf.matmul(L1, W2) + b

    W2_hist = tf.summary.histogram('fully_connected_weight1', W2)
    b_hist = tf.summary.histogram('fully_connected_bias', b)
    hypothesis_hist = tf.summary.histogram('hypothesis', hypothesis)

with tf.name_scope('cost') as scope:
    cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
    cost_summary = tf.summary.scalar('cost', cost)

with tf.name_scope('train_optimizer') as scope:
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cost)

predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))
accuracy_summary = tf.summary.scalar('accuracy', accuracy)

train_data_batch, train_labels_batch = tf.train.batch([train_data, train_labels], enqueue_many=True, batch_size=100, allow_smaller_final_batch=True)
with tf.Session() as sess:
    # tensorboard --logdir=./logs/planesnet2_log
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter('./logs/planesnet2_log')
    writer.add_graph(sess.graph)

    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    total_cost = 0
    for step in range(20):
        x_batch, y_batch = sess.run([train_data_batch, train_labels_batch])
        feed_dict = {X: x_batch, Y: y_batch}
        _, cost_val = sess.run([optimizer, cost], feed_dict=feed_dict)
        total_cost += cost_val
        print('total_cost: ', total_cost, 'cost_val: ', cost_val)

    coord.request_stop()
    coord.join(threads)

You use a cross-entropy loss without applying a sigmoid activation to hypothesis, so your values are not bounded in ]0, 1]. The log function is not defined for negative values, and it most likely gets some. Add a sigmoid and an epsilon factor to avoid negative or zero values and you should be fine.
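A minimal sketch of that fix (the epsilon value is an arbitrary choice of mine):

eps = 1e-7
hypothesis = tf.nn.sigmoid(tf.matmul(L1, W2) + b)  # bound the output to ]0, 1[
cost = -tf.reduce_mean(Y * tf.log(hypothesis + eps)
                       + (1 - Y) * tf.log(1 - hypothesis + eps))

Or, more robustly, let TensorFlow fuse the sigmoid into a numerically stable loss:

logits = tf.matmul(L1, W2) + b
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))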

As far as I know,
the cross-entropy cost function assumes that the hypothesis you want to predict is a probability, because cross entropy uses the log function and the (1 - Y) term. Therefore, cross-entropy loss should be used only for probabilistic outputs.
So you have to use the softmax function to turn the results of the hypothesis into probabilities:
W2 = tf.get_variable('W2', shape=[10 * 10 * 32, 1],
                     initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.random_normal([1]))
# hypothesis = tf.matmul(L1, W2) + b
hypothesis = tf.nn.softmax(tf.add(tf.matmul(L1, W2), b))
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
Or you can use this code
W2 = tf.get_variable('W2', shape=[10 * 10 * 32, 1],
                     initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.random_normal([1]))
hypothesis = tf.matmul(L1, W2) + b
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=hypothesis))
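One caveat (my addition, not part of the answer above): with a single output unit as here, tf.nn.softmax normalizes over that lone logit and always returns 1.0, so the first variant's tf.log(1 - hypothesis) evaluates log(0). For the binary labels in this question, the sigmoid form is the safe one:

cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=hypothesis))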

Related

Pricing American options using deep learning, put instead of max-call

So I'm trying to learn to optimally stop options in a Black-Scholes setting along the lines of the article: "Solving high-dimensional optimal stopping problems using deep learning" by Sebastian Becker, Patrick Cheridito, Arnulf Jentzen, and Timo Welti.
The framework used to price options is the following:
import tensorflow as tf
from tensorflow.python.training.moving_averages import assign_moving_average

def neural_net(x, neurons, is_training, dtype=tf.float32, decay=0.9):
    def batch_normalization(y):
        shape = y.get_shape().as_list()
        y = tf.reshape(y, [-1, shape[1] * shape[2]])
        # variables for batch normalization
        beta = tf.compat.v1.get_variable(
            name='beta', shape=[shape[1] * shape[2]],
            dtype=dtype, initializer=tf.zeros_initializer())
        gamma = tf.compat.v1.get_variable(
            name='gamma', shape=[shape[1] * shape[2]],
            dtype=dtype, initializer=tf.ones_initializer())
        mv_mean = tf.compat.v1.get_variable(
            'mv_mean', [shape[1] * shape[2]],
            dtype=dtype, initializer=tf.zeros_initializer(),
            trainable=False)
        mv_var = tf.compat.v1.get_variable(
            'mv_var', [shape[1] * shape[2]],
            dtype=dtype, initializer=tf.ones_initializer(),
            trainable=False)
        mean, variance = tf.nn.moments(y, [0], name='moments')
        tf.compat.v1.add_to_collection(
            tf.compat.v1.GraphKeys.UPDATE_OPS,
            assign_moving_average(mv_mean, mean, decay, zero_debias=True))
        tf.compat.v1.add_to_collection(
            tf.compat.v1.GraphKeys.UPDATE_OPS,
            assign_moving_average(mv_var, variance, decay, zero_debias=False))
        mean, variance = tf.cond(is_training, lambda: (mean, variance),
                                 lambda: (mv_mean, mv_var))
        y = tf.nn.batch_normalization(y, mean, variance, beta, gamma, 1e-6)
        return tf.reshape(y, [-1, shape[1], shape[2]])

    def fc_layer(y, out_size, activation, is_single):
        shape = y.get_shape().as_list()
        w = tf.compat.v1.get_variable(
            name='weights',
            shape=[shape[2], shape[1], out_size],
            dtype=dtype,
            initializer=tf.initializers.glorot_uniform())
        y = tf.transpose(tf.matmul(tf.transpose(y, [2, 0, 1]), w),
                         [1, 2, 0])
        if is_single:
            b = tf.compat.v1.get_variable(
                name='bias',
                shape=[out_size, shape[2]],
                dtype=dtype,
                initializer=tf.zeros_initializer())
            return activation(y + b)
        return activation(batch_normalization(y))

    x = batch_normalization(x)
    for i in range(len(neurons)):
        with tf.compat.v1.variable_scope('layer_' + str(i)):
            x = fc_layer(x, neurons[i],
                         tf.nn.relu if i < len(neurons) - 1 else tf.nn.sigmoid,
                         False)
    return x
# then the deep optimal stopping routine
def deep_optimal_stopping(x, t, n, g, neurons, batch_size, train_steps,
                          mc_runs, lr_boundaries, lr_values, beta1=0.9,
                          beta2=0.999, epsilon=1e-8, decay=0.9):
    # placeholder used to distinguish between training and Monte Carlo
    # simulation, needed for batch normalization
    is_training = tf.compat.v1.placeholder(tf.bool, [])
    p = g(t, x)  # we evaluate the payoff for the whole batch at every point in time
    nets = neural_net(tf.concat([x[:, :, :-1], p[:, :, :-1]], axis=1),
                      neurons, is_training, decay=decay)
    u_list = [nets[:, :, 0]]
    u_sum = u_list[-1]
    for k in range(1, n - 1):
        # a neural network approximates the stopping decision at time k*T/N
        u_list.append(nets[:, :, k] * (1. - u_sum))
        u_sum += u_list[-1]
    # last iteration?
    u_list.append(1. - u_sum)
    u_stack = tf.concat(u_list, axis=1)
    p = tf.squeeze(p, axis=1)  # removes the dimension of size 1
    loss = tf.reduce_mean(tf.reduce_sum(-u_stack * p, axis=1))  # loss function
    # index of the stopping time: argmax returns the first position at which
    # the cumulative stopping decision reaches 1
    idx = tf.argmax(tf.cast(tf.cumsum(u_stack, axis=1) + u_stack >= 1,
                            dtype=tf.uint8),
                    axis=1, output_type=tf.int32)
    # approximation of the price for one batch; the mean over the MC runs of
    # these numbers is computed below
    stopped_payoffs = tf.reduce_mean(
        tf.gather_nd(p, tf.stack([tf.range(0, batch_size, dtype=tf.int32),
                                  idx], axis=1)))
    # variable used to apply the learning rate schedule; without it the
    # optimizer would not know at which training step we are
    global_step = tf.Variable(0)
    # piecewise constant learning rate, according to the schedule
    learning_rate = tf.compat.v1.train.piecewise_constant(global_step,
                                                          lr_boundaries,
                                                          lr_values)
    # Adam with the learning rate schedule and a small tweak of epsilon
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate,
                                                 beta1=beta1,
                                                 beta2=beta2,
                                                 epsilon=epsilon)
    update_ops = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss, global_step=global_step)
    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        for _ in range(train_steps):
            sess.run(train_op, feed_dict={is_training: True})
        px_mean = 0.  # value that will hold the price
        for _ in range(mc_runs):  # loop over the number of MC runs
            # training is over; batch normalization now uses the sampled
            # moving averages
            px_mean += sess.run(stopped_payoffs,
                                feed_dict={is_training: False})
    return px_mean / mc_runs
Now we define the various variables and simulate paths of a stock as X. Then we use the deep_optimal_stopping function to price the option, as in the following code:
import tensorflow as tf
import numpy as np
import time
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

T, N, K = 3., 9, 100.
r, delta, beta = 0.05, 0.1, 0.2
batch_size = 800  # 8192
lr_values = [0.05, 0.005, 0.0005]
mc_runs = 50  # 500

def g(s, x):
    return tf.exp(-r * s) \
           * tf.maximum(tf.reduce_max(x, axis=1, keepdims=True) - K, 0.)

_file = open('example_4_4_1_1.csv', 'w')
_file.write('dim, run, mean, time\n')
for d in [2, 3, 5, 10, 20, 30, 50, 100, 200, 500]:
    for s_0 in [40.]:  # [90., 100., 110.]:
        for run in range(5):
            tf.compat.v1.reset_default_graph()
            t0 = time.time()
            neurons = [d + 50, d + 50, 1]
            train_steps = 1500 + d
            lr_boundaries = [int(500 + d / 5), int(1500 + 3 * d / 5)]
            W = tf.cumsum(tf.compat.v1.random_normal(
                shape=[batch_size, d, N],
                stddev=np.sqrt(T / N)), axis=2)
            t = tf.constant(np.linspace(start=T / N, stop=T, num=N,
                                        endpoint=True, dtype=np.float32))
            # X = tf.exp((r - delta - beta ** 2 / 2.) * t + beta * W) * s_0
            px_mean = deep_optimal_stopping(
                W, t, N, g, neurons, batch_size,
                train_steps, mc_runs,
                lr_boundaries, lr_values, epsilon=0.1)
            t1 = time.time()
            print("")
            _file.write('%i, %i, %f, %f\n' % (d, run, px_mean, t1 - t0))
_file.close()
So here the option is a Bermudan max-call defined by the payoff function g(s, x). My understanding was that if I wanted the price of an American put, I would instead change the payoff function g to:
def g(s, x):
    return tf.exp(-r * s) * tf.maximum(K - x, 0.)
and otherwise changing nothing. But instead of getting a price of 5.31 as reported in their article, I get 4.02.
Can someone explain where I'm going wrong with my understanding of the problem?
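One thing worth checking (an observation, not a definitive diagnosis): in the max-call payoff above, tf.reduce_max(..., keepdims=True) keeps the asset dimension, so g returns shape [batch, 1, N], and deep_optimal_stopping relies on that when it calls tf.squeeze(p, axis=1). K - x is elementwise, so for d > 1 the put payoff has shape [batch, d, N] instead. A shape-preserving sketch (hypothetical, assuming a put on the minimum of the d assets, which reduces to a plain put for d = 1):

def g(s, x):
    return tf.exp(-r * s) \
           * tf.maximum(K - tf.reduce_min(x, axis=1, keepdims=True), 0.)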

How to fix: ValueError: Cannot feed value of shape (96, 28, 28, 1) for Tensor 'inputX_25:0', which has shape '(128, 28, 28, 1)'

Conducting adversarial attacks on deep decision trees trained on the MNIST dataset.
N_LEAF = 2 ** (DEPTH + 1) # Number of leaf node
N_LABEL = 10 # Number of classes
N_TREE = 5 # Number of trees (ensemble)
N_BATCH = 128 # Number of data points per mini-batch
Load dataset
(trX, trY), (teX, teY), min_pixel_value, max_pixel_value = load_mnist()
trX = trX.reshape(-1, 28, 28, 1)
teX = teX.reshape(-1, 28, 28, 1)
print (trX.shape[1])
Input X, output Y, placeholders for x and y
X = tf.placeholder("float", shape=[N_BATCH, 28, 28, 1], name='inputX')
Y = tf.placeholder("float", shape=[N_BATCH, N_LABEL], name='inputY')
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

def init_prob_weights(shape, minval=-5, maxval=5):
    return tf.Variable(tf.random_uniform(shape, minval, maxval))
def model(X, w, w2, w3, w4_e, w_d_e, w_l_e, p_keep_conv, p_keep_hidden):
    assert (len(w4_e) == len(w_d_e))
    assert (len(w4_e) == len(w_l_e))
    l1a = tf.nn.relu(tf.nn.conv2d(X, w, [1, 1, 1, 1], 'SAME'))
    l1 = tf.nn.max_pool(l1a, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')
    l1 = tf.nn.dropout(l1, p_keep_conv)
    l2a = tf.nn.relu(tf.nn.conv2d(l1, w2, [1, 1, 1, 1], 'SAME'))
    l2 = tf.nn.max_pool(l2a, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')
    l2 = tf.nn.dropout(l2, p_keep_conv)
    l3a = tf.nn.relu(tf.nn.conv2d(l2, w3, [1, 1, 1, 1], 'SAME'))
    l3 = tf.nn.max_pool(l3a, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')
    l3 = tf.reshape(l3, [-1, w4_e[0].get_shape().as_list()[0]])
    l3 = tf.nn.dropout(l3, p_keep_conv)
    decision_p_e = []
    leaf_p_e = []
    for w4, w_d, w_l in zip(w4_e, w_d_e, w_l_e):
        l4 = tf.nn.relu(tf.matmul(l3, w4))
        l4 = tf.nn.dropout(l4, p_keep_hidden)
        decision_p = tf.nn.sigmoid(tf.matmul(l4, w_d))  ## check here
        leaf_p = tf.nn.softmax(w_l)
        decision_p_e.append(decision_p)
        leaf_p_e.append(leaf_p)
    return decision_p_e, leaf_p_e
class TFClassifierRF(TFClassifier):
    def predict(self, x, batch_size, **kwargs):
        # Apply preprocessing
        x_preprocessed, _ = self._apply_preprocessing(x, y=None, fit=False)
        rf_feed_dict = kwargs['rf_feed_dict']
        # Run prediction with batch processing
        results = np.zeros((x_preprocessed.shape[0], self.nb_classes()), dtype=np.float32)
        num_batch = int(np.ceil(len(x_preprocessed) / float(batch_size)))
        for m in range(num_batch):
            # Batch indexes
            begin, end = m * batch_size, min((m + 1) * batch_size, x_preprocessed.shape[0])
            # Create feed_dict
            feed_dict = {self._input_ph: x_preprocessed[begin:end]}
            feed_dict.update(self._feed_dict)
            feed_dict.update(rf_feed_dict)
            # Run prediction
            results[begin:end] = self._sess.run(self._output, feed_dict=feed_dict)
        return results

    def fit(self, x, y, batch_size=128, nb_epochs=10, **kwargs):
        # Check if the train op and the output placeholder are available
        if self._train is None or self._labels_ph is None:
            raise ValueError("Need the training objective and the output placeholder to train the model.")
        # Apply preprocessing
        x_preprocessed, y_preprocessed = self._apply_preprocessing(x, y, fit=True)
        num_batch = int(np.ceil(len(x_preprocessed) / float(batch_size)))
        ind = np.arange(len(x_preprocessed))
        rf_feed_dict = kwargs['rf_feed_dict']
        # Start training
        for _ in range(nb_epochs):
            # Shuffle the examples
            random.shuffle(ind)
            # Train for one epoch
            for m in range(num_batch):
                i_batch = x_preprocessed[ind[m * batch_size:(m + 1) * batch_size]]
                o_batch = y_preprocessed[ind[m * batch_size:(m + 1) * batch_size]]
                # Create feed_dict
                feed_dict = {self._input_ph: i_batch, self._labels_ph: o_batch}
                feed_dict.update(self._feed_dict)
                feed_dict.update(rf_feed_dict)
                # Run train step
                self._sess.run(self._train, feed_dict=feed_dict)

    def fit_generator(self, generator, nb_epochs, **kwargs):
        from art.data_generators import TFDataGenerator
        rf_feed_dict = kwargs['rf_feed_dict']
        # Train directly in TensorFlow
        if isinstance(generator, TFDataGenerator) and not (
                hasattr(self, 'label_smooth') or hasattr(self, 'feature_squeeze')):
            for _ in range(nb_epochs):
                for _ in range(int(generator.size / generator.batch_size)):
                    i_batch, o_batch = generator.get_batch()
                    # Create feed_dict
                    feed_dict = {self._input_ph: i_batch, self._labels_ph: o_batch}
                    feed_dict.update(self._feed_dict)
                    feed_dict.update(rf_feed_dict)
                    # Run train step
                    self._sess.run(self._train, feed_dict=feed_dict)
        super(TensorFlowClassifier, self).fit_generator(generator, nb_epochs=nb_epochs, **kwargs)
Initialize the weights of the model
w = init_weights([3, 3, 1, 32])
w2 = init_weights([3, 3, 32, 64])
w3 = init_weights([3, 3, 64, 128])
w4_ensemble = []
w_d_ensemble = []
w_l_ensemble = []
for i in range(N_TREE):
    w4_ensemble.append(init_weights([128 * 4 * 4, 625]))
    w_d_ensemble.append(init_prob_weights([625, N_LEAF], -1, 1))
    w_l_ensemble.append(init_prob_weights([N_LEAF, N_LABEL], -2, 2))
p_keep_conv = tf.placeholder("float", name="p_keep_conv")
p_keep_hidden = tf.placeholder("float", name="p_keep_hidden")
Define the deep decision tree model
# With the probability decision_p, route a sample to the right branch
decision_p_e, leaf_p_e = model(X, w, w2, w3, w4_ensemble, w_d_ensemble,
                               w_l_ensemble, p_keep_conv, p_keep_hidden)
flat_decision_p_e = []
# iterate over each tree
for decision_p in decision_p_e:
    # Compute the complement of d, which is 1 - d,
    # where d is the sigmoid of the fully connected output
    decision_p_comp = tf.subtract(tf.ones_like(decision_p), decision_p)
    # Concatenate both d and 1 - d
    decision_p_pack = tf.stack([decision_p, decision_p_comp])
    # Flatten/vectorize the decision probabilities for efficient indexing
    flat_decision_p = tf.reshape(decision_p_pack, [-1])
    flat_decision_p_e.append(flat_decision_p)
# 0 index of each data instance in a mini-batch
batch_0_indices = \
    tf.tile(tf.expand_dims(tf.range(0, N_BATCH * N_LEAF, N_LEAF), 1),
            [1, N_LEAF])
in_repeat = N_LEAF / 2
out_repeat = N_BATCH
# N_LEAF = float(N_LEAF)
# N_BATCH = float(N_BATCH)
# Let N_BATCH * N_LEAF be N_D. flat_decision_p[N_D] will return 1-d of the
# first root node in the first tree.
batch_complement_indices = \
    np.array([[0] * int(in_repeat), [N_BATCH * N_LEAF] * int(in_repeat)]
             * out_repeat).reshape(N_BATCH, N_LEAF)
First define the routing probabilities d for root nodes
mu_e = []
# iterate over each tree
for i, flat_decision_p in enumerate(flat_decision_p_e):
    mu = tf.gather(flat_decision_p,
                   tf.add(batch_0_indices, batch_complement_indices))
    mu_e.append(mu)
From the second layer to the last layer, we make the decision nodes
for d in range(1, DEPTH + 1):
    indices = tf.range(2 ** d, 2 ** (d + 1)) - 1
    tile_indices = tf.reshape(tf.tile(tf.expand_dims(indices, 1),
                                      [1, 2 ** (DEPTH - d + 1)]), [1, -1])
    batch_indices = tf.add(batch_0_indices, tf.tile(tile_indices, [N_BATCH, 1]))
    in_repeat = in_repeat / 2
    out_repeat = out_repeat * 2
    # Again define the indices that pick d and 1 - d for the node
    batch_complement_indices = \
        np.array([[0] * int(in_repeat), [N_BATCH * N_LEAF] * int(in_repeat)]
                 * out_repeat).reshape(N_BATCH, N_LEAF)
    mu_e_update = []
    for mu, flat_decision_p in zip(mu_e, flat_decision_p_e):
        mu = tf.multiply(mu, tf.gather(flat_decision_p,
                                       tf.add(batch_indices, batch_complement_indices)))
        mu_e_update.append(mu)
    mu_e = mu_e_update
Define p(y|x)
py_x_e = []
for mu, leaf_p in zip(mu_e, leaf_p_e):
    # average all the leaf p
    py_x_tree = tf.reduce_mean(
        tf.multiply(tf.tile(tf.expand_dims(mu, 2), [1, 1, N_LABEL]),
                    tf.tile(tf.expand_dims(leaf_p, 0), [N_BATCH, 1, 1])), 1)
    py_x_e.append(py_x_tree)
py_x_e = tf.stack(py_x_e)
# logit
py_x = tf.reduce_mean(py_x_e, 0)
Define cost and optimization method
# cross entropy loss
loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=py_x, onehot_labels=Y))
# cost = tf.reduce_mean(tf.nn.cross_entropy_with_logits(py_x, Y))
optimizer = tf.train.AdamOptimizer(0.001, 0.9)
train = optimizer.minimize(loss)
# predict = tf.argmax(py_x, 1)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
classifier = TFClassifierRF(clip_values=(min_pixel_value, max_pixel_value), input_ph=X, output=py_x,
                            labels_ph=Y, train=train, loss=loss, learning=None, sess=sess)
rf_feed_dict = {p_keep_conv: 0.8, p_keep_hidden: 0.5}
classifier.fit(trX, trY, N_BATCH, nb_epochs=10, rf_feed_dict=rf_feed_dict)
This call fails with:
ValueError: Cannot feed value of shape (96, 28, 28, 1) for Tensor 'inputX_25:0', which has shape '(128, 28, 28, 1)'
rf_feed_dict = {p_keep_conv: 1.0, p_keep_hidden: 1.0}
predictions = classifier.predict(teX, batch_size=128, rf_feed_dict=rf_feed_dict)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(teY, axis=1)) / len(teY)
print('Accuracy on benign test examples: {}%'.format(accuracy * 100))
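For what it's worth, the mismatch comes from the final mini-batch: 60,000 MNIST training examples split into batches of 128 leave a last batch of 96, and the placeholder's batch dimension is hard-coded to N_BATCH = 128. Because the routing code above also bakes N_BATCH into its tile and index shapes, simply declaring the placeholder with shape [None, 28, 28, 1] would require reworking that indexing. A minimal sketch of the easier workaround (my suggestion, untested against this exact code) is to trim the data so every batch is full:

# drop the incomplete final batch so every mini-batch has exactly N_BATCH rows
n_keep = (len(trX) // N_BATCH) * N_BATCH
classifier.fit(trX[:n_keep], trY[:n_keep], N_BATCH, nb_epochs=10,
               rf_feed_dict=rf_feed_dict)

The same applies to predict: trim teX (and teY) to a multiple of the batch size.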

Specific linear classifier in TensorFlow: input element as vector

How can I implement the following linear classifier in TensorFlow:
x1*w1 + x2*w2 + x3*w3 = y_pred,
where x1, x2, x3 are vectors and w1, w2, and w3 are scalars?
I have a nice tutorial for the case where x1, x2, x3 are scalars (link),
but for the case where x1, x2, x3 are vectors I have no idea how to implement it.
UPDATE
That is, I am trying to implement the following model:
x1*w1 + x2*w1 + x3*w1 + x4*w2 + x5*w2 + x6*w2 + x7*w3 + x8*w3 + x9*w3 = y_pred,
where x1..x9 and w1..w3 are scalars.
The linear multiclass classifier to be implemented:
pred = w1 * (x1 + x2 + x3) + w2 * (x4 + x5 + x6) + w3 * (x7 + x8 + x9)
in which all variables are scalars.
In this model, since pred is a scalar, you cannot use cross-entropy loss for training the classifier (pred is not a distribution). You have to treat it as a regression problem.
Example dataset
import numpy as np
x1 = np.ones((100, 3)) # for w1
x2 = np.ones((100, 3)) * 2 # for w2
x3 = np.ones((100, 3)) * 3 # for w3
# set(y) is {0, 1, 2, 3}, corresponds to the four class labels
y = np.random.randint(0, 4, 100).reshape(-1, 1)
Example tensorflow code:
import tensorflow as tf
tf.reset_default_graph()
f1 = tf.placeholder('float32', shape=[None, 3], name='f1')
f2 = tf.placeholder('float32', shape=[None, 3], name='f2')
f3 = tf.placeholder('float32', shape=[None, 3], name='f3')
target = tf.placeholder('float32', shape=[None, 1], name='target')
# the three scalars
w1 = tf.get_variable('w1', shape=[1], initializer=tf.random_normal_initializer())
w2 = tf.get_variable('w2', shape=[1], initializer=tf.random_normal_initializer())
w3 = tf.get_variable('w3', shape=[1], initializer=tf.random_normal_initializer())
pred_1 = tf.reduce_sum(tf.multiply(f1, w1), axis=1)
pred_2 = tf.reduce_sum(tf.multiply(f2, w2), axis=1)
pred_3 = tf.reduce_sum(tf.multiply(f3, w3), axis=1)
# till now the linear classifier has been constructed
# pred = w1(x1 + x2 + x3) + w2(x4 + x5 + x6) + w3(x7 + x8 + x9)
pred = tf.add_n([pred_1, pred_2, pred_3])
# treat it as a regression problem
loss = tf.reduce_mean(tf.square(pred - target))
optimizer = tf.train.GradientDescentOptimizer(1e-5)
updates = optimizer.minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for t in range(50):
        loss_val, _ = sess.run([loss, updates],
                               feed_dict={f1: x1, f2: x2, f3: x3, target: y})
        print(t, loss_val)
Below is a simple example that uses cross-entropy loss to train a multiclass classifier. As you can see, this model is a neural network model.
import numpy as np
import tensorflow as tf
x1 = np.ones((100, 3)) # for w1
x2 = np.ones((100, 3)) * 2 # for w2
x3 = np.ones((100, 3)) * 3 # for w3
y = np.random.randint(0, 4, 400).reshape(100, 4)
tf.reset_default_graph()
f1 = tf.placeholder('float32', shape=[None, 3], name='f1')
f2 = tf.placeholder('float32', shape=[None, 3], name='f2')
f3 = tf.placeholder('float32', shape=[None, 3], name='f3')
target = tf.placeholder('float32', shape=[None, 4], name='target')
# the three scalars
w1 = tf.get_variable('w1', shape=[1], initializer=tf.random_normal_initializer())
w2 = tf.get_variable('w2', shape=[1], initializer=tf.random_normal_initializer())
w3 = tf.get_variable('w3', shape=[1], initializer=tf.random_normal_initializer())
w = tf.get_variable('w', shape=[3, 4], initializer=tf.random_normal_initializer())
pred_1 = tf.reduce_sum(tf.multiply(f1, w1), axis=1)
pred_2 = tf.reduce_sum(tf.multiply(f2, w2), axis=1)
pred_3 = tf.reduce_sum(tf.multiply(f3, w3), axis=1)
pred = tf.stack([pred_1, pred_2, pred_3], axis=1)
pred = tf.matmul(pred, w)
loss = tf.losses.softmax_cross_entropy(onehot_labels=target, logits=pred)
optimizer = tf.train.GradientDescentOptimizer(1e-5)
updates = optimizer.minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for t in range(50):
        loss_val, _ = sess.run([loss, updates],
                               feed_dict={f1: x1, f2: x2, f3: x3, target: y})
        print(t, loss_val)
I created an array that looks like [w1, w1, w1, w2, w2, w2, ...] and multiplied it (element-wise) by x before summing all the terms up. I could not get model.fit to work, so I copied the train_step code from https://www.tensorflow.org/tutorials/quickstart/advanced. It seems to work just fine. I left my test code at the bottom for you to inspect.
This makes use of TensorFlow 2.0 and its integration with Keras models.
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam

print(tf.executing_eagerly())

class ProductAdd(Model):
    def __init__(self):
        super(ProductAdd, self).__init__()
        self.vars = list(np.empty([3]))      # creates an empty list of length 3
        for i in range(3):
            self.vars[i] = tf.Variable(      # creates 3 variables to act as weights
                np.random.standard_normal(), # gives each a random initial value
                name='var' + str(i))         # names them var0, var1, ...

    def call(self, x):
        extended_vars = [self.vars[int(np.floor(i / 3))]  # "extends" the var array to look like
                         for i in range(9)]               # [w1, w1, w1, w2, w2, w2, w3, w3, w3]
        return np.sum(np.multiply(x, extended_vars))      # element-wise multiply with x, then sum

loss_object = MeanSquaredError()  # create loss and optimizer
optimizer = Adam()

#tf.function  # this function performs one training step of the model
def train_step(images, labels):  # I got it from https://www.tensorflow.org/tutorials/quickstart/advanced
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

model = ProductAdd()
for _ in range(100):
    train_step([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], [0.0])
print(model([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]).numpy())
This question is ill-posed. You say you want x_1, x_2, x_3 to be vectors, however it's not clear what you would do with w_1, w_2, w_3. There are two possibilities.
If you want to keep them as scalars, as your question seems to imply, then the model is not really a vector model, you're just doing the same scalar operation on all the entries of the x vectors, but at once. This is equivalent to a scalar model.
Otherwise, you can define w_1, w_2, w_3 as matrices, or row vectors, if the label is scalar. In this case, there is no reason to write the equation as you wrote it, because you could stack the xs in a single vector and the ws in a single vector and write wx = y. In any case, this is a multivariate linear regression, of which you can find many examples, and tutorials on how to solve it in Tensorflow and Torch.
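A minimal sketch of that stacked formulation (my own illustration, with made-up dimensions):

import numpy as np

# assume x1, x2, x3 are length-3 vectors and w1, w2, w3 are matching row vectors
x1, x2, x3 = np.ones(3), np.ones(3) * 2, np.ones(3) * 3
w1, w2, w3 = np.zeros(3), np.ones(3), np.ones(3) * 0.5
x = np.concatenate([x1, x2, x3])  # single vector x, shape (9,)
w = np.concatenate([w1, w2, w3])  # single vector w, shape (9,)
y = w @ x                         # the whole model collapses to one dot product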
Update, given OP's clarification
In your comment, you now say you're interested in solving the following equation:
w1*(x1 + x2 + x3) + w2*(x4 + x5 + x6) + w3*(x7 + x8 + x9) == y
where all variables are scalars. Note that the x variables are known, so we can define (a simple arithmetic operation):
z1 = x1 + x2 + x3; z2 = x4 + x5 + x6; z3 = x7 + x8 + x9
And the equation becomes
w1*z1 + w2*z2 + w3*z3 = y.
So this is more like a linear algebra question than a tensorflow/torch question, because this equation can be solved analytically and does not require numerical fitting. However, it is still ill-defined, because it has 3 unknowns (w1, w2, w3) for one linear equation. So it will not have a unique solution, but a two-dimensional linear space of solutions (it identifies a plane in the 3-dimensional w-space). To get some solutions, you can arbitrarily decide to set, for example, w1 = w2 = 0, from which you automatically get w3 = y/z3. Then do the same for the other two, and you'll get three different and linearly independent solutions.
Hope this helps. In summary, you don't need code at all.
Second update (from comment)
Why does it need to be solved using optimization? If the problem is as you presented it, it clearly does not, unless you mean you have many values for the xs and ys. In that case, you're doing multivariate linear regression. MLR can be solved using ordinary least squares; see for example https://towardsdatascience.com/simple-and-multiple-linear-regression-in-python-c928425168f9
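A minimal sketch of that route (my own illustration, with made-up data): solve w1*z1 + w2*z2 + w3*z3 = y over many samples by ordinary least squares with numpy.

import numpy as np

Z = np.random.rand(100, 3)           # each row holds (z1, z2, z3) for one sample
w_true = np.array([1.0, -2.0, 0.5])  # hypothetical ground-truth weights
y = Z @ w_true                       # observed targets
w_hat, *_ = np.linalg.lstsq(Z, y, rcond=None)
print(w_hat)                         # recovers approximately [1.0, -2.0, 0.5]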

Tensorflow CNN test data splitting and array sizing problems

I've tried to figure things out myself and not fall back to actually creating an account here, but as a self-taught beginner I've reached a wall with this code.
I'm having two major issues, besides optimizing the net architecture once everything is working:
Every time I've tried to create a new dataset for a test batch, I've run into an 'xTensor is not a Tensor' error and couldn't run a session through it, unlike with the iterator, which works just fine. I'm loading custom data with directory names as labels, with no manually created train and test directories. I'm probably missing a proper method for tf.
I can't work around the current first error I get, which is:
ValueError: Cannot feed value of shape (100,) for Tensor 'Placeholder_1:0', which has shape '(?, 1)' while feeding feed_dict {y: batch_y}. I've tried some of the solutions posted on SO but couldn't get them to work.
I'm pasting the whole thing; the ########## markers flag the problem-triggering zones at the very bottom, in the session.
import tensorflow as tf
import numpy as np
import os
# load custom imageset directory
data_path = r"..\datasets\images\flowers"
# setup hypervariables for labels and images format
n_classes = 5
img_width = 64
img_length = 64
channels = 3
# setup hypervariables for network
learning_rate = 0.0001
epochs = 2
batch_size = 100
drop_rate = 0.6
imagepaths = list()
labels = list()
label = 0
classes = sorted(os.walk(data_path).__next__()[1])
# List each sub-directory (the classes)
for c in classes:
    c_dir = os.path.join(data_path, c)
    walk = os.walk(c_dir).__next__()
    # Add each image to the training set
    for sample in walk[2]:
        imagepaths.append(os.path.join(c_dir, sample))
        labels.append(label)
    label += 1
total_input = len(labels)
# Convert to Tensor
imagepaths = tf.convert_to_tensor(imagepaths, dtype=tf.string)
labels = tf.convert_to_tensor(labels, dtype=tf.int32)
# Build a TF Queue, shuffle data
dataset = tf.data.Dataset.from_tensor_slices((imagepaths, labels))
# read, decode, resize and normalize images on RGB range
def parse(imagepath, label):
    image = tf.read_file(imagepath)
    image = tf.image.decode_jpeg(image, channels=channels)
    image = tf.image.resize_images(image, [img_length, img_width])
    image = image * 1.0 / 255
    return image, label
dataset = dataset.map(parse)
dataset = dataset.shuffle(buffer_size=batch_size*10)
dataset = dataset.batch(batch_size)
iterator = dataset.make_one_shot_iterator()
next_batch = iterator.get_next()
# hypervariables for layers' output size
K = 16
L = 32
M = 200
x = tf.placeholder(tf.float32, [None, 4326])
x_shaped = tf.reshape(x, [-1, img_length, img_width, 3])
y = tf.placeholder(tf.float32, [None, 1])
# weight, bias with stride size and activation method after convolution for layer 1
W1 = tf.Variable(tf.truncated_normal([5, 5, 3, K], stddev=0.03))
b1 = tf.Variable(tf.truncated_normal([K], stddev=0.01))
stride = 1
y1 = tf.nn.relu(tf.nn.conv2d(x_shaped, W1, strides=[1, stride, stride, 1], padding='SAME') + b1)
# weight, bias with stride size and activation method after convolution for layer 2
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.03))
b2 = tf.Variable(tf.truncated_normal([L], stddev=0.01))
stride = 2 # output is 14x14
y2 = tf.nn.relu(tf.nn.conv2d(y1, W2, strides=[1, stride, stride, 1], padding='SAME') + b2)
yflat = tf.reshape(y2, [-1, 7 * 7 * L])
W3 = tf.Variable(tf.truncated_normal([7 * 7 * L, M], stddev=0.1))
b3 = tf.Variable(tf.truncated_normal([M], stddev=0.01))
y3 = tf.nn.relu(tf.matmul(yflat, W3) + b3)
W4 = tf.Variable(tf.truncated_normal([M, 10], stddev=0.1))
b4 = tf.Variable(tf.truncated_normal([10], stddev=0.01))
ylogits = tf.matmul(y3, W4) + b4
y_ = tf.nn.softmax(ylogits)
# add cross entropy for back prop
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=ylogits, labels=y_))
# add an optimiser for back prop
optimiser = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy)
# define an accuracy assessment operation
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ########## temporary solution for test_x, test_y
    test_x, test_y = sess.run(next_batch)
    total_batch = int(total_input / batch_size)
    # define the iterator for the network
    for epoch in range(epochs):
        avg_cost = 0
        for i in range(total_batch):
            batch_x, batch_y = sess.run(next_batch)
            ########## ValueError: Cannot feed value of shape (100,) for Tensor 'Placeholder_1:0' -> y: batch_y
            _, c = sess.run([optimiser, cross_entropy], feed_dict={x_shaped: batch_x, y: batch_y})
            avg_cost += c / total_batch
        test_acc = sess.run(accuracy, feed_dict={x: test_x, y: test_y})
        print("Epoch:", (epoch + 1), "cost =", "{:.3f}".format(avg_cost), " test accuracy: {:.3f}".format(test_acc))
        summary = sess.run(merged, feed_dict={x: test_x, y: test_y})
    print("\nTraining complete!")
    print(sess.run(accuracy, feed_dict={x: test_x, y: test_y}))
Are you sure that this part:
_, c = sess.run([optimiser, cross_entropy], feed_dict={x_shaped: batch_x, y: batch_y})
doesn't have to be:
_, c = sess.run([optimiser, cross_entropy], feed_dict={x: batch_x, y: batch_y})
Furthermore, you have a batch size of 100; the data itself is right, but the shape of the array is not complete.
What you have (dummy example)
np.zeros((100,)).shape
>>> (100,)
here 100 matches the '?' of the required shape '(?, 1)'. The 1 can easily be added, and it often happens that numpy does not do this on its own. See the following code:
np.expand_dims(np.zeros((100,)), axis=-1).shape
>>> (100, 1)
axis=-1 stands for the last axis; you basically tell numpy to add a dimension at the end. This does not affect the data itself, only the shape of the array. So your code should be:
_, c = sess.run([optimiser, cross_entropy], feed_dict={x_shaped: batch_x, y: np.expand_dims(batch_y, axis=-1)})
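Reshaping on the numpy side achieves the same thing (assuming batch_y is a 1-D array):

_, c = sess.run([optimiser, cross_entropy],
                feed_dict={x_shaped: batch_x, y: batch_y.reshape(-1, 1)})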

How can I log individual scalar values from a TensorFlow Variable?

How can I log (with SummaryWriter, e.g. for TensorBoard) individual scalar elements of a tensor Variable? For example, how can I log individual weights of a given layer or node in a network?
In my example code, I've pressed a general feed-forward neural network into service to do simple linear regression, and want (in that case) to log the weights of the lone node in the lone hidden layer as learning progresses.
I can get these values explicitly during a session with, for example
sess.run(layer_weights)[0][i][0]
for the i-th weight, where layer_weights is a list of the weight Variables; but I can't figure out how to log the corresponding scalar values. If I try
w1 = tf.slice(layer_weights[0], [0], [1])[0]
tf.scalar_summary('w1', w1)
or
w1 = layer_weights[0][1][0]
tf.scalar_summary('w1', w1)
I get
ValueError: Shape (5, 1) must have rank 1
How can I log individual scalar values from a TensorFlow Variable?
from __future__ import (absolute_import, print_function, division, unicode_literals)
import numpy as np
import tensorflow as tf
# Basic model parameters as external flags
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('network_nodes', [5, 1], 'The number of nodes in each layer, including input and output.')
flags.DEFINE_float('epochs', 250, 'Epochs to run')
flags.DEFINE_float('learning_rate', 0.15, 'Initial learning rate.')
flags.DEFINE_string('data_dir', './data', 'Directory to hold training and test data.')
flags.DEFINE_string('train_dir', './_tmp/train', 'Directory to log training (and the network def).')
flags.DEFINE_string('test_dir', './_tmp/test', 'Directory to log testing.')
def variable_summaries(var, name):
    with tf.name_scope("summaries"):
        mean = tf.reduce_mean(var)
        tf.scalar_summary('mean/' + name, mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean)))
        tf.scalar_summary('stddev/' + name, stddev)
        tf.scalar_summary('max/' + name, tf.reduce_max(var))
        tf.scalar_summary('min/' + name, tf.reduce_min(var))
        tf.histogram_summary(name, var)
def add_layer(input_tensor, input_dim, output_dim, neuron_fn, layer_name):
    with tf.name_scope(layer_name):
        with tf.name_scope("weights"):
            weights = tf.Variable(tf.truncated_normal([input_dim, output_dim], stddev=0.1))
        with tf.name_scope("biases"):
            biases = tf.Variable(tf.constant(0.1, shape=[output_dim]))
        with tf.name_scope('activations'):
            with tf.name_scope('weighted_inputs'):
                weighted_inputs = tf.matmul(input_tensor, weights) + biases
                tf.histogram_summary(layer_name + '/weighted_inputs', weighted_inputs)
            output = neuron_fn(weighted_inputs)
    return output, weights, biases
def make_ff_network(nodes, input_activation, hidden_activation_fn=tf.nn.sigmoid, output_activation_fn=tf.nn.softmax):
    layer_activations = [input_activation]
    layer_weights = []
    layer_biases = []
    n_layers = len(nodes)
    for l in range(1, n_layers):
        a, w, b = add_layer(layer_activations[l - 1], nodes[l - 1], nodes[l],
                            output_activation_fn if l == n_layers - 1 else hidden_activation_fn,
                            'output_layer' if l == n_layers - 1 else 'hidden_layer' + (
                                '_{}'.format(l) if n_layers > 3 else ''))
        layer_activations += [a]
        layer_weights += [w]
        layer_biases += [b]
    with tf.name_scope('output'):
        net_activation = tf.identity(layer_activations[-1], name='network_activation')
    return net_activation, layer_weights, layer_biases
# Inputs and outputs
with tf.name_scope('data'):
    x = tf.placeholder(tf.float32, shape=[None, FLAGS.network_nodes[0]], name='inputs')
    y_ = tf.placeholder(tf.float32, shape=[None, FLAGS.network_nodes[-1]], name='correct_outputs')
# Network structure
y, layer_weights, layer_biases = make_ff_network(FLAGS.network_nodes, x, output_activation_fn=tf.identity)
# Metrics and operations
with tf.name_scope('accuracy'):
    with tf.name_scope('loss'):
        loss = tf.reduce_mean(tf.square(y - y_))
    # NONE OF THESE WORK:
    # w1 = tf.slice(layer_weights[0], [0], [1])[0]
    # tf.scalar_summary('w1', w1)
    # w1 = layer_weights[0][1][0]
    # tf.scalar_summary('w1', w1)
    tf.scalar_summary('loss', loss)
train_step = tf.train.GradientDescentOptimizer(FLAGS.learning_rate).minimize(loss)
# Logging
train_writer = tf.train.SummaryWriter(FLAGS.train_dir, tf.get_default_graph())
test_writer = tf.train.SummaryWriter(FLAGS.test_dir)
merged = tf.merge_all_summaries()
W = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
train_x = np.random.rand(100000, FLAGS.network_nodes[0])
train_y = np.array([np.dot(W, train_x.T)+ 6.0]).T
test_x = np.random.rand(1000, FLAGS.network_nodes[0])
test_y = np.array([np.dot(W, test_x.T)+ 6.0]).T
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for ep in range(FLAGS.epochs):
        sess.run(train_step, feed_dict={x: train_x, y_: train_y})
        summary = sess.run(merged, feed_dict={x: test_x, y_: test_y})
        test_writer.add_summary(summary, ep + 1)
    # THESE WORK
    print('w1 = {}'.format(sess.run(layer_weights)[0][0][0]))
    print('w2 = {}'.format(sess.run(layer_weights)[0][1][0]))
    print('w3 = {}'.format(sess.run(layer_weights)[0][2][0]))
    print('w4 = {}'.format(sess.run(layer_weights)[0][3][0]))
    print('w5 = {}'.format(sess.run(layer_weights)[0][4][0]))
    print(' b = {}'.format(sess.run(layer_biases)[0][0]))
There are a few different errors in the code.
The main problem is that you are passing a Python list of tensors to scalar_summary.
The error saying that you are passing a tensor that does not have rank 1 is related to the slice operation.
You want to pass the weights and log them layer by layer. One way to do that is to log each weight on each layer:
for weight in layer_weights:
    tf.scalar_summary([['%s_w%d%d' % (weight.name, i, j) for i in xrange(len(layer_weights))]
                       for j in xrange(5)], weight)
Running tensorboard --logdir=./_tmp/test will then show these nice graphs in TensorBoard.
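A minimal sketch of logging a single entry (my own variant, using the same old-style summary API as the question): slice with rank-2 begin/size indices so the slice is well-defined on the (5, 1) weight matrix, then reshape the result to a scalar before passing it to scalar_summary.

# pick out the weight at row 0, column 0 of the (5, 1) weight matrix
w1 = tf.reshape(tf.slice(layer_weights[0], [0, 0], [1, 1]), [])
tf.scalar_summary('w1', w1)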
