i'm trying to create a neural network model for a kaggle competition using mnist dataset. currently, my code looks like this since i am trying to capture certain metrics. however, i can't seem to figure out how to turn this into an output to submit.
import time
import tensorflow.compat.v1 as tf
import pandas as pd
import numpy as np
from tensorflow.python.framework import ops
import ssl
_create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
# Legacy Python that doesn't verify HTTPS certificates by default
# Handle target environment that doesn't support HTTPS verification
ssl._create_default_https_context = _create_unverified_https_context
# Load training and testing data directly from TensorFlow
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
# Initialize metrics
metrics = {}
# Initialize metric names
names = ['Number of Hidden Layers', 'Nodes per Layer', 'Time in Seconds',
'Training Set Accuracy', 'Test Set Accuracy']
# Set fixed parameters
n_epochs = 20
batch_size = 50
learning_rate = 0.01
# Function that creates batch generator used in training
def shuffle_batch(X, y, batch_size):
rnd_idx = np.random.permutation(len(X))
n_batches = len(X) // batch_size
for batch_idx in np.array_split(rnd_idx, n_batches):
X_batch, y_batch = X[batch_idx], y[batch_idx]
yield X_batch, y_batch
# Start timer
start = time.process_time()
n_hidden = 300
# Reset the session
# Set X and y placeholders
X = tf.placeholder(tf.float32, shape=(None, 784), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
with tf.name_scope("dnn"):
hidden1 = tf.layers.dense(X, n_hidden, name="hidden1",
hidden2 = tf.layers.dense(hidden1, n_hidden, name="hidden2",
logits = tf.layers.dense(hidden2, 10, name="outputs")
y_proba = tf.nn.softmax(logits)
with tf.name_scope("loss"):
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
loss = tf.reduce_mean(xentropy, name="loss")
with tf.name_scope("train"):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
with tf.Session() as sess:
for epoch in range(n_epochs):
for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
acc_train = accuracy.eval(feed_dict={X: X_train, y: y_train})
acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
# Record the clock time it takes
duration = time.process_time() - start
metrics['Model 1'] = [2, n_hidden, duration, acc_train, acc_test]
# Convert metrics dictionary to dataframe for display
results_summary = pd.DataFrame.from_dict(metrics, orient='index')
results_summary.columns = names
# Sort by model number
results_summary.sort_values(by=['index'], axis=0, inplace=True)
results_summary.set_index(['index'], inplace=True)
results_summary.index.name = None
# Export to csv
i need to create an output that looks something like this in csv file:
ImageId Label
0 1 2
1 2 0
2 3 9
3 4 0
4 5 3
would i have to recreate the whole thing in order to actually create "y_pred" when doing something like model.predict(X_test), or can i just reshape the existing code in some way to do this? ideally, i would like to capture predicted values and compare them to true values using a confusion matrix.
Since you're using old tensorflow style, predict using old tensorflow style at the end:
feed_dict = {X: X_test}
classification = tf.run(y_proba, feed_dict)
label = numpy.argmax(classification, axis=-1)
Maybe you need to create a new session for this, or use the existing sess.run, I'm not sure how this works, try to see which option gives out reasonable results.
I"m working through the an example problem with TensorFlow (working with placeholders specifically) and don't understand why I'm receiving (what appears to be) a shape/type error when I'm fairly confident those are what they should be.
I've tried playing around with the various float types in X_batch & y_batch, tried changing the size from being "None" (unspecified) to what I will be passing in (100), none of which have worked
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing
def fetch_batch(epoch, batch_index, batch_size, X, y):
np.random.seed(epoch * batch_index)
indices = np.random.randint(m, size=batch_size)
X_batch = X[indices]
y_batch = y[indices]
return X_batch.astype('float32'), y_batch.astype('float32')
if __name__ == "__main__":
housing = fetch_california_housing()
m, n = housing.data.shape
# standardizing input data
standardized_housing = (housing.data - np.mean(housing.data)) / np.std(housing.data)
std_housing_bias = np.c_[np.ones((m, 1)), standardized_housing]
# using the size "n+1" to account for the bias term
X = tf.placeholder(tf.float32, shape=(None, n+1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1, 1), dtype=tf.float32, name='theta')
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
n_epochs = 1000
learning_rate = 0.01
batch_size = 100
n_batches = int(np.ceil(m / batch_size))
# using the Gradient Descent Optimizer class from tensorflow's optimizer selection
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
# creates a node in the computational graph that initializes all variables when it is run
init = tf.global_variables_initializer()
with tf.Session() as sess:
for epoch in range(n_epochs):
for batch_index in range(n_batches):
X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size, std_housing_bias, \
housing.target.reshape(-1, 1))
print(X_batch.shape, X_batch.dtype, y_batch.shape, y_batch.dtype)
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
if epoch % 100 == 0:
print(f"Epoch {epoch} MSE = {mse.eval()}")
best_theta = theta.eval()
print("Mini Batch Gradient Descent Beta Estimates")
The error I'm getting is:
InvalidArgumentError: You must feed a value for placeholder tensor 'X' with dtype float and shape [?,9]
[[node X (defined at /Users/marshallmcquillen/Scripts/lab.py:25) ]]
I've thrown a print statement printing X_batch and y_batch properties, and they are what I expect them to be but still aren't working.
The mse you want to evaluate is also dependent on placeholder X and y therefore you need to provide with feed_dict as well. You can fix it by changing the line to
if epoch % 100 == 0:
print(f"Epoch {epoch} MSE = {mse.eval(feed_dict={X: X_batch, y: y_batch})}")
But since you are trying to evaluate the model, it is reasonable to use a test dataset. So ideally it would be
if epoch % 100 == 0:
print(f"Epoch {epoch} MSE = {mse.eval(feed_dict={X: X_test, y: y_test})}")
I'm trying to create a neural network that takes 13 features as input from multiple csv files one at a time and measure accuracy after each iteration. Here is my code snippet:
import tensorflow as tf
import numpy as np
from tensorflow.contrib.layers import fully_connected
import os
import pandas as pd
n_inputs = 13
n_hidden1 = 30
n_hidden2 = 10
n_outputs = 2
learning_rate = 0.01
n_epochs = 40
batch_size = 1
patient_id = os.listdir('./subset_numerical')
output = pd.read_csv('output.csv')
sepsis_pat = output['output'].tolist()
X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
y = tf.placeholder(tf.int64, shape=[None], name="y")
def data_processor(n):
id = pd.read_csv('./subset_numerical/'+patient_id[n])
id_input = np.array([id['VALUE'].tolist()])
for s in sepsis_pat:
if str(s) == str(patient_id[n].split('.')[0]):
a = 1
if a == 1:
a = 0
return [id_input, np.array([1])]
return [id_input, np.array([0])]
def test_set():
id_combined = []
out = []
for p in range(300, len(patient_id)):
id1 = pd.read_csv('./subset_numerical/' + patient_id[p])
id_input1 = np.array(id1['VALUE'].tolist())
for s in sepsis_pat:
if str(s) == str(patient_id[p].split('.')[0]):
a = 1
if a == 1:
a = 0
out.append([1, 0])
out.append([0, 1])
return [np.array(id_combined), np.array(out)]
# Declaration of hidden layers and calculation of loss goes here
# Construction phase begins
with tf.name_scope("dnn"):
hidden1 = fully_connected(X, n_hidden1, scope="hidden1")
hidden2 = fully_connected(hidden1, n_hidden2, scope="hidden2")
logits = fully_connected(hidden2, n_outputs, scope="outputs", activation_fn=None) # We will apply softmax here later
# Calculating loss
with tf.name_scope("loss"):
xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")
# Training with gradient descent optimizer
with tf.name_scope("train"):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
# Measuring accuracy
with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
accuracy_summary = tf.summary.scalar('accuracy', accuracy)
# Variable initialization and saving model goes here
# Construction is finished. Let's get this to work.
with tf.Session() as sess:
for epoch in range(n_epochs):
a = 0
for iteration in range(300 // batch_size):
X_batch, y_batch = data_processor(iteration)
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
X_test, y_test = test_set()
acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
save_path = saver.save(sess, "./my_model_final.ckpt")
But I'm stuck with this error:
logits and labels must be same size: logits_size=[1,2] labels_size=[1,1]
The error seems to occur at this line:
correct = tf.nn.in_top_k(logits, y, 1)
What am I doing wrong?
Based on your error log provided, the problem is in this line of your code:
xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
Ensure that both of them have same shape and dtype.
The shape should be of the format [batch_size, num_classes] and dtype should be of type float16, float32 or float64. Check the documentation of softmax_cross_entropy_with_logits for more details.
Since you've defined n_outputs = 2, the shape of logits is [?, 2] (? means batch size), while the shape of y is just [?]. In order to apply the softmax loss function, the last FC layer should return a flat tensor, which can be compared with y.
Solution: set n_outputs = 1.
I understand that there are advantages (especially as I expand the scope of the models I build and the size of the datasets they work on) to using TensorFlow's new Dataset as the idiom for my data feeding pipeline. However I'm having trouble mapping my existing feed_dict based code to this new model.
One problem I face is that I can't sort out how batching and epochs interact, or how these interleave with the logging and validation that I often do.
For example, how does something like the following map to using Dataset?
# Load and process data into tensors of dimension (N, C_i) for input and (N, C_o) for output
# where N is the number of examples and C_ is the number of chanels, and the values are activations
train_x, train_y, valid_x, valid_y = load_data(file, [segments], ...)
train_size = len(train_x)
train_stats_feed = {input_activation: train_x, correct_output: train_y, is_train: False}
valid_stats_feed = {input_activation: valid_x, correct_output: valid_y, is_train: False}
with tf.Session(config=tf.ConfigProto(...)) as sess:
# Some analysis; not always done but the code needs to support it
train_writer.add_summary(sess.run(merged, feed_dict=train_stats_feed), 0)
test_writer.add_summary(sess.run(merged, feed_dict=valid_stats_feed), 0)
test_writer.add_summary(sess.run(gs_summary), 0)
print(log_fmt.format(0, float(sess.run(accuracy, feed_dict=valid_stats_feed)),
float(sess.run(loss, feed_dict=valid_stats_feed))))
for ep in range(epochs):
# Slice the training data into random batches
batch_indices = np.array_split(np.random.permutation(train_size), int(train_size/mb_size))
for mini_batch_indices in batch_indices:
sess.run(train_step, feed_dict={input_activation: train_x[mini_batch_indices],
correct_output: train_y[mini_batch_indices], is_train: True})
gs = int(sess.run(global_step))
if gs % log_steps == 0:
test_writer.add_summary(sess.run(merged, feed_dict=valid_stats_feed), gs)
train_writer.add_summary(sess.run(merged, feed_dict=train_stats_feed), gs)
acc = float(sess.run(accuracy, feed_dict=valid_stats_feed))
print(log_fmt.format(gs, acc, float(sess.run(loss, feed_dict=valid_stats_feed))))
print(ep_fmt.format(ep + 2))
test_writer.add_summary(sess.run(gs_summary), ep + 1)
Some of the less obvious definitions for the above, if needed:
# Preliminaries
# Some basic preliminaries, the details of which are not important to the question
# Mostly pretty standard; obvious things omitted from MWE for brevity
global_step = tf.Variable(0, trainable=False, name='global_step')
validation_accuracy = tf.Variable(0.0, trainable=False, name='validation_accuracy', dtype=tf.float32)
is_train = tf.placeholder(tf.bool, [], name='is_train')
input_activation = tf.placeholder(tf.float32, shape=[None, in_nodes], name='inputs')
correct_output = tf.placeholder(tf.float32, shape=[None, out_nodes], name='correct_outputs')
network_output = tf.identity(out_activations)
correct_predictions = correct_fn(correct_output, network_output)
accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))
error = cost_fn(correct_output, network_output)
loss = error + FLAGS.regularization_weight * sum(tf.nn.l2_loss(w) for w in layer_weights)
train_step = tf.train.MomentumOptimizer(learning_rate, momentum=momentum).minimize(loss, global_step=global_step)
# Logging
train_writer = tf.summary.FileWriter(trainlogfile, tf.get_default_graph())
test_writer = tf.summary.FileWriter(testlogfile, tf.get_default_graph())
gs_summary = tf.summary.scalar('global_step_at_epoch', global_step)
merged = tf.summary.merge_all()
Here're few lines for training to get started. Same logics apply for validation
# Define placeholder for inputs data and labels
inputs_placeholder = tf.placeholder(train_x.dtype, train_x.shape)
labels_placeholder = tf.placeholder(train_y.dtype, train_y.shape)
# Define a Dataset object using the above placeholders
dataset = tf.contrib.data.Dataset.from_tensor_slices((inputs_placeholder, labels_placeholder))
# Define batch_size
batch_size = 128
dataset = dataset.batch(batch_size)
# Define iterator
iterator = dataset.make_initializable_iterator()
# Get one batch
next_example, next_label = iterator.get_next()
# calculate loss from the model fucntion you are using
loss = some_model(next_example, next_label)
# Set number of Epochs here
num_epochs = 100
for _ in range(num_epochs):
sess.run(iterator.initializer, feed_dict={inputs_placeholder: train_x, labels_placeholder: train_y}))
while True:
_loss = sess.run(loss)
except tf.errors.OutOfRangeError:
Does anyone have an example for code that uses tf.train.AdadeltaOptimizer with good results?
I have a TF graph, that was originally set with tf.train.AdamOptimizer, and is working well. When I replace it with AdadeltaOptimizer, with the default params, it gives lousy results.
I used Cuda 7.5.
The below is example code which works with 'AdadeltaOptimizer' optimizer. It works with 'Adam'. The only difference between them that Adam is insensitive to "learning rate" and 'Adadelta' is sensitive.
I advice you to read more about optimization algorithm (like here).
In your own example, just try to change 'learning rate' to be smaller or bigger (it is named 'hyperparameter optimization').
From my experience, 'Adam' is a very good optimizer for RNN, better than 'AdaDelta' (using example code, 'Adam' achieve better score much faster). On the other hand, for CNN, SGD+Momentum works best.
Code, which learn MNIST classification using Bi-LSTM:
# Mnist classification using Bi-LSTM
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
learning_rate = 0.01
training_epochs = 100
batch_size = 64
seq_length = 28
heigh_image = 28
hidden_size = 128
class_numer = 10
input = tf.placeholder(tf.float32, [None, None, heigh_image])
target = tf.placeholder(tf.float32, [None, class_numer])
seq_len = tf.placeholder(tf.int32, [None])
def fulconn_layer(input_data, output_dim, activation_func=None):
input_dim = int(input_data.get_shape()[1])
W = tf.Variable(tf.random_normal([input_dim, output_dim]))
b = tf.Variable(tf.random_normal([output_dim]))
if activation_func:
return activation_func(tf.matmul(input_data, W) + b)
return tf.matmul(input_data, W) + b
with tf.name_scope("BiLSTM"):
with tf.variable_scope('forward'):
lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
with tf.variable_scope('backward'):
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell, inputs=input,sequence_length=seq_len, dtype=tf.float32, scope="BiLSTM")
# As we have Bi-LSTM, we have two output, which are not connected. So merge them
outputs = tf.concat(2, outputs)
# As we want do classification, we only need the last output from LSTM.
last_output = outputs[:,0,:]
# Create the final classification layer
yhat = fulconn_layer(last_output, class_numer)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(yhat, target))
optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(cost) # AdamOptimizer
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(target, 1), tf.argmax(yhat, 1)), tf.float32))
gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts)) as session:
print ("Start Learing")
for epoch in range(training_epochs):
for i in range(int(mnist.train.num_examples/batch_size)):
x_batch, y_batch = mnist.train.next_batch(batch_size)
x_batch = x_batch.reshape([batch_size, seq_length, heigh_image])
train_seq_len = np.ones(batch_size) * seq_length
session.run([optimizer], feed_dict={input: x_batch, target: y_batch, seq_len: train_seq_len})
train_accuracy = session.run(accuracy, feed_dict={input: x_batch, target: y_batch, seq_len: train_seq_len})
x_test = mnist.test.images.reshape([-1, seq_length, heigh_image])
y_test = mnist.test.labels
test_seq_len = np.ones(x_test.shape[0]) * seq_length
test_accuracy = session.run(accuracy, feed_dict={input: x_test, target: y_test, seq_len: test_seq_len})
print("epoch: %d, train_accuracy: %3f, test_accuracy: %3f" % (epoch, train_accuracy, test_accuracy))
I have written my self a tensorflow class as below, but I met some problem when I am trying to set some weight to zero after training manually in function refine_init_weight. In this function, I tried setting all numbers to zero once it is below some value and see how would the accuracy rate change. The problem is, when I reran self.sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test}), it seems that its value does not changed accordingly. I am just wondering where should I change the symbolic variable (the accuracy depends on the weights I changed) at this case?
import tensorflow as tf
from nncomponents import *
from helpers import *
from sda import StackedDenoisingAutoencoder
class DeepFeatureSelection:
def __init__(self, X_train, X_test, y_train, y_test, weight_init='sda', hidden_dims=[100, 100, 100], epochs=1000,
lambda1=0.001, lambda2=1.0, alpha1=0.001, alpha2=0.0, learning_rate=0.1, optimizer='FTRL'):
# Initiate the input layer
# Get the dimension of the input X
n_sample, n_feat = X_train.shape
n_classes = len(np.unique(y_train))
self.epochs = epochs
# Store up original value
self.X_train = X_train
self.y_train = one_hot(y_train)
self.X_test = X_test
self.y_test = one_hot(y_test)
# Two variables with undetermined length is created
self.var_X = tf.placeholder(dtype=tf.float32, shape=[None, n_feat], name='x')
self.var_Y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes], name='y')
self.input_layer = One2OneInputLayer(self.var_X)
self.hidden_layers = []
layer_input = self.input_layer.output
# Initialize the network weights
weights, biases = init_layer_weight(hidden_dims, X_train, weight_init)
# Create hidden layers
for init_w,init_b in zip(weights, biases):
self.hidden_layers.append(DenseLayer(layer_input, init_w, init_b))
layer_input = self.hidden_layers[-1].output
# Final classification layer, variable Y is passed
self.softmax_layer = SoftmaxLayer(self.hidden_layers[-1].output, n_classes, self.var_Y)
n_hidden = len(hidden_dims)
# regularization terms on coefficients of input layer
self.L1_input = tf.reduce_sum(tf.abs(self.input_layer.w))
self.L2_input = tf.nn.l2_loss(self.input_layer.w)
# regularization terms on weights of hidden layers
L1s = []
L2_sqrs = []
for i in xrange(n_hidden):
self.L1 = tf.add_n(L1s)
self.L2_sqr = tf.add_n(L2_sqrs)
# Cost with two regularization terms
self.cost = self.softmax_layer.cost \
+ lambda1*(1.0-lambda2)*0.5*self.L2_input + lambda1*lambda2*self.L1_input \
+ alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2*self.L1
# FTRL optimizer is used to produce more zeros
# self.optimizer = tf.train.FtrlOptimizer(learning_rate=learning_rate).minimize(self.cost)
self.optimizer = optimize(self.cost, learning_rate, optimizer)
self.accuracy = self.softmax_layer.accuracy
self.y = self.softmax_layer.y
def train(self, batch_size=100):
sess = tf.Session()
self.sess = sess
for i in xrange(self.epochs):
x_batch, y_batch = get_batch(self.X_train, self.y_train, batch_size)
sess.run(self.optimizer, feed_dict={self.var_X: x_batch, self.var_Y: y_batch})
if i % 2 == 0:
l = sess.run(self.cost, feed_dict={self.var_X: x_batch, self.var_Y: y_batch})
print('epoch {0}: global loss = {1}'.format(i, l))
self.selected_w = sess.run(self.input_layer.w)
print("Train accuracy:",sess.run(self.accuracy, feed_dict={self.var_X: self.X_train, self.var_Y: self.y_train}))
print("Test accuracy:",sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test}))
print("Final test accuracy:",sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test}))
def refine_init_weight(self, threshold=0.001):
refined_w = np.copy(self.selected_w)
refined_w[refined_w < threshold] = 0
print("Test accuracy refined:",self.sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test}))
(I'll just repost my comment as an answer)
You need to run the assign operation you created, otherwise it is just added to the graph and never executed.
assign_op = self.input_layer.w.assign(refined_w)
If you want to do this in Tensorflow you could create a boolean mask of the weight variable with tf.greater and tf.less, convert this mask to tf.float32 and multiply it with the weight array.