Neural Network not converging in multilabel classification task - TensorFlow - python

I am a beginner with TensorFlow and I really need some help with this task. I am doing some image pixel classification, and my problem is set up this way:
my input is an array X that contains 20 values. These values represent 4 pixels (there are 5 values per pixel). My output is an array of 4 values, where each value can be either 1 or 0, indicating whether the corresponding pixel has a specific characteristic. As you can imagine, y can be of the form y = [1, 0, 0, 1], so each instance can belong to multiple classes.
To perform this classification task, I organized a neural network with an input layer of 20 neurons, a hidden layer of 15, another hidden layer of 10, and finally an output layer of 4. The activation function for the hidden layers is ReLU, and I applied 50% dropout to them. As the loss function to minimize, I use TensorFlow's sigmoid_cross_entropy_with_logits, because it computes independent probabilities for each class, which allows multilabel classification.
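(For context, sigmoid cross-entropy treats each of the 4 outputs as an independent yes/no problem. A rough NumPy sketch of the per-class loss it computes, with made-up numbers rather than my real data:)
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

logits = np.array([2.0, -1.0, 0.5, -3.0])  # raw outputs of the last layer for one instance
y      = np.array([1.0,  0.0, 0.0,  1.0])  # multilabel target, e.g. y = [1, 0, 0, 1]

p = sigmoid(logits)                         # independent per-class probabilities
per_class_loss = -(y * np.log(p) + (1 - y) * np.log(1 - p))
print(per_class_loss, per_class_loss.mean())  # the mean is what tf.reduce_mean minimizes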
When I first attempted to train the network, I was getting all NaN results, because of (I think) an exploding gradient problem. This was apparently solved after I lowered the learning rate.
My problem now is that this network is not converging at all, and I believe this is because something is wrong with the cost and activation functions that I am using.
Note: the input has been scaled with sklearn.preprocessing.StandardScaler
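(Roughly how the scaling is done; the scaler is fit on the training split only and reused for any other data. X_train and X_test below are just placeholders for my real arrays:)
import numpy as np
from sklearn.preprocessing import StandardScaler

X_train = np.random.rand(1000, 20)  # placeholder for the real training inputs
X_test  = np.random.rand(200, 20)   # placeholder for the real test inputs

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  # fit mean/std on the training data
X_test_scaled  = scaler.transform(X_test)       # reuse the same statistics at test time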
Here is the code:
import tensorflow as tf
import numpy as np

n_inputs = 20
n_hidden1 = 15
n_hidden2 = 10
n_outputs = 4

dropout_rate = 0.5
learning_rate = 0.000000001

# This is a boolean value that indicates when to apply dropout
training = tf.placeholder_with_default(True, shape=(), name="training")

X = tf.placeholder(tf.float64, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None, 4), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", activation=tf.nn.relu)
    hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=training)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2", activation=tf.nn.relu)
    hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=training)
    logits = tf.layers.dense(hidden2_drop, n_outputs, name="outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.cast(y, tf.float64),
                                                       logits=tf.cast(logits, tf.float64))
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 50
batch_size = 50
n_batches = 1000000

# training
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for i in range(n_batches):
            X_batch = np.asarray(X_batches[i]).reshape(-1, 20)
            y_batch = np.asarray(y_batches[i]).reshape(-1, 4)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})
            if (i % 10000) == 0:
                raws = logits.eval(feed_dict={X: X_batch, training: False})
                print("epoch = " + str(epoch))
                print("iteration = " + str(i))
                print("accuracy = " + str(get_global_accuracy_rate(raws, y_batch)))
                print("X = " + str(X_batch[0]) + " y = " + str(y_batch[0]))
                print("raws = " + str(raws[0]) + " pred = " + str(get_prediction(raws[0])))
    save_path = saver.save(sess, "./my_model_final_1.ckpt")
Thank you so much in advance for any help!!

Related

How do I fix this size of tensor error for my NN classifier PyTorch

I'm having trouble understanding why this is throwing an error. This code is pulled directly from the PyTorch documentation for an NN classifier for the Fashion-MNIST dataset. However, when I try to switch this to the MNIST handwritten digits dataset, it comes up with the following error:
RuntimeError: The size of tensor a (10) must match the size of tensor b (64) at non-singleton dimension 1
This occurs when the loss function is called during the training loop. Can anyone help me understand why this is happening? Thanks!
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
import torchvision.models as models
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = "cpu"
print(f"Using {device} device")

training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

def save_checkpoint(state, filename="checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)

model = NeuralNetwork().to(device)

learning_rate = 1e-3
batch_size = 64
epochs = 10

# Initialize the loss function
loss_fn = nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimiser)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")
torch.nn.MSELoss is an implementation of mean squared error. You can't measure the difference between two tensors if they're different sizes (MSELoss does not allow for broadcasting). So if you're using MSELoss, the predictions and the targets must be the same shape. In your case, preds is a tensor of shape [64, 10], and y is a tensor of shape [64].
The reason y is of shape [64] rather than [64, 10] is that most classification dataset implementations represent targets as integer labels rather than one-hot encoded vectors. In theory, you could convert these integer label targets to one-hot encoded targets.
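A minimal sketch of that conversion, if you really wanted to keep MSELoss (torch.nn.functional.one_hot returns integers, so it has to be cast to float to match the predictions):
import torch
import torch.nn.functional as F

y = torch.tensor([3, 0, 7])                       # integer class labels, shape [batch]
y_onehot = F.one_hot(y, num_classes=10).float()   # shape [batch, 10], same as the logits
print(y_onehot.shape)                             # torch.Size([3, 10])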
But in reality, since this is a classification problem, you should probably be using something like nn.CrossEntropyLoss rather than nn.MSELoss. The former is a conventional classification loss function, and it allows the targets to be integer labels rather than one-hot labels (so just swapping out MSELoss for CrossEntropyLoss should solve your problem). MSELoss is better suited for regression tasks and such.
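Concretely, the swap is a one-liner; CrossEntropyLoss takes the raw logits of shape [64, 10] and the integer targets of shape [64] directly (dummy tensors below, standing in for your model output and labels):
import torch
from torch import nn

loss_fn = nn.CrossEntropyLoss()        # instead of nn.MSELoss()

pred = torch.randn(64, 10)             # logits, as returned by the model
y = torch.randint(0, 10, (64,))        # integer labels, as returned by the DataLoader
loss = loss_fn(pred, y)                # no shape mismatch, no one-hot needed
print(loss.item())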

Deep Learning implementation in Tensorflow or Keras gives drastically different results

Context: I'm using a fully convolutional network to perform image segmentation. Typically, the input is an RGB image of shape = [512, 256] and the target is a 2-channel binary mask defining the annotated regions (the 2nd channel is the opposite of the first channel).
Question: I have the same CNN implementation using Tensorflow and Keras. But the Tensorflow model doesn't start learning. Actually, the loss even grows with the number of epochs! What is wrong in this Tensorflow implementation that prevents it from learning?
Setup: The dataset is split into 3 subsets: training (78%), testing (8%) and validation (14%) sets, which are fed to the network in batches of 8 images. The graphs show the evolution of the loss for each subset. The images show the prediction after 10 epochs for two different images.
Tensorflow implementation and results
import tensorflow as tf

tf.reset_default_graph()

x = inputs = tf.placeholder(tf.float32, shape=[None, shape[1], shape[0], 3])
targets = tf.placeholder(tf.float32, shape=[None, shape[1], shape[0], 2])

for d in range(4):
    x = tf.layers.conv2d(x, filters=np.exp2(d+4), kernel_size=[3,3], strides=[1,1], padding="SAME", activation=tf.nn.relu)
    x = tf.layers.max_pooling2d(x, strides=[2,2], pool_size=[2,2], padding="SAME")

x = tf.layers.conv2d(x, filters=2, kernel_size=[1,1])
logits = tf.image.resize_images(x, [shape[1], shape[0]], align_corners=True)
prediction = tf.nn.softmax(logits)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=logits))
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

def run(mode, x_batch, y_batch):
    if mode == 'TRAIN':
        return sess.run([loss, optimizer], feed_dict={inputs: x_batch, targets: y_batch})
    else:
        return sess.run([loss, prediction], feed_dict={inputs: x_batch, targets: y_batch})
Keras implementation and results
import keras as ke

ke.backend.clear_session()

x = inputs = ke.layers.Input(shape=[shape[1], shape[0], 3])
for d in range(4):
    x = ke.layers.Conv2D(int(np.exp2(d+4)), [3,3], padding="SAME", activation="relu")(x)
    x = ke.layers.MaxPool2D(padding="SAME")(x)

x = ke.layers.Conv2D(2, [1,1], padding="SAME")(x)
logits = ke.layers.Lambda(lambda x: ke.backend.tf.image.resize_images(x, [shape[1], shape[0]], align_corners=True))(x)
prediction = ke.layers.Activation('softmax')(logits)

model = ke.models.Model(inputs=inputs, outputs=prediction)
model.compile(optimizer="rmsprop", loss="categorical_crossentropy")

def run(mode, x_batch, y_batch):
    if mode == 'TRAIN':
        loss = model.train_on_batch(x=x_batch, y=y_batch)
        return loss, None
    else:
        loss = model.evaluate(x=x_batch, y=y_batch, batch_size=None, verbose=0)
        prediction = model.predict(x=x_batch, batch_size=None)
        return loss, prediction
There must be a difference between the two, but my understanding of the documentation has led me nowhere. I would be really interested to know where the difference lies. Thanks in advance!
The answer was in the Keras implementation of softmax, where an unexpected max is subtracted:
def softmax(x, axis=-1):
    # when x is a 2 dimensional tensor
    e = K.exp(x - K.max(x, axis=axis, keepdims=True))
    s = K.sum(e, axis=axis, keepdims=True)
    return e / s
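For what it's worth, subtracting the row-wise max cannot by itself change the softmax output; it only keeps the exponentials from overflowing:
\operatorname{softmax}(x_i - c)
  = \frac{e^{x_i - c}}{\sum_j e^{x_j - c}}
  = \frac{e^{-c}\, e^{x_i}}{e^{-c} \sum_j e^{x_j}}
  = \operatorname{softmax}(x_i)
\qquad \text{for any constant } c,\ \text{in particular } c = \max_j x_j .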
Here is the Tensorflow implementation updated with the max hack, along with the good results it produces:
import tensorflow as tf

tf.reset_default_graph()

x = inputs = tf.placeholder(tf.float32, shape=[None, shape[1], shape[0], 3])
targets = tf.placeholder(tf.float32, shape=[None, shape[1], shape[0], 2])

for d in range(4):
    x = tf.layers.conv2d(x, filters=np.exp2(d+4), kernel_size=[3,3], strides=[1,1], padding="SAME", activation=tf.nn.relu)
    x = tf.layers.max_pooling2d(x, strides=[2,2], pool_size=[2,2], padding="SAME")

x = tf.layers.conv2d(x, filters=2, kernel_size=[1,1])
logits = tf.image.resize_images(x, [shape[1], shape[0]], align_corners=True)

# The mysterious hack taken from Keras
logits = logits - tf.expand_dims(tf.reduce_max(logits, axis=-1), -1)

prediction = tf.nn.softmax(logits)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=logits))
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

def run(mode, x_batch, y_batch):
    if mode == 'TRAIN':
        return sess.run([loss, optimizer], feed_dict={inputs: x_batch, targets: y_batch})
    else:
        return sess.run([loss, prediction], feed_dict={inputs: x_batch, targets: y_batch})
Huge thanks to Simon for pointing this out on the Keras implementation :-)

Tensorflow Multi-layer perceptron graph won't converge

I am new to Python and TensorFlow. After (maybe) better understanding DNNs and their math, I started learning to use TensorFlow through exercises.
One of my exercises is to predict x^2, which means that after proper training, when I give it 5.0, it will predict 25.0.
Parameters and settings:
Cost function = E((y-y')^2)
Two hidden Layers and they are fully connected.
learning_rate = 0.001
n_hidden_1 = 3
n_hidden_2 = 2
n_input = 1
n_output = 1
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with RELU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer with RELU activation
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer

def generate_input():
    import random
    val = random.uniform(-10000, 10000)
    return np.array([val]).reshape(1, -1), np.array([val*val]).reshape(1, -1)

# tf Graph input
# given one value and output one value
x = tf.placeholder("float", [None, 1])
y = tf.placeholder("float", [None, 1])
pred = multilayer_perceptron(x, weights, biases)

# Define loss and optimizer
distance = tf.sub(pred, y)
cost = tf.reduce_mean(tf.pow(distance, 2))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    avg_cost = 0.0
    for iter in range(10000):
        inp, ans = generate_input()
        _, c = sess.run([optimizer, cost], feed_dict={x: inp, y: ans})
        print('iter: ' + str(iter) + ' cost=' + str(c))
However, it turns out that c sometimes gets larger and sometimes smaller (but it stays large).
It seems that your training data have a big range because of the statement val = random.uniform(-10000, 10000); try to do some data preprocessing before you train, for example:
val = random.uniform(-10000, 10000)
val = np.asarray(val).reshape(1, -1)
val -= np.mean(val, axis=0)
val /= np.std(val, axis=0)
As for the loss value, it's OK that it sometimes gets larger and sometimes smaller; just make sure the loss is decreasing in general as the training epochs increase. PS: we often use an SGD optimizer.
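One caveat about the snippet above: the mean and std of a single value are degenerate (the std is 0), so in practice the statistics should come from a whole batch or from the known input range. A hedged sketch of what that could look like:
import numpy as np
import random

# draw a batch of samples, then standardize with the batch statistics
vals = np.array([random.uniform(-10000, 10000) for _ in range(256)]).reshape(-1, 1)
targets = vals ** 2

vals_scaled = (vals - vals.mean(axis=0)) / vals.std(axis=0)  # roughly zero mean, unit variance
targets_scaled = targets / targets.std(axis=0)               # keep the target in a trainable range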

Tensorflow accuracy at .99 but predictions awful

Maybe I'm making predictions wrong?
Here's the project... I have a greyscale input image that I am trying to segment. The segmentation is a simple binary classification (think of foreground vs background), so the ground truth (y) is a matrix of 0's and 1's and there are 2 classes. Oh, and the input image is a square, so I just use one variable called n_input.
My accuracy essentially converges to 0.99, but when I make a prediction I get all zeros. EDIT --> there is a single 1 in each output matrix, both in the same place...
Here's my session code (everything else is working)...
with tf.Session() as sess:
    sess.run(init)
    summary = tf.train.SummaryWriter('/tmp/logdir/', sess.graph_def)
    step = 1
    from tensorflow.contrib.learn.python.learn.datasets.scroll import scroll_data
    data = scroll_data.read_data('/home/kendall/Desktop/')
    # Keep training until reach max iterations
    flag = 0
    # while flag == 0:
    while step * batch_size < training_iters:
        batch_y, batch_x = data.train.next_batch(batch_size)
        # pdb.set_trace()
        # batch_x = batch_x.reshape((batch_size, n_input))
        batch_x = batch_x.reshape((batch_size, n_input, n_input))
        batch_y = batch_y.reshape((batch_size, n_input, n_input))
        batch_y = convert_to_2_channel(batch_y, batch_size)
        # batch_y = batch_y.reshape((batch_size, n_output, n_classes))
        batch_y = batch_y.reshape((batch_size, 200, 200, n_classes))
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
                                       keep_prob: dropout})
        if step % display_step == 0:
            flag = 1
            # Calculate batch loss and accuracy
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
                                                              y: batch_y,
                                                              keep_prob: 1.})
            print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc)
        step += 1
    print "Optimization Finished!"
    save_path = "model.ckpt"
    saver.save(sess, save_path)

    im = Image.open('/home/kendall/Desktop/HA900_frames/frame0635.tif')
    batch_x = np.array(im)
    pdb.set_trace()
    batch_x = batch_x.reshape((1, n_input, n_input))
    batch_x = batch_x.astype(float)
    # pdb.set_trace()
    prediction = sess.run(pred, feed_dict={x: batch_x, keep_prob: 1.})
    print prediction
    arr1 = np.empty((n_input, n_input))
    arr2 = np.empty((n_input, n_input))
    for i in xrange(n_input):
        for j in xrange(n_input):
            for k in xrange(2):
                if k == 0:
                    arr1[i][j] = prediction[0][i][j][k]
                else:
                    arr2[i][j] = prediction[0][i][j][k]
    # prediction = np.asarray(prediction)
    # prediction = np.reshape(prediction, (200,200))
    # np.savetxt("prediction.csv", prediction, delimiter=",")
    np.savetxt("prediction1.csv", arr1, delimiter=",")
    np.savetxt("prediction2.csv", arr2, delimiter=",")
Since there are two classes, that end part (with the couple of loops) is just to partition the prediction into two 200x200 matrices, one per class.
I saved the prediction arrays to a CSV file, and like I said, they were all zeros.
I have also confirmed all data is correct (dimensions and values).
Why would the training converge, but predictions are awful?
If you want to look at all the code, here it is...
import tensorflow as tf
import pdb
import numpy as np
from numpy import genfromtxt
from PIL import Image

# Import MINST data
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Parameters
learning_rate = 0.001
training_iters = 20000
batch_size = 128
display_step = 1

# Network Parameters
n_input = 200 # MNIST data input (img shape: 28*28)
n_output = 40000 # MNIST total classes (0-9 digits)
n_classes = 2
#n_input = 200
dropout = 0.75 # Dropout, probability to keep units

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input, n_input])
y = tf.placeholder(tf.float32, [None, n_input, n_input, n_classes])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)

# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                          padding='SAME')

# Create model
def conv_net(x, weights, biases, dropout):
    # Reshape input picture
    x = tf.reshape(x, shape=[-1, n_input, n_input, 1])

    # Convolution Layer
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max Pooling (down-sampling)
    conv1 = maxpool2d(conv1, k=2)
    conv1 = tf.nn.local_response_normalization(conv1)

    # Convolution Layer
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max Pooling (down-sampling)
    conv2 = tf.nn.local_response_normalization(conv2)
    conv2 = maxpool2d(conv2, k=2)

    # Convolution Layer
    conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
    # Max Pooling (down-sampling)
    conv3 = tf.nn.local_response_normalization(conv3)
    conv3 = maxpool2d(conv3, k=2)

    # pdb.set_trace()

    # Fully connected layer
    # Reshape conv2 output to fit fully connected layer input
    fc1 = tf.reshape(conv3, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply Dropout
    fc1 = tf.nn.dropout(fc1, dropout)

    output = []
    for i in xrange(2):
        output.append(tf.nn.softmax(tf.add(tf.matmul(fc1, weights['out']), biases['out'])))

    return output
    # return tf.nn.softmax(tf.add(tf.matmul(fc1, weights['out']), biases['out']))

# Store layers weight & bias
weights = {
    # 5x5 conv, 1 input, 32 outputs
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc3': tf.Variable(tf.random_normal([5, 5, 64, 128])),
    # fully connected, 7*7*64 inputs, 1024 outputs
    'wd1': tf.Variable(tf.random_normal([25*25*128, 1024])),
    # 1024 inputs, 10 outputs (class prediction)
    'out': tf.Variable(tf.random_normal([1024, n_output]))
}

biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bc3': tf.Variable(tf.random_normal([128])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_output]))
}

# Construct model
pred = conv_net(x, weights, biases, keep_prob)
# pdb.set_trace()
pred = tf.pack(tf.transpose(pred, [1,2,0]))
pred = tf.reshape(pred, [-1, n_input, n_input, n_classes])

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.initialize_all_variables()
saver = tf.train.Saver()

def convert_to_2_channel(x, batch_size):
    #assume input has dimension (batch_size,x,y)
    #output will have dimension (batch_size,x,y,2)
    output = np.empty((batch_size, 200, 200, 2))

    temp_arr1 = np.empty((batch_size, 200, 200))
    temp_arr2 = np.empty((batch_size, 200, 200))

    for i in xrange(batch_size):
        for j in xrange(200):
            for k in xrange(200):
                if x[i][j][k] == 1:
                    temp_arr1[i][j][k] = 1
                    temp_arr2[i][j][k] = 0
                else:
                    temp_arr1[i][j][k] = 0
                    temp_arr2[i][j][k] = 1

    for i in xrange(batch_size):
        for j in xrange(200):
            for k in xrange(200):
                for l in xrange(2):
                    if l == 0:
                        output[i][j][k][l] = temp_arr1[i][j][k]
                    else:
                        output[i][j][k][l] = temp_arr2[i][j][k]

    return output

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    summary = tf.train.SummaryWriter('/tmp/logdir/', sess.graph_def)
    step = 1
    from tensorflow.contrib.learn.python.learn.datasets.scroll import scroll_data
    data = scroll_data.read_data('/home/kendall/Desktop/')
    # Keep training until reach max iterations
    flag = 0
    # while flag == 0:
    while step * batch_size < training_iters:
        batch_y, batch_x = data.train.next_batch(batch_size)
        # pdb.set_trace()
        # batch_x = batch_x.reshape((batch_size, n_input))
        batch_x = batch_x.reshape((batch_size, n_input, n_input))
        batch_y = batch_y.reshape((batch_size, n_input, n_input))
        batch_y = convert_to_2_channel(batch_y, batch_size)
        # batch_y = batch_y.reshape((batch_size, n_output, n_classes))
        batch_y = batch_y.reshape((batch_size, 200, 200, n_classes))
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
                                       keep_prob: dropout})
        if step % display_step == 0:
            flag = 1
            # Calculate batch loss and accuracy
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
                                                              y: batch_y,
                                                              keep_prob: 1.})
            print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc)
        step += 1
    print "Optimization Finished!"
    save_path = "model.ckpt"
    saver.save(sess, save_path)

    im = Image.open('/home/kendall/Desktop/HA900_frames/frame0635.tif')
    batch_x = np.array(im)
    pdb.set_trace()
    batch_x = batch_x.reshape((1, n_input, n_input))
    batch_x = batch_x.astype(float)
    # pdb.set_trace()
    prediction = sess.run(pred, feed_dict={x: batch_x, keep_prob: 1.})
    print prediction
    arr1 = np.empty((n_input, n_input))
    arr2 = np.empty((n_input, n_input))
    for i in xrange(n_input):
        for j in xrange(n_input):
            for k in xrange(2):
                if k == 0:
                    arr1[i][j] = prediction[0][i][j][k]
                else:
                    arr2[i][j] = prediction[0][i][j][k]
    # prediction = np.asarray(prediction)
    # prediction = np.reshape(prediction, (200,200))
    # np.savetxt("prediction.csv", prediction, delimiter=",")
    np.savetxt("prediction1.csv", arr1, delimiter=",")
    np.savetxt("prediction2.csv", arr2, delimiter=",")

    # Calculate accuracy for 256 mnist test images
    print "Testing Accuracy:", \
        sess.run(accuracy, feed_dict={x: data.test.images[:256],
                                      y: data.test.labels[:256],
                                      keep_prob: 1.})
Errors in the code
There are multiple errors in your code:
you shouldn't call tf.nn.sigmoid_cross_entropy_with_logits with the output of a softmax layer, but with the unscaled logits:
WARNING: This op expects unscaled logits, since it performs a softmax on logits internally for efficiency. Do not call this op with the output of softmax, as it will produce incorrect results.
in fact since you have 2 classes, you should use a loss with softmax, using tf.nn.softmax_cross_entropy_with_logits
When using tf.argmax(pred, 1), you only apply argmax over axis 1, which is the height of the output image. You should use tf.argmax(pred, 3) on the last axis (of size 2).
This might explain why you get 0.99 accuracy:
on the output image, the argmax is taken over the height of the image, and it returns 0 by default whenever all the values along that axis are equal, so the comparison is not measuring per-pixel classification at all (see the sketch after this list).
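A quick NumPy illustration of the axis issue (shapes assumed from the code above, random arrays standing in for the real outputs and labels):
import numpy as np

batch, h, w, n_classes = 1, 200, 200, 2
scores = np.random.rand(batch, h, w, n_classes)    # stand-in for the network output
labels = np.random.randint(0, 2, (batch, h, w))    # stand-in ground truth, one class id per pixel

pred_class = np.argmax(scores, axis=3)             # per-pixel class: argmax over the *class* axis
accuracy = np.mean(pred_class == labels)           # this is the per-pixel accuracy you want

wrong = np.argmax(scores, axis=1)                  # argmax over the height axis instead
print(pred_class.shape, wrong.shape, accuracy)     # (1, 200, 200) vs (1, 200, 2)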
Wrong model
The biggest drawback is that your model in general will be very hard to optimize.
You have a softmax over 40,000 classes, which is huge.
You do not take advantage at all of the fact that you want to output an image (the prediction foreground / background).
For instance, prediction 2,345 is highly correlated with predictions 2,346 and 2,545, but you don't take that into account.
I recommend reading a bit about semantic segmentation first:
this paper: Fully Convolutional Networks for Semantic Segmentation
these slides from CS231n (Stanford): especially the part about upsampling and deconvolution
Recommendations
If you want to work with TensorFlow, you will need to start small. First try a very simple network with maybe 1 hidden layer.
You need to plot all the shapes of your tensors to make sure they correspond to what you thought. For instance, if you had plotted tf.argmax(y, 1), you would have realized the shape is [batch_size, 200, 2] instead of the expected [batch_size, 200, 200].
TensorBoard is your friend, you should try to plot the input image here, as well as your predictions to see what they look like.
Try small, with a very small dataset of 10 images and see if you can overfit it and predict almost the exact response.
To conclude, I am not sure of all my suggestions, but they are worth trying, and I hope this will help you on the path to success!

How to mix variable sequence length and variable batch size in RNN

I'm trying to train an RNN classifier with a training set that has variable-length sequences. I've read around, and the suggested solution is to bucket the training data, feed batches out of each bucket, and exit the RNN at a certain sequence_length. However, the results from this process are much worse than from just trimming the input sequences to length 10, and my loss function and accuracy move up and down erratically.
Some more information:
I have a variable batch size because sometimes there aren't enough samples of a certain sequence length in a bucket
A sequence is a series of words converted into word embeddings
If I have a bucket of sequences of length 4, the batch still has width max_seq_length, with everything padded on the right with vectors full of 0's
batches have the following shape (batch_size,number_steps,embedding_dimensions)
batch(128,48,100)
sequence_length = 4
classes(128,188)
batch(128,48,100)
sequence_length = 8
classes(128,188)
...
batch(30,48,100)
sequence_length = 40
classes(30,188)
I feed the sequence_length as the early_stop variable to my RNN
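A minimal sketch of how one of those buckets gets padded (plain NumPy; embedding_dim, max_seq_len and the random vectors are placeholders for my real data):
import numpy as np

embedding_dim = 100   # matches the shapes above
max_seq_len = 48
bucket_len = 4        # every sequence in this bucket has 4 real time steps

# a bucket of 128 sequences of word vectors, all of length 4
bucket = [np.random.rand(bucket_len, embedding_dim) for _ in range(128)]

batch = np.zeros((len(bucket), max_seq_len, embedding_dim), dtype=np.float32)
for i, seq in enumerate(bucket):
    batch[i, :bucket_len, :] = seq   # real steps on the left, zero padding on the right

print(batch.shape)                   # (128, 48, 100)
sequence_length = bucket_len         # fed as the early_stop placeholder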
What is the best way to mix variable sequence length with variable batch size?
My Graph:
# Network Parameters
n_input = embeddings_dim # word embeddings dimensions : 100
n_steps = max_seq_len # timesteps = maximum sequence length in my training = 47
n_classes = total_labels # total classes = 188

graph = tf.Graph()
with graph.as_default():
    # tf Graph input
    x = tf.placeholder("float", [None, n_steps, n_input])
    # Tensorflow LSTM cell requires 2x n_hidden length (state & cell)
    istate = tf.placeholder("float", [None, 2*n_hidden])
    y = tf.placeholder("float", [None, n_classes])

    # at what step we should read out the value of the RNN
    early_stop = tf.placeholder(tf.int32)
    tf.scalar_summary('early_stop', early_stop)

    # Define weights
    weights = {
        'hidden': tf.Variable(tf.random_normal([n_input, n_hidden])), # Hidden layer weights
        'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
    }
    biases = {
        'hidden': tf.Variable(tf.random_normal([n_hidden])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    def RNN(_X, _istate, _weights, _biases):
        # input shape: (batch_size, n_steps, n_input)
        _X = tf.transpose(_X, [1, 0, 2]) # permute n_steps and batch_size
        # Reshape to prepare input to hidden activation
        _X = tf.reshape(_X, [-1, n_input]) # (n_steps*batch_size, n_input)
        # Linear activation
        _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

        # Define a lstm cell with tensorflow
        lstm_cell = rnn_cell.LSTMCell(n_hidden, forget_bias=0.25)
        # Split data because rnn cell needs a list of inputs for the RNN inner loop
        _X = tf.split(0, n_steps, _X) # n_steps * (batch_size, n_hidden)

        # Get lstm cell output
        outputs, states = rnn.rnn(lstm_cell, _X,
                                  initial_state=_istate,
                                  sequence_length=early_stop)

        # Linear activation
        # Get inner loop last output
        return tf.matmul(outputs[-1], _weights['out']) + _biases['out']

    pred = RNN(x, istate, weights, biases)

    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) # Softmax loss
    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate_2 = tf.train.exponential_decay(learning_rate, global_step, 300, 0.96, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_2).minimize(cost) # Adam Optimizer

    # Evaluate model
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
My Training loop:
with tf.Session(graph=graph) as sess:
    init = tf.initialize_all_variables()
    sess.run(init)
    step = 1
    while step * batch_size < training_iters:
        #batch_xs = batch_xs.reshape((batch_size, n_steps, n_input))
        batch_xs, batch_ys, batch_sl = train_batches.next()
        ins_batch_size = len(batch_xs)
        # Fit training using batch data
        summ, acc = sess.run([merged, optimizer], feed_dict={x: batch_xs, y: batch_ys,
                                                             istate: np.zeros((ins_batch_size, 2*n_hidden)),
                                                             early_stop: batch_sl})
        train_writer.add_summary(summ, step)
        if step % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys,
                                                istate: np.zeros((ins_batch_size, 2*n_hidden)),
                                                early_stop: batch_sl})
            # Calculate batch loss
            loss = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys,
                                             istate: np.zeros((ins_batch_size, 2*n_hidden)),
                                             early_stop: batch_sl})
            print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) + \
                  ", Training Accuracy= " + "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")
    test_data, test_label, test_sl = test_batches.next()
    test_len = len(test_data)
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: test_data, y: test_label,
                                                             istate: np.zeros((test_len, 2*n_hidden)),
                                                             early_stop: test_sl}))
