I recently tried on building a deep NN for identifying image between 1 and 0 from scratch. My codes seems to be having a very low accuracy and I am wondering why. Since I am still a beginner, I do not know how to check and also to evaluate my model. Is there a way to improve while at the same time check each dataset to print and see whether the model predicted correctly?
My dataset is from here: https://www.kaggle.com/kanncaa1/deep-learning-tutorial-for-beginners
My codes:
from zipfile import ZipFile
# from builtins import FileExistsError
from subprocess import check_output
from sklearn.model_selection import train_test_split
import os
import numpy as np
#import matplotlib
#matplotlib.use("Agg")
#import matplotlib.pyplot as plt
import tensorflow as tf
dirName = "input_file"
try:
# Create a directory
os.mkdir(dirName)
print("directory " + dirName + " created")
except:
# If directory exists
print("directory " + dirName + " exists \n")
# Extract the zipfile to the directory
with ZipFile("sign-language-digits-dataset.zip", "r") as zip_data:
zip_data.extractall("input_file")
# Print the files
print("List of files:")
print(check_output(["ls","input_file"]))
# Load data
x_data = np.load("input_file/X.npy")
y_data = np.load("input_file/Y.npy")
X = np.concatenate((x_data[204:409], x_data[822:1027] ), axis=0)
# Is there a way to improve this part? from here
first_zeros = np.zeros(205)
first_ones = np.ones(205)
zeros = list(zip(first_zeros, first_ones))
second_zeros = np.zeros(205)
second_ones = np.ones(205)
ones = list(zip(second_ones, second_zeros))
ones = np.asarray(ones)
zeros = np.asarray(zeros)
# to here???
Y = np.concatenate((zeros, ones), axis=0).reshape(X.shape[0],2)
# Create 25% for test data and random state is the seed with randomly picked
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25,
random_state=42)
try:
if len(X_train) == len(Y_train):
print("Both have {} lengths".format(len(X_train)))
except:
print("Different length")
X_train_flatten = X_train.reshape(X_train.shape[0],
X_train.shape[1]*X_train.shape[2])
X_test_flatten = X_test.reshape(X_test.shape[0],
X_test.shape[1]*X_test.shape[2])
# Network parameters
h_layer = 255
h_layer2 = 255
num_input = X_train_flatten.shape[1] # .shape is used for getting the shape
num_classes = Y_train.shape[1] # .shape is used for getting the shape
# parameters
learning_rate = 0.1
num_steps = 500
batch_size = 128
display_step = 100
# tf graph input
x = tf.placeholder("float", [None, num_input]) #4096 because these are the
#image size which required to be trained
y = tf.placeholder("float", [None, num_classes]) #1 because these are the
# output values that are either 0 or 1 in 2D but only 1 column
# Weight and bias
weights = {
"h1":tf.Variable(tf.random_normal([num_input, h_layer])),
"h2":tf.Variable(tf.random_normal([h_layer, h_layer2])),
"output":tf.Variable(tf.random_normal([h_layer2, num_classes]))
}
biases = {
"h1":tf.Variable(tf.random_normal([h_layer])),
"h2":tf.Variable(tf.random_normal([h_layer2])),
"output":tf.Variable(tf.random_normal([num_classes]))
}
# Create model
def neural_network(x):
layer_1 = tf.add(tf.matmul(x, weights["h1"]), biases["h1"])
layer_2 = tf.add(tf.matmul(layer_1, weights["h2"]), biases["h2"])
output = tf.add(tf.matmul(layer_2, weights["output"]), biases["output"])
return output
# Construct model
logits = neural_network(x)
# Define loss and optimizer
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
for step in range(1, num_steps+1):
if step % display_step == 0 or step == 1:
# feed_dict takes values from the placeholder which is the tf
# graph input in the form of 2D - (410, 4096) shape
loss_val, accuracy_val = sess.run([loss, accuracy],
feed_dict={x:X_train_flatten,
y:Y_train})
print("Step " + str(step) + ", Minibatch Loss= " + \
"{:.4f}".format(loss_val) + ", Training Accuracy= " + \
"{:.3f}".format(accuracy_val))
print("Optimization Finished!")
# Calculate accuracy for MNIST test images
print("Testing Accuracy:", \
sess.run(accuracy, feed_dict={x: X_test_flatten,
y: Y_test}))
My side question:
1) My class should be [zero, one] so that means I have 2 classes. The output should be identifying the image input which is in pixels 4096 and output either 0 and 1. For example, if "0" then it will output [1,0] and for "1" it will output [0,1].
2) Is my code even considered as DNN or just a simple NN? If it is not DNN then how should I change it so that it could become DNN?
3) From my understanding, DNN do not require manual feature extraction because it learns from the training data and then perform classification. Unlike machine learning. Am I understanding it correctly?
Related
I'm working on this classification program where i'm training my model to predict whether the object is a nut or a screw. I created my own dataset since i did not get any. I trained my model but i'm not getting correct predictions. Probability of values go beyond 1, basically i get garbage values.
I get this predicted value: [[9.990779e-01 9.220659e-04]]
#Training code
import dataset
import tensorflow as tf
import time
from datetime import timedelta
import math
import random
import numpy as np
import os
# Adding Seed so that random initialization is consistent
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)
batch_size = 20
# Prepare input data
classes = os.listdir('training_set')
num_classes = len(classes)
# 20% of the data will automatically be used for validation
validation_size = 0.2
img_size = 128
num_channels = 3
train_path = 'training_set'
# We shall load all the training and validation images and labels into
memory using openCV and use that during training
data = dataset.read_train_sets(train_path, img_size, classes,
validation_size=validation_size)
print("Complete reading input data. Will Now print a snippet of it")
print("Number of files in Training-
set:\t\t{}".format(len(data.train.labels)))
print("Number of files in Validation-
set:\t{}".format(len(data.valid.labels)))
session = tf.Session()
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size,
num_channels], name='x')
## labels
y_true = tf.placeholder(tf.float32, shape=[None, num_classes],
name='y_true')
y_true_cls = tf.argmax(y_true, dimension=1)
##Network graph params
filter_size_conv1 = 3
num_filters_conv1 = 32
filter_size_conv2 = 3
num_filters_conv2 = 32
filter_size_conv3 = 3
num_filters_conv3 = 32
fc_layer_size = 128
def create_weights(shape):
return tf.Variable(tf.truncated_normal(shape, stddev=0.05))
def create_biases(size):
return tf.Variable(tf.constant(0.05, shape=[size]))
def create_convolutional_layer(input,
num_input_channels,
conv_filter_size,
num_filters):
## We shall define the weights that will be trained using create_weights function.
weights = create_weights(shape=[conv_filter_size, conv_filter_size, num_input_channels, num_filters])
## We create biases using the create_biases function. These are also trained.
biases = create_biases(num_filters)
## Creating the convolutional layer
layer = tf.nn.conv2d(input=input,
filter=weights,
strides=[1, 1, 1, 1],
padding='SAME')
layer += biases
## We shall be using max-pooling.
layer = tf.nn.max_pool(value=layer,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME')
## Output of pooling is fed to Relu which is the activation function for us.
layer = tf.nn.relu(layer)
return layer
def create_flatten_layer(layer):
# We know that the shape of the layer will be [batch_size img_size img_size num_channels]
# But let's get it from the previous layer.
layer_shape = layer.get_shape()
## Number of features will be img_height * img_width* num_channels. But we shall calculate it in place of hard-coding it.
num_features = layer_shape[1:4].num_elements()
## Now, we Flatten the layer so we shall have to reshape to num_features
layer = tf.reshape(layer, [-1, num_features])
return layer
def create_fc_layer(input,
num_inputs,
num_outputs,
use_relu=True):
# Let's define trainable weights and biases.
weights = create_weights(shape=[num_inputs, num_outputs])
biases = create_biases(num_outputs)
# Fully connected layer takes input x and produces wx+b.Since, these are matrices, we use matmul function in Tensorflow
layer = tf.matmul(input, weights) + biases
if use_relu:
layer = tf.nn.relu(layer)
return layer
layer_conv1 = create_convolutional_layer(input=x,
num_input_channels=num_channels,
conv_filter_size=filter_size_conv1,
num_filters=num_filters_conv1)
layer_conv2 = create_convolutional_layer(input=layer_conv1,
num_input_channels=num_filters_conv1,
conv_filter_size=filter_size_conv2,
num_filters=num_filters_conv2)
layer_conv3 = create_convolutional_layer(input=layer_conv2,
num_input_channels=num_filters_conv2,
conv_filter_size=filter_size_conv3,
num_filters=num_filters_conv3)
layer_flat = create_flatten_layer(layer_conv3)
layer_fc1 = create_fc_layer(input=layer_flat,
num_inputs=layer_flat.get_shape()
[1:4].num_elements(),
num_outputs=fc_layer_size,
use_relu=True)
layer_fc2 = create_fc_layer(input=layer_fc1,
num_inputs=fc_layer_size,
num_outputs=num_classes,
use_relu=False)
y_pred = tf.nn.softmax(layer_fc2, name='y_pred')
y_pred_cls = tf.argmax(y_pred, dimension=1)
session.run(tf.global_variables_initializer())
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2,
labels=y_true)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
session.run(tf.global_variables_initializer())
def show_progress(epoch, feed_dict_train, feed_dict_validate, val_loss):
acc = session.run(accuracy, feed_dict=feed_dict_train)
val_acc = session.run(accuracy, feed_dict=feed_dict_validate)
msg = "Training Epoch {0} --- Training Accuracy: {1:>6.1%}, Validation
Accuracy: {2:>6.1%}, Validation Loss: {3:.3f}"
print(msg.format(epoch + 1, acc, val_acc, val_loss))
total_iterations = 0
saver = tf.train.Saver()
def train(num_iteration):
global total_iterations
for i in range(total_iterations,
total_iterations + num_iteration):
x_batch, y_true_batch, _, cls_batch =
data.train.next_batch(batch_size)
x_valid_batch, y_valid_batch, _, valid_cls_batch =
data.valid.next_batch(batch_size)
feed_dict_tr = {x: x_batch,
y_true: y_true_batch}
feed_dict_val = {x: x_valid_batch,
y_true: y_valid_batch}
session.run(optimizer, feed_dict=feed_dict_tr)
if i % int(data.train.num_examples / batch_size) == 0:
val_loss = session.run(cost, feed_dict=feed_dict_val)
epoch = int(i / int(data.train.num_examples / batch_size))
show_progress(epoch, feed_dict_tr, feed_dict_val, val_loss)
saver.save(session, 'C:\\Nutsbolts\\nuts-screws-model')
total_iterations += num_iteration
train(num_iteration=3000)
#Prediction code
import tensorflow as tf
import numpy as np
import os,glob,cv2
import sys,argparse
# First, pass the path of the image
dir_path = 'C:\\nutsbolts\\testing_set\\nuts'
image_path= 'nuts11.jpg'
filename = dir_path +'/' +image_path
image_size=128
num_channels=3
images = []
# Reading the image using OpenCV
image = cv2.imread(filename)
# Resizing the image to our desired size and preprocessing will be done
exactly as done during training
image = cv2.resize(image, (image_size, image_size),0,0, cv2.INTER_LINEAR)
images.append(image)
images = np.array(images, dtype=np.uint8)
images = images.astype('float32')
images = np.multiply(images, 1.0/255.0)
#The input to the network is of shape [None image_size image_size
num_channels]. Hence we reshape.
x_batch = images.reshape(1, image_size,image_size,num_channels)
## Let us restore the saved model
sess = tf.Session()
# Step-1: Recreate the network graph. At this step only graph is created.
saver = tf.train.import_meta_graph('nuts-screws-model.meta')
# Step-2: Now let's load the weights saved using the restore method.
saver.restore(sess, tf.train.latest_checkpoint('./'))
# Accessing the default graph which we have restored
graph = tf.get_default_graph()
# Now, let's get hold of the op that we can be processed to get the output.
# In the original network y_pred is the tensor that is the prediction of the
network
y_pred = graph.get_tensor_by_name("y_pred:0")
## Let's feed the images to the input placeholders
x= graph.get_tensor_by_name("x:0")
y_true = graph.get_tensor_by_name("y_true:0")
y_test_images = np.zeros((1, len(os.listdir('testing_set'))))
### Creating the feed_dict that is required to be fed to calculate y_pred
feed_dict_testing = {x: x_batch, y_true: y_test_images}
result=sess.run(y_pred, feed_dict=feed_dict_testing)
# result is of this format [probabiliy_of_nuts probability_of_screws]
print(result)
9.990779e-01 actually is below 1. You could see it as: 9.990779 * (the exponential of -01).
I am trying to recreate/rerun a previous neural network. The idea is that I save the weights and biases with which the network is initialised in run (1), and reuse those exact weights and biases for initialisation in run (2), so that the outcome of run (2) will be exactly that of run (1). The relevant code is below.
However, I can't manage to do this. If I rerun the network, setting use_stored_weights to True (which prompts neural_network_model to restore the previously saved weights and biases, and, I hope, initialise the network with them), I do indeed always get the same results - but these are not the results of the run I am trying to emulate (in fact, they are much worse). It is also strange that I ALWAYS get the same results - they thus seem to be independent of the stored weights and biases.
The first thing I checked is whether I am correctly restoring my weights and biases (in neural_network_model), and this is the case.
I'm not really sure what is going on here. I have two suspicions, both of which I don't know how to check:
Although the correct weights and biases are restored in neural_network_model, the network is in fact not initialised with them. Is it the command sess.run(init) in train_neural_network that somehow does this?
I am reinitialising the network with the stored weights and biases not once, but every time - this would explain the poor results.
... but it might as well be something completely different. Any help much appreciated!
UPDATE: I figured out what is happening: when use_stored_weights is set to True, the weights are always initialised as all zeros. As far as I can see, this is due to the sess.run(init) in train_neural_network. However, if I leave out that line, I get an error:
FailedPreconditionError: Attempting to use uninitialized value b2
So I guess my question is now: how can I make the restored weights and biases accessible in train_neural_network?
Imports:
import tensorflow as tf
import numpy as np
from numpy import genfromtxt
This function builds the neural network:
def neural_network_model(data, layer_sizes, use_stored_weights):
num_layers = len(layer_sizes) - 1 # hidden and output layers
weights = {}
biases = {}
# initialise the weights
# (a) create new weights and biases
if not use_stored_weights:
for i in range(num_layers):
w_name = 'W' + str(i+1)
b_name = 'b' + str(i+1)
weights[w_name] = tf.get_variable(w_name, [layer_sizes[i], layer_sizes[i+1]],
initializer = tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
biases[b_name] = tf.get_variable(b_name, [layer_sizes[i+1]],
initializer = tf.zeros_initializer(), dtype=tf.float32)
# save weights and biases
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
save_path = saver.save(sess, fold_path + 'weights/' + 'weights.ckpt')
# (b) restore weights and biases
else:
for i in range(num_layers):
# prepare variable
w_name = 'W' + str(i+1)
b_name = 'b' + str(i+1)
weights[w_name] = tf.get_variable(w_name, [layer_sizes[i], layer_sizes[i+1]],
initializer = tf.zeros_initializer(), dtype=tf.float32)
biases[b_name] = tf.get_variable(b_name, [layer_sizes[i+1]],
initializer = tf.zeros_initializer(), dtype=tf.float32)
saver = tf.train.Saver()
with tf.Session() as sess:
saver.restore(sess, fold_path + 'weights/' + 'weights.ckpt')
# calculate linear and relu outputs for hidden layers
a_prev = data
for i in range(len(weights)-1):
z = tf.add(tf.matmul(a_prev, weights['W' + str(i+1)]), biases['b' + str(i+1)])
a = tf.nn.relu(z)
a_r = tf.nn.dropout(a, keep_prob)
a_prev = a_r
# calculate linear output for output layer
z_o = tf.add(tf.matmul(a_prev, weights['W' + str(len(weights))]), biases['b' + str(len(weights))])
return z_o
This function trains and evaluates the network:
def train_neural_network(x, layer_sizes, use_stored_weights):
prediction = neural_network_model(x, layer_sizes, use_stored_weights)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=lrn_rate).minimize(cost)
softm = tf.nn.softmax(prediction)
pred_class = tf.argmax(softm)
costs = []
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
for epoch in range(epochs):
epoch_loss = 0
for _ in range(int(len(x_train)/batch_size)):
epoch_x, epoch_y = x_train, y_train
_, c = sess.run([optimizer, cost], feed_dict = {x: epoch_x, y: epoch_y, keep_prob: kp})
epoch_loss += c
softmaxes_tst = sess.run([softm, pred_class], feed_dict={x: x_test, keep_prob: 1.0})[0]
incorr = 0
for i in range(len(softmaxes_tst)):
curr_sm = softmaxes_tst[i]
curr_lbl = y_test[i]
if np.argmax(curr_sm) != np.argmax(curr_lbl):
incorr += 1
print('incorr: ', incorr)
num_ex = len(x_test)
print('acc = ' + str(num_ex-incorr) + '/' + str(num_ex) + ' = ' + str((num_ex-incorr)/num_ex))
print('accuracy train:', accuracy.eval({x:x_train, y:y_train, keep_prob: kp}))
print('accuracy test :', accuracy.eval({x:x_test, y:y_test, keep_prob: 1.0}))
And this, finally, is the code that calls the above functions:
lrn_rate = 0.01
kp = 0.85
epochs = 400
path = '/my/path/to/data/'
fold_path = ''
num_folds = 19
for i in range(num_folds):
tf.reset_default_graph()
fold_path = path + 'fold_' + str(i+1) + '/'
x_train = genfromtxt(fold_path + 'fv_train.csv', delimiter=',')
y_train = genfromtxt(fold_path + 'lbl_train.csv', delimiter=',')
x_test = genfromtxt(fold_path + 'fv_test.csv', delimiter=',')
y_test = genfromtxt(fold_path + 'lbl_test.csv', delimiter=',')
num_classes = len(y_train[0])
num_features = len(x_train[0])
batch_size = len(x_train)
num_nodes_hl1 = num_features
num_nodes_hl2 = num_features
layer_sizes = [num_features, num_nodes_hl1, num_nodes_hl2, num_classes]
x = tf.placeholder('float', [None, num_features])
y = tf.placeholder('float')
keep_prob = tf.placeholder('float')
use_stored_weights = True
train_neural_network(x, layer_sizes, use_stored_weights)
This is my code below:
'''
Tensorflow LSTM classification of 16x30 images.
'''
from __future__ import print_function
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np
from numpy import genfromtxt
from sklearn.cross_validation import train_test_split
import pandas as pd
'''
a Tensorflow LSTM that will sequentially input several lines from each single image
i.e. The Tensorflow graph will take a flat (1,480) features image as it was done in Multi-layer
perceptron MNIST Tensorflow tutorial, but then reshape it in a sequential manner with 16 features each and 30 time_steps.
'''
blaine = genfromtxt('./Desktop/Blaine_CSV_lstm.csv',delimiter=',') # CSV transform to array
target = [row[0] for row in blaine] # 1st column in CSV as the targets
data = blaine[:, 1:481] #flat feature vectors
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.05, random_state=42)
f=open('cs-training.csv','w') #1st split for training
for i,j in enumerate(X_train):
k=np.append(np.array(y_train[i]),j )
f.write(",".join([str(s) for s in k]) + '\n')
f.close()
f=open('cs-testing.csv','w') #2nd split for test
for i,j in enumerate(X_test):
k=np.append(np.array(y_test[i]),j )
f.write(",".join([str(s) for s in k]) + '\n')
f.close()
new_data = genfromtxt('cs-training.csv',delimiter=',') # Training data
new_test_data = genfromtxt('cs-testing.csv',delimiter=',') # Test data
x_train=np.array([ i[1::] for i in new_data])
ss = pd.Series(y_train) #indexing series needed for later Pandas Dummies one-hot vectors
y_train_onehot = pd.get_dummies(ss)
x_test=np.array([ i[1::] for i in new_test_data])
gg = pd.Series(y_test)
y_test_onehot = pd.get_dummies(gg)
# General Parameters
learning_rate = 0.001
training_iters = 100000
batch_size = 33
display_step = 10
# Tensorflow LSTM Network Parameters
n_input = 16 # MNIST data input (img shape: 28*28)
n_steps = 30 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 20 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_classes])
# Define weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
'out': tf.Variable(tf.random_normal([n_classes]))
}
def RNN(x, weights, biases):
# Prepare data shape to match `rnn` function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# Define a lstm cell with tensorflow
with tf.variable_scope('cell_def'):
lstm_cell = tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=1.0)
# Get lstm cell output
with tf.variable_scope('rnn_def'):
outputs, states = tf.nn.rnn(lstm_cell, x, dtype=tf.float32)
# Linear activation, using rnn inner loop last output
return tf.matmul(outputs[-1], weights['out']) + biases['out']
pred = RNN(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
batch_x = np.split(x_train, 15)
batch_y = np.split(y_train_onehot, 15)
for index in range(len(batch_x)):
ouh1 = batch_x[index]
ouh2 = batch_y[index]
# Reshape data to get 28 seq of 28 elements
ouh1 = np.reshape(ouh1,(batch_size, n_steps, n_input))
sess.run(optimizer, feed_dict={x: ouh1, y: ouh2}) # Run optimization op (backprop)
if step % display_step == 0:
# Calculate batch accuracy
acc = sess.run(accuracy, feed_dict={x: ouh1, y: ouh2})
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: ouh1, y: ouh2})
print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc))
step += 1
print("Optimization Finished!")
and I am getting the below error that it seems i am re-iterating over the same variable on lines 92 and 97, and i am concerned that it might be a case of incompatibility with Tensorflow 0.10.0 on the RNN def side:
ValueError: Variable RNN/BasicLSTMCell/Linear/Matrix already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
File "/home/mohsen/lstm_mnist.py", line 92, in RNN
outputs, states = tf.nn.rnn(lstm_cell, x, dtype=tf.float32)
File "/home/mohsen/lstm_mnist.py", line 97, in <module>
pred = RNN(x, weights, biases)
File "/home/mohsen/anaconda2/lib/python2.7/site-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 81, in execfile
builtins.execfile(filename, *where)
What could be the origin of this error and how i can resolve it?
EDIT: from the original repo where i build upon my code the same variable_scope problem persists https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py
You are not iterating over the same variable in line 92 and 97, since those will always be in the same namespace, at least in the current setting, since you are calling one namespace from within another (since one is embedded in the RNN function). So your effective variable scope will be something like 'backward/forward'.
Hence the problem, in my guess, is in lines 89 and 92, since both "live" in the same namespace (see above), and both may introduce a variable called RNN/BasicLSTMCell/Linear/Matrix. So you should change your code to the following:
# Define a lstm cell with tensorflow
with tf.variable_scope('cell_def'):
lstm_cell = tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=1.0)
# Get lstm cell output
with tf.variable_scope('rnn_def'):
outputs, states = tf.nn.rnn(lstm_cell, x, dtype=tf.float32)
This makes the LSTMCell initialization live in one namespace - "cell_def/*", and the initialization of the complete RNN in another - "rnn_def/*".
I have written my self a tensorflow class as below, but I met some problem when I am trying to set some weight to zero after training manually in function refine_init_weight. In this function, I tried setting all numbers to zero once it is below some value and see how would the accuracy rate change. The problem is, when I reran self.sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test}), it seems that its value does not changed accordingly. I am just wondering where should I change the symbolic variable (the accuracy depends on the weights I changed) at this case?
import tensorflow as tf
from nncomponents import *
from helpers import *
from sda import StackedDenoisingAutoencoder
class DeepFeatureSelection:
def __init__(self, X_train, X_test, y_train, y_test, weight_init='sda', hidden_dims=[100, 100, 100], epochs=1000,
lambda1=0.001, lambda2=1.0, alpha1=0.001, alpha2=0.0, learning_rate=0.1, optimizer='FTRL'):
# Initiate the input layer
# Get the dimension of the input X
n_sample, n_feat = X_train.shape
n_classes = len(np.unique(y_train))
self.epochs = epochs
# Store up original value
self.X_train = X_train
self.y_train = one_hot(y_train)
self.X_test = X_test
self.y_test = one_hot(y_test)
# Two variables with undetermined length is created
self.var_X = tf.placeholder(dtype=tf.float32, shape=[None, n_feat], name='x')
self.var_Y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes], name='y')
self.input_layer = One2OneInputLayer(self.var_X)
self.hidden_layers = []
layer_input = self.input_layer.output
# Initialize the network weights
weights, biases = init_layer_weight(hidden_dims, X_train, weight_init)
print(type(weights[0]))
# Create hidden layers
for init_w,init_b in zip(weights, biases):
self.hidden_layers.append(DenseLayer(layer_input, init_w, init_b))
layer_input = self.hidden_layers[-1].output
# Final classification layer, variable Y is passed
self.softmax_layer = SoftmaxLayer(self.hidden_layers[-1].output, n_classes, self.var_Y)
n_hidden = len(hidden_dims)
# regularization terms on coefficients of input layer
self.L1_input = tf.reduce_sum(tf.abs(self.input_layer.w))
self.L2_input = tf.nn.l2_loss(self.input_layer.w)
# regularization terms on weights of hidden layers
L1s = []
L2_sqrs = []
for i in xrange(n_hidden):
L1s.append(tf.reduce_sum(tf.abs(self.hidden_layers[i].w)))
L2_sqrs.append(tf.nn.l2_loss(self.hidden_layers[i].w))
L1s.append(tf.reduce_sum(tf.abs(self.softmax_layer.w)))
L2_sqrs.append(tf.nn.l2_loss(self.softmax_layer.w))
self.L1 = tf.add_n(L1s)
self.L2_sqr = tf.add_n(L2_sqrs)
# Cost with two regularization terms
self.cost = self.softmax_layer.cost \
+ lambda1*(1.0-lambda2)*0.5*self.L2_input + lambda1*lambda2*self.L1_input \
+ alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2*self.L1
# FTRL optimizer is used to produce more zeros
# self.optimizer = tf.train.FtrlOptimizer(learning_rate=learning_rate).minimize(self.cost)
self.optimizer = optimize(self.cost, learning_rate, optimizer)
self.accuracy = self.softmax_layer.accuracy
self.y = self.softmax_layer.y
def train(self, batch_size=100):
sess = tf.Session()
self.sess = sess
sess.run(tf.initialize_all_variables())
for i in xrange(self.epochs):
x_batch, y_batch = get_batch(self.X_train, self.y_train, batch_size)
sess.run(self.optimizer, feed_dict={self.var_X: x_batch, self.var_Y: y_batch})
if i % 2 == 0:
l = sess.run(self.cost, feed_dict={self.var_X: x_batch, self.var_Y: y_batch})
print('epoch {0}: global loss = {1}'.format(i, l))
self.selected_w = sess.run(self.input_layer.w)
print("Train accuracy:",sess.run(self.accuracy, feed_dict={self.var_X: self.X_train, self.var_Y: self.y_train}))
print("Test accuracy:",sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test}))
print(self.selected_w)
print(len(self.selected_w[self.selected_w==0]))
print("Final test accuracy:",sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test}))
def refine_init_weight(self, threshold=0.001):
refined_w = np.copy(self.selected_w)
refined_w[refined_w < threshold] = 0
self.input_layer.w.assign(refined_w)
print("Test accuracy refined:",self.sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test}))
(I'll just repost my comment as an answer)
You need to run the assign operation you created, otherwise it is just added to the graph and never executed.
assign_op = self.input_layer.w.assign(refined_w)
self.sess.run(assign_op)
If you want to do this in Tensorflow you could create a boolean mask of the weight variable with tf.greater and tf.less, convert this mask to tf.float32 and multiply it with the weight array.
I am trying to train a sparse data with an MLP to predict a forecast. However, the forecast on the test data is giving the same value for all observations. Once I omit the activation function from each layer, the outcome starts being different.
my code is below:
# imports
import numpy as np
import tensorflow as tf
import random
import json
from scipy.sparse import rand
# Parameters
learning_rate= 0.1
training_epochs = 50
batch_size = 100
# Network Parameters
m= 1000 #number of features
n= 5000 # number of observations
hidden_layers = [5,2,4,1,6]
n_layers = len(hidden_layers)
n_input = m
n_classes = 1 # it's a regression problem
X_train = rand(n, m, density=0.2,format = 'csr').todense().astype(np.float32)
Y_train = np.random.randint(4, size=n)
X_test = rand(200, m, density=0.2,format = 'csr').todense().astype(np.float32)
Y_test = np.random.randint(4, size=200)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None])
# Store layers weight & bias
weights = {}
biases = {}
weights['h1']=tf.Variable(tf.random_normal([n_input, hidden_layers[0]])) #first matrice
biases['b1'] = tf.Variable(tf.random_normal([hidden_layers[0]]))
for i in xrange(2,n_layers+1):
weights['h'+str(i)]= tf.Variable(tf.random_normal([hidden_layers[i-2], hidden_layers[i-1]]))
biases['b'+str(i)] = tf.Variable(tf.random_normal([hidden_layers[i-1]]))
weights['out']=tf.Variable(tf.random_normal([hidden_layers[-1], 1])) #matrice between last layer and output
biases['out']= tf.Variable(tf.random_normal([1]))
# Create model
def multilayer_perceptron(_X, _weights, _biases):
layer_begin = tf.nn.relu(tf.add(tf.matmul(_X, _weights['h1'],a_is_sparse=True), _biases['b1']))
for layer in xrange(2,n_layers+1):
layer_begin = tf.nn.relu(tf.add(tf.matmul(layer_begin, _weights['h'+str(layer)]), _biases['b'+str(layer)]))
#layer_end = tf.nn.dropout(layer_begin, 0.3)
return tf.matmul(layer_begin, _weights['out'])+ _biases['out']
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
rmse = tf.reduce_sum(tf.abs(y-pred))/tf.reduce_sum(tf.abs(y)) # rmse loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(rmse) # Adam Optimizer
# Initializing the variables
init = tf.initialize_all_variables()
with tf.Session() as sess:
sess.run(init)
#training
for step in xrange(training_epochs):
# Generate a minibatch.
start = random.randrange(1, n - batch_size)
#print start
batch_xs=X_train[start:start+batch_size,:]
batch_ys =Y_train[start:start+batch_size]
#printing
_,rmseRes = sess.run([optimizer, rmse] , feed_dict={x: batch_xs, y: batch_ys} )
if step % 20 == 0:
print "rmse [%s] = %s" % (step, rmseRes)
#testing
pred_test = multilayer_perceptron(X_test, weights, biases)
print "prediction", pred_test.eval()[:20]
print "actual = ", Y_test[:20]
PS: I am generating randomly my data just to reproduce the error. My data is sparse in fact, pretty similar to the one generated randomly. The problem I want to solve is: MLP is giving the same prediction for all observations in the test data.
That's a sign that your training failed. With GoogeLeNet Imagenet training I've seen it label everything as "nematode" when started with a bad choice of hyper-parameters. Things to check -- does your training loss decrease? If it doesn't decrease, try different learning rates/architectures. If it decreases to zero maybe your loss is wrong like was case here