Tensorflow - how to import MNIST database - python

I want to train a model using the MNIST database. I'm working through the TensorFlow MNIST tutorial. The suggested way to import the dataset is mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True), but I need to use something like:
with open('my/directory/train-images-idx3-ubyte.gz', 'rb') as f:
    train_images = extract_images(f)
with open('my/directory/train-labels-idx1-ubyte.gz', 'rb') as f:
    train_labels = extract_labels(f)
...
That brings up the question of how to adjust the code to work with my train_images, train_labels, test_images, test_labels:
def main(_):
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                                logits=y_conv)
    cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    graph_location = tempfile.mkdtemp()
    print('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(20000):
            batch = mnist.train.next_batch(50)
            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0], y_: batch[1], keep_prob: 1.0})
                print('step %d, training accuracy %g' % (i, train_accuracy))
            train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

        print('test accuracy %g' % accuracy.eval(feed_dict={
            x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str,
                        default='/tmp/tensorflow/mnist/input_data',
                        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
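One way to adapt this (a minimal sketch, not from the original thread; it assumes the extract_images and extract_labels helpers from tensorflow.contrib.learn.python.learn.datasets.mnist) is to load the arrays once and replace mnist.train.next_batch(50) with a small batching helper:

import numpy as np
from tensorflow.contrib.learn.python.learn.datasets.mnist import extract_images, extract_labels

with open('my/directory/train-images-idx3-ubyte.gz', 'rb') as f:
    # extract_images returns uint8 with shape (num, 28, 28, 1); flatten and scale to [0, 1]
    train_images = extract_images(f).reshape(-1, 784).astype('float32') / 255
with open('my/directory/train-labels-idx1-ubyte.gz', 'rb') as f:
    train_labels = extract_labels(f, one_hot=True)

def next_batch(images, labels, batch_size):
    # draw a random mini-batch, standing in for mnist.train.next_batch
    idx = np.random.choice(len(images), batch_size, replace=False)
    return images[idx], labels[idx]

batch = next_batch(train_images, train_labels, 50)  # then feed batch[0], batch[1] as in the loop above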

A recommended way to import the mnist dataset with TF2 is the following:
from tensorflow.keras.datasets import mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
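Note that mnist.load_data() returns uint8 arrays of shapes (60000, 28, 28) and (10000, 28, 28), so for a dense model like the one above the usual follow-up (a short sketch) is:

X_train = X_train.reshape(-1, 784).astype('float32') / 255  # flatten and scale to [0, 1]
X_test = X_test.reshape(-1, 784).astype('float32') / 255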

Related

terminal error : setting an array element with a sequence

This is my code for classification:
# Load data
adj, features, y_train, y_val, labels, y_test, train_mask, val_mask, test_mask = load_data(FLAGS.dataset)
#labels=load_data(FLAGS.dataset)

# Some preprocessing
XX = features
yy = labels
selectedfeatures = SelectKBest(chi2, k=200).fit_transform(XX, yy)
selectedfeatures = preprocess_features(selectedfeatures)

# Define placeholders
placeholders = {
    'support': [tf.sparse_placeholder(tf.float32) for _ in range(num_supports)],
    'selectedfeatures': tf.sparse_placeholder(tf.float32, shape=tf.constant(selectedfeatures[2], dtype=tf.int64)),
    'labels': tf.placeholder(tf.float32, shape=(None, y_train.shape[1])),
    'labels_mask': tf.placeholder(tf.int32),
    'dropout': tf.placeholder_with_default(0., shape=()),
    'num_features_nonzero': tf.placeholder(tf.int32)  # helper variable for sparse dropout
}

# Create model
model = model_func(placeholders, input_dim=selectedfeatures[2][1], logging=True)

# Initialize session
sess = tf.Session()

# Define model evaluation function
def evaluate(selectedfeatures, support, labels, mask, placeholders):
    t_test = time.time()
    feed_dict_val = construct_feed_dict(selectedfeatures, support, labels, mask, placeholders)
    outs_val = sess.run([model.loss, model.accuracy], feed_dict=feed_dict_val)
    return outs_val[0], outs_val[1], (time.time() - t_test)

# Init variables
sess.run(tf.global_variables_initializer())
cost_val = []

# Train model
for epoch in range(FLAGS.epochs):
    t = time.time()
    # Construct feed dictionary
    feed_dict = construct_feed_dict(features, support, y_train, train_mask, placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
    # Training step
    outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)
    # Validation
    cost, acc, duration = evaluate(features, support, y_val, val_mask, placeholders)
    cost_val.append(cost)
    # Print results
    print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(outs[1]),
          "train_acc=", "{:.5f}".format(outs[2]), "val_loss=", "{:.5f}".format(cost),
          "val_acc=", "{:.5f}".format(acc), "time=", "{:.5f}".format(time.time() - t))

print("Optimization Finished!")

# Testing
test_cost, test_acc, test_duration = evaluate(features, support, y_test, test_mask, placeholders)
print("Test set results:", "cost=", "{:.5f}".format(test_cost),
      "accuracy=", "{:.5f}".format(test_acc), "time=", "{:.5f}".format(test_duration))
When I tried to execute it in the terminal, I got the following error:
  File "train.py", line 91, in <module>
    outs = sess.run([model.opt_op, model.loss, model.accuracy], feed_dict=feed_dict)
ValueError: setting an array element with a sequence.
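A likely cause (my reading; the thread contains no answer): the placeholder dictionary stores the sparse features under 'selectedfeatures' and the model was built from selectedfeatures, but both feed dictionaries are constructed from the raw features object, so construct_feed_dict packs a mismatched nested structure that NumPy cannot coerce into an array. A sketch of the change, assuming construct_feed_dict is also updated to fill placeholders['selectedfeatures']:

feed_dict = construct_feed_dict(selectedfeatures, support, y_train, train_mask, placeholders)
...
cost, acc, duration = evaluate(selectedfeatures, support, y_val, val_mask, placeholders)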

Why does Chainer Batchnormalization not work well with mnist_nn

Chainer's batch normalization does not work well with my code, although batch normalization in tensorflow works. I use the MNIST dataset, as the code below shows.
Using chainer (version 6.1.0) without batch normalization, the validation accuracy is between 0.97 and 0.98 after 100 epochs, whereas with batch normalization it is less than 0.80 after 100 epochs. When I use the same setup with tensorflow (version 1.14.0), the validation accuracy is around 0.98 either way, with or without batch normalization.
This is the relevant part of my code. The number of epochs is 100 and the batch size is 1000. I use Adam as the optimizer with learning rate 0.01.
Dataset (train and validation data):
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images=train_images.reshape(60000, 28*28)
test_images = test_images.reshape(10000, 28*28)
x_train = train_images.astype('float32')/255
y_train = train_labels.astype('int32')
x_val = test_images.astype('float32')/255
y_val = test_labels.astype('int32')
Model and conditions (chainer):
# Define model
class MyModel(Chain):
    def __init__(self, n_in=784, n_hidden=100, n_out=10):
        initializer = chainer.initializers.HeNormal()
        super().__init__()
        with self.init_scope():
            self.l1 = L.Linear(n_in, n_hidden, initialW=initializer)
            self.l2 = L.Linear(n_hidden, n_hidden, initialW=initializer)
            self.l3 = L.Linear(n_hidden, n_out, initialW=initializer)
            self.bn = L.BatchNormalization(n_hidden, decay=0.99, eps=0.001)

    def forward(self, x):
        h = F.relu(self.bn(self.l1(x)))
        h = F.relu(self.bn(self.l2(h)))
        return self.l3(h)

model = MyModel()
optimizer = optimizers.Adam()
optimizer.setup(model)
n_epoch = 100
n_batchsize = 1000
Model and conditions (tensorflow):
n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10
batch_norm_momentum = 0.9

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name='training')

with tf.name_scope("dnn"):
    he_init = tf.variance_scaling_initializer()
    my_batch_norm_layer = partial(tf.layers.batch_normalization,
                                  training=training,
                                  momentum=batch_norm_momentum)
    my_dense_layer = partial(tf.layers.dense,
                             kernel_initializer=he_init)
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    bn1 = tf.nn.relu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name="hidden2")
    bn2 = tf.nn.relu(my_batch_norm_layer(hidden2))
    logits_before_bn = my_dense_layer(bn2, n_outputs, name="outputs")
    logits = my_batch_norm_layer(logits_before_bn)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
All the code I use with chainer:
import numpy as np
import chainer
from chainer import cuda, Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions
from keras.datasets import mnist
import cupy as cp

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape(60000, 28*28)
test_images = test_images.reshape(10000, 28*28)
x_train = train_images.astype('float32')/255
y_train = train_labels.astype('int32')
x_val = test_images.astype('float32')/255
y_val = test_labels.astype('int32')

# Define model
class MyModel(Chain):
    def __init__(self, n_in=784, n_hidden=100, n_out=10):
        initializer = chainer.initializers.HeNormal()
        super().__init__()
        with self.init_scope():
            self.l1 = L.Linear(n_in, n_hidden, initialW=initializer)
            self.l2 = L.Linear(n_hidden, n_hidden, initialW=initializer)
            self.l3 = L.Linear(n_hidden, n_out, initialW=initializer)
            self.bn = L.BatchNormalization(n_hidden, decay=0.9, eps=0.001)

    def forward(self, x):
        h = F.relu(self.bn(self.l1(x)))
        h = F.relu(self.bn(self.l2(h)))
        return self.l3(h)

# define optimizer
model = MyModel()
optimizer = optimizers.Adam(alpha=0.01)
optimizer.setup(model)

## learn network
n_epoch = 100
n_batchsize = 1000
iteration = 0
gpu_id = 0
cuda.get_device(gpu_id).use()

# send the network to gpu memory
model.to_gpu(gpu_id)

print("epoch train/loss val/loss train/acc val/acc")
for epoch in range(n_epoch):
    # order dataset randomly
    order = np.random.permutation(range(len(x_train)))
    loss_list = []
    accuracy_list = []
    for i in range(0, len(order), n_batchsize):
        index = order[i:i+n_batchsize]
        x_train_batch = x_train[index, :]
        y_train_batch = y_train[index]
        x_train_batch = cp.asarray(x_train_batch)
        y_train_batch = cp.asarray(y_train_batch)
        output_train = model(x_train_batch)
        loss_train_batch = F.softmax_cross_entropy(output_train, y_train_batch)
        accuracy_train_batch = F.accuracy(output_train, y_train_batch)
        loss_list.append(cuda.to_cpu(loss_train_batch.array))
        accuracy_list.append(cuda.to_cpu(accuracy_train_batch.array))
        model.cleargrads()
        loss_train_batch.backward()
        optimizer.update()
        iteration += 1
    loss_train = np.mean(loss_list)
    accuracy_train = np.mean(accuracy_list)
    # after one epoch, evaluate with validation data
    x_val = cp.asarray(x_val)
    y_val = cp.asarray(y_val)
    with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):
        output_val = model(x_val)
    loss_val = F.softmax_cross_entropy(output_val, y_val)
    loss_val = cuda.to_cpu(loss_val.array)
    accuracy_val = F.accuracy(output_val, y_val)
    accuracy_val = cuda.to_cpu(accuracy_val.array)
    print('{0:>4d} {1:>10.4f} {2:>10.4f} {3:>10.4f} {4:>10.4f}'.format(epoch, loss_train, loss_val, accuracy_train, accuracy_val))
All the code I use with tensorflow:
import tensorflow as tf
from keras.datasets import mnist
from functools import partial
import numpy as np

def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape(60000, 28*28)
test_images = test_images.reshape(10000, 28*28)
X_train = train_images.astype('float32')/255
y_train = train_labels.astype('int32')
X_valid = test_images.astype('float32')/255
y_valid = test_labels.astype('int32')

n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10
batch_norm_momentum = 0.9

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name='training')

with tf.name_scope("dnn"):
    he_init = tf.variance_scaling_initializer()
    my_batch_norm_layer = partial(tf.layers.batch_normalization,
                                  training=training,
                                  momentum=batch_norm_momentum)
    my_dense_layer = partial(tf.layers.dense)
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    bn1 = tf.nn.relu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name="hidden2")
    bn2 = tf.nn.relu(my_batch_norm_layer(hidden2))
    logits_before_bn = my_dense_layer(bn2, n_outputs, name="outputs")
    logits = my_batch_norm_layer(logits_before_bn)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

learning_rate = 0.01
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 100
batch_size = 1000
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

print("epoch train/loss val/loss train/acc val/acc")
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        loss_list = []
        accuracy_list = []
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run([training_op, extra_update_ops],
                     feed_dict={training: True, X: X_batch, y: y_batch})
            loss_batch = loss.eval(feed_dict={X: X_batch, y: y_batch})
            accuracy_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            loss_list.append(loss_batch)
            accuracy_list.append(accuracy_batch)
        loss_val = loss.eval(feed_dict={X: X_valid, y: y_valid})
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print('{0:>4d} {1:>10.4f} {2:>10.4f} {3:>10.4f} {4:>10.4f}'
              .format(epoch, np.mean(loss_list), loss_val, np.mean(accuracy_list), accuracy_val))
I expected batch normalization with chainer to reach around 98%, but it got less than 80%.
Am I using batch normalization with chainer in the wrong way, or does the structure of batch normalization differ that much between chainer and tensorflow?
The chainer model reuses a single L.BatchNormalization link for both hidden layers, so the two layers share one set of scale/shift parameters and one set of running statistics, whereas the tensorflow code creates a separate batch-norm layer at each call. In order to use different batch statistics for each layer, the model definition has to be like the following code, which achieves 98% validation accuracy after 100 epochs in my environment.
class MyModel(Chain):
    def __init__(self, n_in=784, n_hidden=100, n_out=10):
        initializer = chainer.initializers.HeNormal()
        super().__init__()
        with self.init_scope():
            self.l1 = L.Linear(n_in, n_hidden, initialW=initializer)
            self.l2 = L.Linear(n_hidden, n_hidden, initialW=initializer)
            self.l3 = L.Linear(n_hidden, n_out, initialW=initializer)
            self.bn1 = L.BatchNormalization(n_hidden, decay=0.9, eps=0.001)
            self.bn2 = L.BatchNormalization(n_hidden, decay=0.9, eps=0.001)

    def forward(self, x):
        h = F.relu(self.bn1(self.l1(x)))
        h = F.relu(self.bn2(self.l2(h)))
        return self.l3(h)
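As a quick sanity check (a sketch of mine, assuming the fixed model above running on CPU arrays), you can confirm that the two links now track independent running statistics:

import numpy as np
model = MyModel()
_ = model.forward(np.random.rand(32, 784).astype('float32'))  # one training-mode pass updates the running stats
print(model.bn1.avg_mean[:3])
print(model.bn2.avg_mean[:3])  # differs from bn1: each link keeps its own avg_mean/avg_var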

ValueError: Cannot feed value of shape (128,) for Tensor 'Target:0', which has shape '(128, ?)'

I know this question has been asked a lot, but I cannot figure out the solution.
I had already built a CNN and the preprocessing for the images, and everything worked fine until I set the feed_dict parameter in sess.run. I have 24 classes and the images are sized (64, 64). Both imagepaths and labels are Python lists.
imagepaths = tf.convert_to_tensor(imagepaths, dtype=tf.string)
labels = tf.convert_to_tensor(labels, dtype=tf.int32)
image, label = tf.train.slice_input_producer([imagepaths, labels],
                                             shuffle=True)
# (here) I did decoding, resizing and normalizing
X, Y = tf.train.batch([image, label], batch_size=batch_size,
                      capacity=batch_size * 8,
                      num_threads=4)

place_holder_X = tf.placeholder(tf.string, (128, 64, 64, 3), name="Input")
place_holder_Y = tf.placeholder(tf.int32, (128, None), name="Target")

with tf.Session() as sess:
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        for step in range(1, num_steps+1):
            #print("step = ",step,"display step",display_step,end=' ')
            if coord.should_stop():
                break
            if step % display_step == 0:
                images, labels = sess.run([X, Y])
                _, loss, acc = sess.run([train_op, loss_op, accuracy],
                                        feed_dict={place_holder_X: images, place_holder_Y: labels})
                print("Step " + str(step) + ", Minibatch Loss= " + \
                      "{:.4f}".format(loss) + ", Training Accuracy= " + \
                      "{:.3f}".format(acc))
            else:
                # Only run the optimization op (backprop)
                sess.run(train_op)
    except Exception as e:
        coord.request_stop(e)
    finally:
        coord.request_stop()
        coord.join(threads)

    print("Optimization Finished!")
    print("Time taken: %f" % (time.time() - startTime))
    saver = tf.train.Saver()
    saver.save(sess, "./models1/my_tf_model.ckpt")

Train two separate tensorflow models at once

I am new to tensorflow. I am trying to train two tensorflow models that are connected by a dot-product output layer. The inputs are two 2048-element float vectors.
When I run the script I always get these errors:
Traceback (most recent call last):
  File "classifier.py", line 120, in <module>
    _, summary = sess.run([optimizer, merged], feed_dict={x1: batch_x1s, x2: batch_x2s})
  File "/Users/Joachim/work/tensorflow/virtualenv/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 789, in run
    run_metadata_ptr)
  File "/Users/Joachim/work/tensorflow/virtualenv/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 968, in _run
    np_val = np.asarray(subfeed_val, dtype=subfeed_dtype)
  File "/Users/Joachim/work/tensorflow/virtualenv/tensorflow/lib/python3.6/site-packages/numpy/core/numeric.py", line 531, in asarray
    return array(a, dtype, copy=False, order=order)
ValueError: setting an array element with a sequence.
Here is my code:
import tensorflow as tf
import sys
import math
import os
import numpy as np
import json
import argparse
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from tensorflow.python.platform import gfile
from progress.bar import Bar

bottleneck_dir = 'bottlenecks'

### LOAD DATA FROM BOTTLENECKS
data_inputs = []
data_labels = []
data_expected_result = []
bottleneck_list = []
file_glob = os.path.join(bottleneck_dir, '*.txt')
bottleneck_list.extend(gfile.Glob(file_glob))
for bottleneck_file in bottleneck_list:
    bottleneck = open(bottleneck_file)
    bottleneck_string = bottleneck.read()
    bottleneck_values = [float(x) for x in bottleneck_string.split(',')]
    imageName = bottleneck_file.split('.')[0]
    helper = False
    for i in range(len(data_labels)):
        if imageName == data_labels[i]:
            if 'search' in bottleneck_file:
                data_inputs[i][0] = bottleneck_values
            else:
                data_inputs[i][1] = bottleneck_values
            helper = True
    if helper != True:
        if 'search' in bottleneck_file:
            data_inputs.append([bottleneck_values, []])
        else:
            data_inputs.append([[], bottleneck_values])
        data_expected_result.append(1)

data_inputs_x1 = [i[0] for i in data_inputs]
data_inputs_x2 = [i[1] for i in data_inputs]

# Setting hyperparameters
learning_rate = 0.01
batch_size = 4
epochs = 1
log_batch_step = 50
n_features = np.size(data_inputs, 1)

tf.reset_default_graph()
graph = tf.get_default_graph()

inputVectorSize = 2048
outputVectorSize = 2048

x1 = tf.placeholder(tf.float32, [None, inputVectorSize], name='x1')  # input layer
x2 = tf.placeholder(tf.float32, [None, inputVectorSize], name='x2')  # input layer

dense1 = tf.layers.dense(inputs=x1, units=inputVectorSize, activation=tf.nn.relu)
logits1 = tf.layers.dense(inputs=dense1, units=outputVectorSize, activation=tf.nn.relu)
logits1_normalized = tf.nn.softmax(logits1)

dense2 = tf.layers.dense(inputs=x2, units=inputVectorSize, activation=tf.nn.relu)
logits2 = tf.layers.dense(inputs=dense2, units=outputVectorSize, activation=tf.nn.relu)
logits2_normalized = tf.nn.softmax(logits2)

output = tf.reduce_sum(tf.multiply(logits1_normalized, logits2_normalized), 1, keep_dims=True)

# Defining loss of network
loss = data_expected_result - output
tf.summary.scalar('loss', loss)

# Setting optimiser
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# Define accuracy
accuracy = loss
tf.summary.scalar('accuracy', accuracy)

# For saving checkpoint after training
saver = tf.train.Saver()
merged = tf.summary.merge_all()

# use in command line: tensorboard --logdir=path/to/log --> to view tensorboard

# Run tensorflow session
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    train_writer = tf.summary.FileWriter('log', sess.graph)
    tf.train.write_graph(sess.graph_def, '', 'savedgraph.pbtxt', as_text=False)

    # Running the training in batches
    batch_count = int(math.ceil(len(data_inputs)/batch_size))
    for epoch_i in range(epochs):
        batches_pbar = tqdm(range(batch_count), desc='Epoch {:>2}/{}'.format(epoch_i+1, epochs), unit='batches')
        # The training cycle
        for batch_i in batches_pbar:
            # Get a batch of training features and labels
            batch_start = batch_i*batch_size
            batch_x1s = data_inputs_x1[batch_start:batch_start + batch_size]
            batch_x2s = data_inputs_x2[batch_start:batch_start + batch_size]
            # Run optimizer
            _, summary = sess.run([optimizer, merged], feed_dict={x1: batch_x1s, x2: batch_x2s})
            train_writer.add_summary(summary, batch_i)
        # Check accuracy against validation data
        val_accuracy, val_loss = sess.run([accuracy, loss], feed_dict={x1: data_inputs_x1[0:len(data_inputs-1)], x2: data_inputs_x2[0:len(data_inputs-1)]})
        print("After epoch {}, Loss: {}, Accuracy: {}".format(epoch_i+1, val_loss, val_accuracy))

    test_accuracy, test_loss = sess.run([accuracy, loss], feed_dict={x1: data_inputs_x1[0:len(data_inputs-1)], x2: data_inputs_x2[0:len(data_inputs-1)]})
    print("TEST LOSS: {}, TEST ACCURACY: {}".format(test_loss, test_accuracy))
    g = tf.get_default_graph()
    saver.save(sess, 'savedgraph')
Can anyone show me what to do to fix the problem?
You need to feed an array, not a list. Change the lines where a list is used as the feed_dict input:
batch_x1s = np.asarray(data_inputs_x1[batch_start:batch_start + batch_size])
batch_x2s = np.asarray(data_inputs_x2[batch_start:batch_start + batch_size])
...
test_accuracy, test_loss = sess.run([accuracy, loss], feed_dict=
    {x1: np.asarray(data_inputs_x1[0:len(data_inputs)-1]), x2:
     np.asarray(data_inputs_x2[0:len(data_inputs)-1])})
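For background (my note): np.asarray only fails this way when it is given a ragged nested sequence together with a fixed dtype, which is exactly what session.run does internally. Here some entries of data_inputs still hold an empty list ([]) because no matching pair was found, so one row has length 2048 and another has length 0. A minimal reproduction, assuming a NumPy version of that era:

import numpy as np
np.asarray([[1.0, 2.0], []], dtype=np.float32)
# ValueError: setting an array element with a sequence.

So converting to arrays only helps once every row has the same length; the ragged rows themselves also have to be fixed, which is what the follow-up answer below addresses.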
I found the problem: it was a problem with the input data.
import tensorflow as tf
import sys
import math
import os
import numpy as np
import json
import argparse
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from tensorflow.python.platform import gfile
from progress.bar import Bar

bottleneck_dir = 'bottlenecks'

### LOAD DATA FROM BOTTLENECKS
data_inputs = []
data_labels = []
data_expected_result = []
bottleneck_list = []
file_glob = os.path.join(bottleneck_dir, '*.txt')
bottleneck_list.extend(gfile.Glob(file_glob))
for bottleneck_file in bottleneck_list:
    bottleneck = open(bottleneck_file)
    bottleneck_string = bottleneck.read()
    bottleneck_values = [float(x) for x in bottleneck_string.split(',')]
    imageName = bottleneck_file.split('.')[0]
    helper = False
    for i in range(len(data_labels)):
        if imageName == data_labels[i]:
            if 'search' in bottleneck_file:
                data_inputs[i][0] = np.asarray(bottleneck_values)
            else:
                data_inputs[i][1] = np.asarray(bottleneck_values)
            helper = True
    if helper != True:
        if 'search' in bottleneck_file:
            data_inputs.append([bottleneck_values, []])
        else:
            data_inputs.append([[], bottleneck_values])
        data_expected_result.append(1)
        data_labels.append(imageName)

data_inputs_x1 = [i[0] for i in data_inputs]
data_inputs_x2 = [i[1] for i in data_inputs]
for i in range(len(data_inputs_x2)):
    print(len(data_inputs_x2[i]))

# Setting hyperparameters
learning_rate = 0.01
batch_size = 4
epochs = 1
log_batch_step = 50
n_features = np.size(data_inputs, 1)

tf.reset_default_graph()
graph = tf.get_default_graph()

inputVectorSize = 2048
outputVectorSize = 2048

x1 = tf.placeholder(tf.float32, [None, inputVectorSize], name='x1')  # input layer
x2 = tf.placeholder(tf.float32, [None, inputVectorSize], name='x2')  # input layer

dense1 = tf.layers.dense(inputs=x1, units=inputVectorSize, activation=tf.nn.relu)
logits1 = tf.layers.dense(inputs=dense1, units=outputVectorSize, activation=tf.nn.relu)
logits1_normalized = tf.nn.softmax(logits1)

dense2 = tf.layers.dense(inputs=x2, units=inputVectorSize, activation=tf.nn.relu)
logits2 = tf.layers.dense(inputs=dense2, units=outputVectorSize, activation=tf.nn.relu)
logits2_normalized = tf.nn.softmax(logits2)

output = tf.reduce_sum(tf.multiply(logits1_normalized, logits2_normalized), 1, keep_dims=True)

# Defining loss of network
loss = tf.reduce_sum(tf.subtract(1.0, output))
tf.summary.scalar('loss', loss)

# Setting optimiser
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# Define accuracy
accuracy = loss
tf.summary.scalar('accuracy', accuracy)

# For saving checkpoint after training
saver = tf.train.Saver()
merged = tf.summary.merge_all()

# use in command line: tensorboard --logdir=path/to/log --> to view tensorboard

# Run tensorflow session
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    train_writer = tf.summary.FileWriter('log', sess.graph)
    tf.train.write_graph(sess.graph_def, '', 'savedgraph.pbtxt', as_text=False)

    # Running the training in batches
    batch_count = int(math.ceil(len(data_inputs)/batch_size))
    for epoch_i in range(epochs):
        batches_pbar = tqdm(range(batch_count), desc='Epoch {:>2}/{}'.format(epoch_i+1, epochs), unit='batches')
        # The training cycle
        for batch_i in batches_pbar:
            # Get a batch of training features and labels
            batch_start = batch_i*batch_size
            batch_x1s = np.asarray(data_inputs_x1[batch_start:batch_start + batch_size])
            batch_x2s = np.asarray(data_inputs_x2[batch_start:batch_start + batch_size])
            # Run optimizer
            _, summary = sess.run([optimizer, merged], feed_dict={x1: batch_x1s, x2: batch_x2s})
            train_writer.add_summary(summary, batch_i)
        # Check accuracy against validation data
        val_accuracy, val_loss = sess.run([accuracy, loss], feed_dict={x1: np.asarray(data_inputs_x1), x2: np.asarray(data_inputs_x2)})
        print("After epoch {}, Loss: {}, Accuracy: {}".format(epoch_i+1, val_loss, val_accuracy))

    test_accuracy, test_loss = sess.run([accuracy, loss], feed_dict={x1: np.asarray(data_inputs_x1), x2: np.asarray(data_inputs_x2)})
    print("TEST LOSS: {}, TEST ACCURACY: {}".format(test_loss, test_accuracy))
    g = tf.get_default_graph()
    saver.save(sess, 'savedgraph')

Training accuracy of CNN in Tensorflow not improving over 30 percent and cost remains almost the same

I am using a data set that consists of lists of 9 real numbers (1-D data), which represent function values at regularly spaced points. I then calculate the derivative of the function at the middle point using different difference formulas, namely forward difference, backward difference, etc. The labels (12 in number) of my problem are the most favourable choice of points so that the calculated derivative is closest to the actual derivative.
I am using 2 convolutional layers on the 1x9 list of real numbers, each with stride = 1 and window size = 2, and with 2 and 4 feature maps. Then I have 2 fully connected layers with 500 and 12 neurons.
My problem is that my accuracy does not go over 30 percent and the cost remains around 2.5. Is there something I am doing wrong?
import tensorflow as tf
import numpy as np
import random
from sklearn.utils import shuffle
from sklearn.cross_validation import train_test_split
from keras.utils import np_utils

def calculation_of_labels(func, derf):
    #label=1
    f2f = (func[5]-func[4])/(0.125)
    #t=abs(f2f-derf)
    #actualDer= f2f
    f2b = (func[4]-func[3])/(0.125)
    f3 = (func[5]-func[3])/(0.125*2)
    f4b = (2*func[5]+3*func[4]-6*func[3]+func[2])/(6*0.125)
    f4f = (-1*func[6]+6*func[5]-3*func[4]-2*func[3])/(6*0.125)
    f5 = (-1*func[6]+8*func[5]-8*func[3]+func[2])/(0.125*12)
    f6b = (-3*func[6]+30*func[5]+20*func[4]-60*func[3]+15*func[2]-2*func[1])/(0.125*60)
    f6f = (2*func[7]-15*func[6]+func[5]-20*func[4]-30*func[3]+3*func[2])/(0.125*60)
    f7 = (func[7]-9*func[6]+45*func[5]-45*func[3]+9*func[2]-1*func[1])/(0.125*60)
    f8f = (-3*func[8]+28*func[7]-126*func[6]+420*func[5]-105*func[4]-252*func[3]+42*func[2]-4*func[1])/(0.125*420)
    f8b = (4*func[7]-42*func[6]+252*func[5]+105*func[4]-420*func[3]+126*func[2]-28*func[1]+3*func[0])/(0.125*420)
    f9 = (-3*func[8]+32*func[7]-168*func[6]+672*func[5]-672*func[3]+168*func[2]-32*func[1]+3*func[0])/(0.125*840)
    myList = [f2b,f2f,f3,f4b,f4f,f5,f6b,f6f,f7,f8b,f8f,f9]
    b = min(myList, key=lambda x: abs(x-derf))
    a = myList.index(b)
    return a
fun_mat = []
lab = []
#print(lab)
fun_der_mat = 0
for number in range(0, 50000):
    function = 0
    fder = 0
    #phi = random.uniform(0,1)
    Ak = random.uniform(1, 5)
    xv = np.arange(0, 1.1, 0.125)
    for k in range(1, 5):
        phi = random.uniform(0, 1)
        function = function + (Ak/k)*np.sin(2*np.pi*k*xv + phi*2*np.pi)
        fder = fder + (Ak)*2*np.pi*np.cos(2*np.pi*k*xv + phi*2*np.pi)
    for j in range(0, 9):
        function[j] = round(function[j], 3)
        fder[j] = round(fder[j], 3)
    fun_mat.append(function)
    lab.append(calculation_of_labels(function, fder[4]))

logs_path = '/tmp/check/'
#inputdata,Label = shuffle(fun_mat,lab, random_state = 2)
#print(fun_mat)
#print(inputdata)
#raw_data = [inputdata, Label]

with tf.name_scope('input'):
    x = tf.placeholder(tf.float32, [None, 9], name="X_input")
    y_ = tf.placeholder(tf.float32, [None, 12], name="Y_input")
x = tf.reshape(x, shape=[2000, 1, 9, 1])
# MODEL BEING BUILT
def weight_variable(shape, nam):
    initial = tf.truncated_normal(shape, stddev=0.1, name=nam)
    return tf.Variable(initial)

def bias_variable(shape, na):
    initial = tf.constant(0.1, shape=shape, name=na)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1,1,2,1], strides=[1,1,2,1], padding='SAME')

def next_batch(index_receive, dat, labels):
    '''
    Return the samples and labels at the received indices.
    '''
    """idx = np.arange(0, len(dat))
    np.random.shuffle(idx)
    idx = idx[:num]"""
    dat_shuffle = [dat[i] for i in index_receive]
    labels_shuffle = [labels[i] for i in index_receive]
    #for i in range(0, index_receive.size):
    return np.asarray(dat_shuffle), np.asarray(labels_shuffle)

with tf.name_scope("Conv_Layer_1"):
    w_conv1 = weight_variable([1,2,1,2], "w_conv1")
    b_conv1 = bias_variable([2], "b_conv1")
    tf.summary.histogram("weights", w_conv1)
    tf.summary.histogram("biases", b_conv1)
with tf.name_scope("Conv_Layer_2"):
    w_conv2 = weight_variable([1,2,2,4], "w_conv2")
    b_conv2 = bias_variable([4], "b_conv2")
    tf.summary.histogram("weights", w_conv2)
    tf.summary.histogram("biases", b_conv2)
with tf.name_scope("Fully_Conn_L_1"):
    w_fc1 = weight_variable([3*4, 500], "w_fc1")
    b_fc1 = bias_variable([500], "b_fc1")
    tf.summary.histogram("weights", w_fc1)
    tf.summary.histogram("biases", b_fc1)
with tf.name_scope("Fully_Conn_L_2"):
    w_fc2 = weight_variable([500, 12], "w_fc2")
    b_fc2 = bias_variable([12], "b_fc2")
    tf.summary.histogram("weights", w_fc2)
    tf.summary.histogram("biases", b_fc2)
#with tf.name_scope("Fully_Conn_L_3"):
#    w_fc3 = weight_variable([500,12],"w_fc3")
#    b_fc3 = bias_variable([12], "b_fc3")
#    tf.summary.histogram("weights",w_fc3)
#    tf.summary.histogram("biases",b_fc3)

for i in range(0, 50000):
    for j in range(0, 9):
        fun_mat[i][j] = fun_mat[i][j]/5
X = fun_mat
y = lab
#(X,y) = (raw_data[0], raw_data[1])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)
X_train = np.array(X_train)
X_test = np.array(X_test)
#print(X_train)
X_train = X_train.reshape(X_train.shape[0], 9, 1)
X_test = X_test.reshape(X_test.shape[0], 9, 1)
#print(X_train)
Y_train = np_utils.to_categorical(y_train, 12)
Y_test = np_utils.to_categorical(y_test, 12)
#print(Y_train[1])
keep_prob = tf.placeholder(tf.float32)
def model(data):
    with tf.name_scope("MODEL_LAYER_OUTPUTS"):
        h_conv1 = tf.nn.relu(conv2d(data, w_conv1) + b_conv1)
        h_pool1 = max_pool_2x2(h_conv1)
        h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)
        h_pool2_flat = tf.reshape(h_pool2, [-1, 3*4])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
        #h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop,w_fc2) + b_fc2)
        #h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)
        tf.summary.histogram("CONV_Layer_1_act ", h_conv1)
        tf.summary.histogram("Pool_Layer_1_act ", h_pool1)
        tf.summary.histogram("CONV_Layer_2_act ", h_conv2)
        tf.summary.histogram("Pool_Layer_1_act ", h_pool2)
        tf.summary.histogram("F_C_Layer_1_act ", h_fc1_drop)
        #tf.summary.histogram("F_C_Layer_2_act ", h_fc2_drop)
        return tf.nn.softmax(tf.matmul(h_fc1_drop, w_fc2) + b_fc2)
        #return tf.matmul(h_fc1_drop,w_fc2) + b_fc2

y = model(x)

with tf.name_scope("COST"):
    #cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    cross_entropy = tf.reduce_mean(tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)) + 0.01*tf.nn.l2_loss(w_fc1) + 0.01*tf.nn.l2_loss(w_fc2))
    tf.summary.scalar("COST", cross_entropy)
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
with tf.name_scope("ACCURACY"):
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar("ACCURACY", accuracy)

#sess.run(tf.global_variables_initializer())
summ = tf.summary.merge_all()
saver = tf.train.Saver()
batch_size = 2000
with tf.Session() as sess:
    #sess.run(init)
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
    #writer.add_graph(sess.graph)
    for i in range(1000):
        avg_cost = 0.
        # INDEX GENERATE
        index = np.arange(0, 40000)
        np.random.shuffle(index)
        #extra = data_set_size % batch_size
        batch_count = 40000//batch_size + 1
        for j in range(batch_count):
            start = j*batch_size
            end_idx = start + batch_size
            if j == (batch_count - 1):
                break
            swiped_index = index[start:end_idx]
            #print(swiped_index)
            batch = next_batch(swiped_index, X_train, Y_train)
            #print(batch[0])
            batcht = np.reshape(batch[0], (batch_size, 1, 9, 1))
            _, summary = sess.run([optimizer, summ], feed_dict={x: batcht, y_: batch[1], keep_prob: 0.5})
            #avg_cost += c / batch_count
            #optimizer.run(feed_dict={x:batcht, y_: batch[1], keep_prob : 0.5})
        if i % 50 == 0:
            train_accuracy = accuracy.eval(feed_dict={x: batcht, y_: batch[1], keep_prob: 1.0})
            writer.add_summary(summary, i * batch_count + i)
            print("step %d, Training accuracy %g" % (i, train_accuracy))
            #print("step %d, COST: %g" %( i, avg_cost))
    saver.save(sess, "C:/Users/Nikhil/Desktop/intern 3july/model1_first")
    index_rec = np.arange(0, 10000)
    np.random.shuffle(index_rec)
    swiped = index_rec[0:2000]
    batch1 = next_batch(swiped, X_test, Y_test)
    batch1t = np.reshape(batch1[0], (batch_size, 1, 9, 1))
    print("test accuracy %g" % accuracy.eval(feed_dict={x: batch1t, y_: batch1[1], keep_prob: 1.0}))
