Seq2Seq Loss Function Help Tensorflow - python

I'm having trouble trying to figure out how to create a loss function for my basic_seq2seq model.
My input is a paragraph and the output is a section title for the paragraph.
Here is my current code:
import tensorflow as tf
import numpy as np
import pickle
import sys
MAX_NUM_WORDS = 500000
MAX_PAR_LENGTH = 85
CONV_DIM = 128
SECTION_LENGTH = 45
EPOCHS = 100
num_paragraphs = 5200000
BATCH_SIZE = 20
SECTION_VOCAB_SIZE = 213884
weights_lstm = {'out': tf.Variable(tf.random_normal([BATCH_SIZE, 200, SECTION_VOCAB_SIZE]))}
biases_lstm = {'out': tf.Variable(tf.random_normal([BATCH_SIZE, SECTION_VOCAB_SIZE]))}
embedding_matrix = np.zeros((MAX_NUM_WORDS+1, 200))
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv1d(x, W):
return tf.nn.conv1d(x, W, stride=1, padding='SAME')
def max_pool_1d(x):
return tf.layers.max_pooling1d(x, pool_size=2, strides=2, padding='same')
def batch_norm(x):
return tf.layers.batch_normalization(x)
def model(x, y):
input = x
with tf.device('/cpu:0'):
input = tf.nn.embedding_lookup(W_e, x)
output_y = tf.nn.embedding_lookup(W_e, y)
#encoder
bn1 = batch_norm(input)
an1 = tf.nn.relu(bn1)
drop1 = tf.layers.dropout(an1, 0.2)
W_conv1 = weight_variable([3, 200, CONV_DIM])
b_conv1 = bias_variable([CONV_DIM])
h_conv1 = tf.nn.relu(conv1d(drop1, W_conv1) + b_conv1)
bn2 = batch_norm(h_conv1)
an2 = tf.nn.relu(bn2)
W_conv2 = weight_variable([3, CONV_DIM, CONV_DIM/2])
b_conv2 = bias_variable([CONV_DIM/2])
h_conv2 = tf.nn.relu(conv1d(an2, W_conv2) + b_conv2)
bn3 = batch_norm(h_conv2)
an3 = tf.nn.relu(bn3)
W_conv3 = weight_variable([3, CONV_DIM/2, CONV_DIM/4])
b_conv3 = bias_variable([CONV_DIM/4])
h_conv3 = tf.nn.relu(conv1d(an3, W_conv3) + b_conv3)
mp1 = max_pool_1d(h_conv3)
enc = tf.unstack(mp1, axis=1)
dec = tf.unstack(output_y, axis=1)
lstm_cell = tf.contrib.rnn.LSTMCell(200, forget_bias=1.0, activation=tf.nn.softmax)
outputs, states = tf.contrib.legacy_seq2seq.basic_rnn_seq2seq(enc, dec, lstm_cell)
projected_outputs = []
with tf.device('/cpu:0'):
for output in outputs:
projected_output = (weights_lstm['out'] * output) + biases_lstm['out']
projected_outputs.append(projected_output)
stacked_outputs = tf.stack(projected_outputs, 1) # [? x 45 x V]
print(stacked_outputs)
weights = tf.ones_like(y, dtype=tf.float32)
loss = tf.contrib.seq2seq.sequence_loss(logits = stacked_outputs, targets = y, weights = weights, name = 'loss')
# gold_outputs = tf.unstack(output_y, axis=1)
#cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=output_y, logits=outputs))
# output_y = [? x 45 x 200]
# outputs = 45 tensors of [? x 200]
# stacked_outputs = tf.stack(outputs, 1) # [? x 45 x 200]
# correct_prediction = tf.equal(tf.argmax(stacked_outputs, 1), tf.argmax(output_y, 1))
# accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
return outputs, loss #cross_entropy
#print('Loading Embeddings...')
#with open('embeddings.txt', 'rb') as f:
# embedding_matrix = pickle.load(f)
print('Creating Placeholders...')
X = tf.placeholder(tf.int32, [None, MAX_PAR_LENGTH])
Y = tf.placeholder(tf.int32, [None, SECTION_LENGTH])
with tf.device('/cpu:0'):
W_e = tf.Variable(embedding_matrix, dtype=tf.float32, trainable=False)
print('Creating Model...')
preds, loss = model(X, Y)
print('Creating Training Parameters...')
train_step = tf.train.RMSPropOptimizer(1e-4).minimize(loss)
saver = tf.train.Saver()
print('Starting Session...')
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(EPOCHS):
print('Epoch ' + str(i))
print('Number of batches ', str(num_paragraphs/BATCH_SIZE))
with open('section_train_data_final.txt', 'rb') as f:
for j in range(num_paragraphs/BATCH_SIZE):
#load data
paragraphs = []
for k in range(BATCH_SIZE):
paragraphs.append(pickle.load(f))
x = np.array([ p for p,s in paragraphs ])
#y = np.array([ sess.run(tf.one_hot(s, depth=SECTION_VOCAB_SIZE, on_value=1.0, off_value=0.0)) for p,s in paragraphs ])
y = np.array([ s for p,s in paragraphs ])
_, step_loss = sess.run([train_step, loss], feed_dict={X:x, Y: y})
if j % 100 == 0 and j != 0:
# train_acc = sess.run(accuracy, feed_dict={X: x, Y: y})
print('Epoch %d: Batch %d: Loss: %g' % (i, j, step_loss))
saver.save(sess, '~\data\generation_model')
Any help on how to create this loss function would be helpful.
I'm very new to tensorflow so I tried the simple loss function that's commented out
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=output_y, logits=outputs))
But it didn't work as the loss always came out to 0. My friend tried to create a loss function that's currently what is there but I have no clue what he was trying to do.

Related

ValueError: Cannot feed value of shape (100, 200, 66, 3) for Tensor Placeholder:0, which has shape (?, 66, 200, 3)

My development environment is windows and I encountered a error while executing my code. How can I fix this?
The result below is the output when running train.py.
Error generated while executing code:
ValueError: Cannot feed value of shape (100, 200, 66, 3) for Tensor Placeholder:0, which has shape (?, 66, 200, 3)
The inside of the data.csv file is saved in the following format.
data.csv
img_2022-07-30_16-12-38_0.jpg,2
train.py
import os
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from tensorflow.core.protobuf import saver_pb2
import driving_data
import model
import numpy as np
import time
begin = time.strftime('%Y-%m-%d_%H-%M-%S')
LOGDIR = './save'
tf.logging.set_verbosity(tf.logging.ERROR)
sess = tf.InteractiveSession()
L2NormConst = 0.001
train_vars = tf.trainable_variables()
start_learning_rate = 0.5e-3
adjust_learning_rate = 1e-5
onehot_labels = tf.one_hot(indices=tf.reshape(tf.cast(model.y_, tf.int32),[-1]), depth=4)
loss = tf.losses.softmax_cross_entropy( onehot_labels=onehot_labels, logits=model.y)
train_step = tf.train.AdamOptimizer(start_learning_rate).minimize(loss)
loss_val = tf.losses.softmax_cross_entropy( onehot_labels=onehot_labels, logits=model.y)
sess.run(tf.global_variables_initializer())
tf.summary.scalar("loss", loss)
tf.summary.scalar("loss_val", loss_val)
merged_summary_op = tf.summary.merge_all()
saver = tf.train.Saver(write_version = tf.train.SaverDef.V2)
logs_path = './logs'
summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
epochs = 13
batch_size = 100
for epoch in range(epochs):
for i in range(int(driving_data.num_images/batch_size)):
xs, ys = driving_data.LoadTrainBatch(batch_size)
train_step.run(feed_dict={model.x: xs, model.y_: ys, model.keep_prob: 0.7})
loss_value = loss.eval(feed_dict={model.x: xs, model.y_: ys, model.keep_prob: 1.0})
print("Epoch: %d, Step: %d, Loss: %g" % (epoch, i, loss_value))
if i % 10 == 0:
xs_val, ys_val = driving_data.LoadValBatch(batch_size)
loss_val = loss.eval(feed_dict={model.x:xs_val, model.y_: ys_val, model.keep_prob: 1.0})
print("Epoch: %d, Step: %d, Loss_val: %g" % (epoch, i, loss_val))
summary = merged_summary_op.eval(feed_dict={model.x:xs, model.y_: ys, model.keep_prob: 1.0})
summary_writer.add_summary(summary, epoch * driving_data.num_images/batch_size + i)
if i % batch_size == 0:
if not os.path.exists(LOGDIR):
os.makedirs(LOGDIR)
checkpoint_path = os.path.join(LOGDIR, "model.ckpt")
filename = saver.save(sess, checkpoint_path)
print("Model saved in file: %s" % filename)
correct_prediction = tf.equal(tf.argmax(onehot_labels, 1), tf.argmax(model.y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print('Train Accuracy:', sess.run(accuracy, feed_dict={model.x: xs, model.y_: ys, model.keep_prob: 1.0}))
print('Validation Accuracy:', sess.run(accuracy, feed_dict={model.x: xs_val, model.y_: ys_val, model.keep_prob: 1.0}))
end = time.strftime('%Y-%m-%d_%H-%M-%S')
print('begin: ', begin)
print('end: ', end)
print("Run the command line:\n" \
"--> tensorboard --logdir=./logs --port=6006" \
"\nThen open http://0.0.0.0:6006/ into your web browser")
model.py
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import numpy as np
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W, stride):
return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='VALID')
def softmax(x):
ex = np.exp(x)
sum_ex = np.sum( np.exp(x))
return ex/sum_ex
x = tf.placeholder(tf.float32, shape=[None, 66, 200, 3])
y_ = tf.placeholder(tf.float32, shape=[None, 1])
x_image = x
W_conv1 = weight_variable([5, 5, 3, 24])
b_conv1 = bias_variable([24])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1, 2) + b_conv1)
W_conv2 = weight_variable([5, 5, 24, 36])
b_conv2 = bias_variable([36])strong text
h_conv2 = tf.nn.relu(conv2d(h_conv1, W_conv2, 2) + b_conv2)
W_conv3 = weight_variable([5, 5, 36, 48])
b_conv3 = bias_variable([48])
h_conv3 = tf.nn.relu(conv2d(h_conv2, W_conv3, 2) + b_conv3)
W_conv4 = weight_variable([3, 3, 48, 64])
b_conv4 = bias_variable([64])
h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4, 1) + b_conv4)
W_conv5 = weight_variable([3, 3, 64, 64])
b_conv5 = bias_variable([64])
h_conv5 = tf.nn.relu(conv2d(h_conv4, W_conv5, 1) + b_conv5)
W_fc1 = weight_variable([1152, 1164])
b_fc1 = bias_variable([1164])
h_conv5_flat = tf.reshape(h_conv5, [-1, 1152])
h_fc1 = tf.nn.relu(tf.matmul(h_conv5_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1164, 100])
b_fc2 = bias_variable([100])
h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)
W_fc3 = weight_variable([100, 50])
b_fc3 = bias_variable([50])
h_fc3 = tf.nn.relu(tf.matmul(h_fc2_drop, W_fc3) + b_fc3)
h_fc3_drop = tf.nn.dropout(h_fc3, keep_prob)
W_fc4 = weight_variable([50, 10])
b_fc4 = bias_variable([10])
h_fc4 = tf.nn.relu(tf.matmul(h_fc3_drop, W_fc4) + b_fc4)
h_fc4_drop = tf.nn.dropout(h_fc4, keep_prob)
W_fc5 = weight_variable([10, 4])
b_fc5 = bias_variable([4])
y = tf.matmul(h_fc4_drop, W_fc5) + b_fc5
print('model read')
driving_data.py
from cv2 import resize
from imageio import imread
import random
import csv
import config as cfg
xs = []
ys = []
train_batch_pointer = 0
val_batch_pointer = 0
with open('data/' + cfg.currentDir + '/data.csv', newline='') as csvfile:
spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
for row in spamreader:
xs.append('data/' + cfg.currentDir + '/' + row[0])
ys.append(int(row[1]))
num_images = len(xs)
c = list(zip(xs, ys))
random.shuffle(c)
xs, ys = zip(*c)
"""
train_xs = xs[:int(len(xs) * 0.8)]
train_ys = ys[:int(len(xs) * 0.8)]
val_xs = xs[-int(len(xs) * 0.2):]
val_ys = ys[-int(len(xs) * 0.2):]
"""
train_xs = xs[:int(len(xs) * 1)]
train_ys = ys[:int(len(xs) * 1)]
val_xs = xs[-int(len(xs) * 1):]
val_ys = ys[-int(len(xs) * 1):]
num_train_images = len(train_xs)
num_val_images = len(val_xs)
def LoadTrainBatch(batch_size):
global train_batch_pointer
x_out = []
y_out = []
for i in range(0, batch_size):
x_out.append(resize(imread(train_xs[(train_batch_pointer + i) % num_train_images])[cfg.modelheight:], [66, 200]) / 255.0)
y_out.append([train_ys[(train_batch_pointer + i) % num_train_images]])
train_batch_pointer += batch_size
return x_out, y_out
def LoadValBatch(batch_size):
global val_batch_pointer
x_out = []
y_out = []
for i in range(0, batch_size):
x_out.append(resize(imread(val_xs[(val_batch_pointer + i) % num_val_images])[cfg.modelheight:], [66, 200]) / 255.0)
y_out.append([val_ys[(val_batch_pointer + i) % num_val_images]])
val_batch_pointer += batch_size
return x_out, y_out
remove python 3.10.5 and
After installing 3.6.8, I entered the following command and it worked fine.
python.exe -m pip install -U pip
python.exe -m pip install tensorflow==1.15.0 tensorboard==1.15.0 scipy==1.2.0 urllib3==1.13.1 Pillow opencv-python```

I use CNN to classification MNIST with TensorFlow. But I want to read the dataset with TFRecordrs. However I only get 10% accuracy

I want to classification the dataset of mnist in my way with TensorFlow.
First, converting the dataset into a TFRecords file.
Then, reading this file with tf.TFRecodsReader.
Finally,training the dataset.
Existing problems:The code doesn't have any syntax errors. But,I only get 10% accuracy on the test data.
train.py:
import tensorflow as tf
import Net
import os
import numpy as np
import datetime
import time
import tfrecords as rd
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.01
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 10000
MOVING_AVERAGE_DECAY = 0.99
MODEL_SAVE_PATH = "Model/"
MODEL_NAME = "model"
def train():
x = tf.placeholder(tf.float32, [
BATCH_SIZE,
Net.IMAGE_SIZE,
Net.IMAGE_SIZE,
Net.NUM_CHANNELS],
name='x-input')
y_ = tf.placeholder(tf.float32, [None, Net.OUTPUT_NODE], name='y-input')
regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
y = Net.inference(x, True, regularizer)
global_step = tf.Variable(0, trainable=False)
variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
variables_averages_op = variable_averages.apply(tf.trainable_variables())
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
cross_entropy_mean = tf.reduce_mean(cross_entropy)
loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
learning_rate = tf.train.exponential_decay(
LEARNING_RATE_BASE,
global_step,
55000 / BATCH_SIZE, LEARNING_RATE_DECAY,
staircase=True)
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
with tf.control_dependencies([train_step, variables_averages_op]):
train_op = tf.no_op(name='train')
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
img, one_hot = rd.read_and_decode("./tfrecords/train.tfrecords")
img_batch, label_batch = tf.train.shuffle_batch(tensors = [img, one_hot], batch_size = BATCH_SIZE, capacity = 10000 + 3 * 100, min_after_dequeue = 10000)
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(init_op)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord = coord, sess = sess)
try:
steps = 1
while not coord.should_stop():
if steps > TRAINING_STEPS:
break
xs, ys = sess.run([img_batch, label_batch])
_, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
if steps % 1000 == 0:
print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)
steps += 1
except tf.errors.OutOfRangeError:
print("Done training after reading all data")
finally:
coord.request_stop()
coord.join(threads)
sess.close()
def main(argv=None):
start_time = datetime.datetime.now()
print("start_time = "),
print(start_time)
rd.create_record("train")
train_start_time = datetime.datetime.now()
print("train_start_time = " ),
print( train_start_time)
train()
end_time = datetime.datetime.now()
print("end_time = " ),
print(end_time)
if __name__ == '__main__':
main()
Net.py:
import tensorflow as tf
INPUT_NODE = 784
OUTPUT_NODE = 10
IMAGE_SIZE = 28
NUM_CHANNELS = 1
NUM_LABELS = 10
CONV1_DEEP = 32
CONV1_SIZE = 5
CONV2_DEEP = 64
CONV2_SIZE = 5
FC_SIZE = 512
def inference(input_tensor, train, regularizer):
with tf.variable_scope('layer1-conv1'):
conv1_weights = tf.get_variable(
"weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
initializer=tf.truncated_normal_initializer(stddev=0.1))
conv1_biases = tf.get_variable("bias", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))
with tf.name_scope("layer2-pool1"):
pool1 = tf.nn.max_pool(relu1, ksize = [1,2,2,1],strides=[1,2,2,1],padding="SAME")
with tf.variable_scope("layer3-conv2"):
conv2_weights = tf.get_variable(
"weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
initializer=tf.truncated_normal_initializer(stddev=0.1))
conv2_biases = tf.get_variable("bias", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
with tf.name_scope("layer4-pool2"):
pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
pool_shape = pool2.get_shape().as_list()
nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
reshaped = tf.reshape(pool2, [pool_shape[0], nodes])
with tf.variable_scope('layer5-fc1'):
fc1_weights = tf.get_variable("weight", [nodes, FC_SIZE],
initializer=tf.truncated_normal_initializer(stddev=0.1))
if regularizer != None: tf.add_to_collection('losses', regularizer(fc1_weights))
fc1_biases = tf.get_variable("bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))
fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
if train: fc1 = tf.nn.dropout(fc1, 0.5)
with tf.variable_scope('layer6-fc2'):
fc2_weights = tf.get_variable("weight", [FC_SIZE, NUM_LABELS],
initializer=tf.truncated_normal_initializer(stddev=0.1))
if regularizer != None: tf.add_to_collection('losses', regularizer(fc2_weights))
fc2_biases = tf.get_variable("bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
logit = tf.matmul(fc1, fc2_weights) + fc2_biases
return logit
tfrecords.py:
import os
import tensorflow as tf
from PIL import Image
import Net
def create_record(op_type):
writer = tf.python_io.TFRecordWriter("./tfrecords/" + op_type + ".tfrecords")
f = open("./" + op_type + ".txt", 'r')
img_num = 0
for line in f.readlines():
img_num += 1
if img_num % 2000 == 0:
print("already read in %d images." % (img_num))
str_split = line.split()
img_path = "./" + str_split[0]
index = int(str_split[1])
img = Image.open(img_path)
img = img.resize((Net.IMAGE_SIZE, Net.IMAGE_SIZE))
img_raw = img.tobytes()
example = tf.train.Example(features=tf.train.Features(feature={
'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[index])),
'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
}))
writer.write(example.SerializeToString())
writer.close()
def read_and_decode(filename):
filename_queue = tf.train.string_input_producer([filename], shuffle = True)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(serialized_example,
features={
'label': tf.FixedLenFeature([], tf.int64),
'img_raw' : tf.FixedLenFeature([], tf.string),
})
img = tf.decode_raw(features['img_raw'], tf.uint8)
img = tf.reshape(img, [Net.IMAGE_SIZE, Net.IMAGE_SIZE, Net.NUM_CHANNELS])
img = 1 - tf.cast(img, tf.float32) * (1. / 255)
label = tf.cast(features['label'], tf.int32)
one_hot = tf.one_hot(label, 10, dtype = tf.float32)
one_hot = tf.reshape(one_hot, [-1])
return img, one_hot
Eval.py:
import time
import math
import tensorflow as tf
import numpy as np
import Net
import Train
import tfrecords as rd
def evaluate():
with tf.Graph().as_default() as g:
x = tf.placeholder(tf.float32, [
10000,
Net.IMAGE_SIZE,
Net.IMAGE_SIZE,
Net.NUM_CHANNELS],
name='x-input')
y_ = tf.placeholder(tf.float32, [None, Net.OUTPUT_NODE], name='y-input')
#validate_feed = {x: mnist.test.images, y_: mnist.test.labels}
global_step = tf.Variable(0, trainable=False)
regularizer = tf.contrib.layers.l2_regularizer(Train.REGULARIZATION_RATE)
y = Net.inference(x, False, regularizer)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
variable_averages = tf.train.ExponentialMovingAverage(Train.MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
for i in range(1):
img, one_hot = rd.read_and_decode("./tfrecords/test.tfrecords")
img_batch, label_batch = tf.train.shuffle_batch(tensors = [img, one_hot], batch_size = 10000, capacity = 10000 + 3 * 100, min_after_dequeue = 10000)
with tf.Session() as sess:
ckpt = tf.train.get_checkpoint_state(Train.MODEL_SAVE_PATH)
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
#the reason of this error!!!
#tf.global_variables_initializer().run()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord = coord, sess = sess)
try:
xs, ys = sess.run([img_batch, label_batch])
accuracy_score = sess.run(accuracy, feed_dict={x:xs, y_:ys})
print("After %s training step(s), test accuracy = %g" % (global_step, accuracy_score))
except tf.errors.OutOfRangeError:
print("Done testting after reading all data")
finally:
coord.request_stop()
coord.join(threads)
sess.close()
else:
print('No checkpoint file found')
return
def main(argv=None):
rd.create_record("test")
evaluate()
if __name__ == '__main__':
main()
Now, I have solved this problem.
My code and other flie: https://github.com/xmy7216/MNIST_classification.git
Linux: Red Hat Enterprise Linux Server release 7.2 (Maipo)
GPU: Tesla P4
TensorFlow: 1.3
Python:2.7.5
I find the reason. Because after I load the model using saver.restore, I initialize the variables again by tf.global_variables_initializer().run().
How stupid I am!

Training accuracy of CNN in Tensorflow not improving over 30 percent and cost remains almost same

I am using a data set that consists of a list of 9(1 D data) real numbers(which represent function values at regularly spaced points). I then calculate the derivative of the function at the middle point using different difference function namely forward difference and backward difference etc. The labels( 12 in number) of my problem are the most favourable number of points to be chosen so that the calculated derivative is closest to the actual derivative.
I am using 2 convolutional layers for 1x9 list of real integers with stride =1 and window size = 2 for each and feature maps being 2 and 4. Then I have 2 fully connected layer with 500 and then 12 neurons.
My problem is that my accuracy is not going over 30 percent and cost remains around 2.5. Is there something I am doing wrong???
import tensorflow as tf
import numpy as np
import random
from sklearn.utils import shuffle
from sklearn.cross_validation import train_test_split
from keras.utils import np_utils
def calculation_of_labels(func,derf):
#label=1
f2f= (func[5]-func[4])/(0.125)
#t=abs(f2f-derf)
#actualDer= f2f
f2b= (func[4]-func[3])/(0.125)
f3=(func[5]-func[3])/(0.125*2)
f4b=(2*func[5]+3*func[4]-6*func[3]+func[2])/(6*0.125)
f4f= (-1*func[6]+6*func[5]-3*func[4]-2*func[3])/(6*0.125)
f5=(-1*func[6]+8*func[5]-8*func[3]+func[2])/(0.125*12)
f6b=(-3*func[6]+30*func[5]+20*func[4]-60*func[3]+15*func[2]-2*func[1])/(0.125*60)
f6f=(2*func[7]-15*func[6]+func[5]-20*func[4]-30*func[3]+3*func[2])/(0.125*60)
f7=(func[7]-9*func[6]+45*func[5]-45*func[3]+9*func[2]-1*func[1])/(0.125*60)
f8f=(-3*func[8]+28*func[7]-126*func[6]+420*func[5]-105*func[4]-252*func[3]+42*func[2]-4*func[1])/(0.125*420)
f8b=(4*func[7]-42*func[6]+252*func[5]+105*func[4]-420*func[3]+126*func[2]-28*func[1]+3*func[0])/(0.125*420)
f9=(-3*func[8]+32*func[7]-168*func[6]+672*func[5]-672*func[3]+168*func[2]-32*func[1]+3*func[0])/(0.125*840)
myList=[f2b,f2f,f3,f4b,f4f,f5,f6b,f6f,f7,f8b,f8f,f9]
b=min(myList, key=lambda x:abs(x-derf))
a=myList.index(b)
return a
fun_mat=[]
lab=[]
#print(lab)
fun_der_mat=0
for number in range(0,50000):
function=0
fder=0
#phi =random.uniform(0,1)
Ak= random.uniform(1,5)
xv=np.arange(0,1.1,0.125)
for k in range(1,5):
phi = random.uniform(0,1)
function = function+ (Ak/k)*np.sin(2*np.pi*k*xv+ phi*2*np.pi)
fder = fder+ (Ak)*2*np.pi*np.cos(2*np.pi*k*xv+ phi*2*np.pi)
for j in range(0,9):
function[j] = round(function[j],3)
fder[j] = round(fder[j],3)
fun_mat.append(function)
lab.append(calculation_of_labels(function, fder[4]))
logs_path = '/tmp/check/'
#inputdata,Label = shuffle(fun_mat,lab, random_state = 2)
#print(fun_mat)
#print(inputdata)
#raw_data = [inputdata, Label]
with tf.name_scope('input'):
x= tf.placeholder(tf.float32, [None,9], name = "X_input")
y_ = tf.placeholder(tf.float32, [None,12], name = "Y_input")
x= tf.reshape(x,shape=[2000,1,9,1])
#MODEL BEING BUILT
def weight_variable(shape, nam):
initial = tf.truncated_normal(shape, stddev=0.1, name=nam)
return tf.Variable(initial)
def bias_variable(shape, na):
initial = tf.constant(0.1, shape=shape, name = na)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x,ksize=[1,1,2,1],strides=[1,1,2,1], padding='SAME')
def next_batch(index_receive, dat, labels):
'''
Return a total of `num` random samples and labels.
'''
"""idx = np.arange(0 , len(dat))
np.random.shuffle(idx)
idx = idx[:num]"""
dat_shuffle = [dat[i] for i in index_receive]
labels_shuffle = [labels[i] for i in index_receive]
#for i in range(0, index_receive.size):
return np.asarray(dat_shuffle), np.asarray(labels_shuffle)
with tf.name_scope("Conv_Layer_1"):
w_conv1 = weight_variable([1,2,1,2], "w_conv1")
b_conv1= bias_variable([2],"b_conv1")
tf.summary.histogram("weights",w_conv1)
tf.summary.histogram("biases",b_conv1)
with tf.name_scope("Conv_Layer_2"):
w_conv2= weight_variable([1,2,2,4],"w_conv2")
b_conv2= bias_variable([4],"b_conv2")
tf.summary.histogram("weights",w_conv2)
tf.summary.histogram("biases",b_conv2)
with tf.name_scope("Fully_Conn_L_1"):
w_fc1 = weight_variable([3*4,500], "w_fc1")
b_fc1 = bias_variable([500], "b_fc1")
tf.summary.histogram("weights",w_fc1)
tf.summary.histogram("biases",b_fc1)
with tf.name_scope("Fully_Conn_L_2"):
w_fc2 = weight_variable([500,12],"w_fc2")
b_fc2 = bias_variable([12],"b_fc2")
tf.summary.histogram("weights",w_fc2)
tf.summary.histogram("biases",b_fc2)
#with tf.name_scope("Fully_Conn_L_3"):
#w_fc3 = weight_variable([500,12],"w_fc3")
#b_fc3= bias_variable([12], "b_fc3")
#tf.summary.histogram("weights",w_fc3)
#tf.summary.histogram("biases",b_fc3)
for i in range(0,50000):
for j in range(0,9):
fun_mat[i][j]= fun_mat[i][j]/5
X= fun_mat
y= lab
#(X,y)= (raw_data[0],raw_data[1])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=2)
X_train= np.array(X_train)
X_test= np.array(X_test)
#print(X_train)
X_train=X_train.reshape(X_train.shape[0],9,1)
X_test= X_test.reshape(X_test.shape[0],9,1)
#print(X_train)
Y_train = np_utils.to_categorical(y_train,12)
Y_test = np_utils.to_categorical(y_test,12)
#print(Y_train[1])
keep_prob = tf.placeholder(tf.float32)
def model(data):
with tf.name_scope("MODEL_LAYER_OUTPUTS"):
h_conv1 = tf.nn.relu(conv2d(data, w_conv1)+ b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2)+ b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
h_pool2_flat = tf.reshape(h_pool2,[-1,3*4])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat,w_fc1) + b_fc1)
h_fc1_drop= tf.nn.dropout(h_fc1,keep_prob)
#h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop,w_fc2) + b_fc2)
#h_fc2_drop= tf.nn.dropout(h_fc2, keep_prob)
tf.summary.histogram("CONV_Layer_1_act ", h_conv1)
tf.summary.histogram("Pool_Layer_1_act ", h_pool1)
tf.summary.histogram("CONV_Layer_2_act ", h_conv2)
tf.summary.histogram("Pool_Layer_1_act ", h_pool2)
tf.summary.histogram("F_C_Layer_1_act ", h_fc1_drop)
#tf.summary.histogram("F_C_Layer_2_act ", h_fc2_drop)
return tf.nn.softmax(tf.matmul(h_fc1_drop,w_fc2) + b_fc2)
#return tf.matmul(h_fc1_drop,w_fc2) + b_fc2
y= model(x)
with tf.name_scope("COST"):
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
cross_entropy = tf.reduce_mean(tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = y, labels=y_))+0.01*tf.nn.l2_loss(w_fc1) + 0.01*tf.nn.l2_loss(w_fc2))
tf.summary.scalar("COST", cross_entropy)
with tf.name_scope("train"):
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
with tf.name_scope("ACCURACY"):
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
tf.summary.scalar("ACCURACY", accuracy)
#sess.run(tf.global_variables_initializer())
summ = tf.summary.merge_all()
saver = tf.train.Saver()
batch_size=2000
with tf.Session() as sess:
#sess.run(init)
sess.run(tf.global_variables_initializer())
writer = tf.summary.FileWriter(logs_path, graph= tf.get_default_graph())
#writer.add_graph(sess.graph)
for i in range(1000):
avg_cost=0.
#INDEX GENERATE
index=np.arange(0,40000)
np.random.shuffle(index)
#extra = data_set_size%batch_size
batch_count = 40000//batch_size + 1
for j in range(batch_count):
start = j*batch_size
end_idx = start+ batch_size
if j == (batch_count - 1):
break
swiped_index= index[start:end_idx]
#print(swiped_index)
batch = next_batch(swiped_index, X_train, Y_train)
#print(batch[0])
batcht=np.reshape(batch[0],(batch_size,1,9,1))
_, summary = sess.run([optimizer, summ], feed_dict={x: batcht, y_: batch[1],keep_prob: 0.5})
#avg_cost += c / batch_count
#optimizer.run(feed_dict={x:batcht, y_: batch[1], keep_prob : 0.5})
if i%50 == 0:
train_accuracy = accuracy.eval(feed_dict={x:batcht, y_: batch[1], keep_prob: 1.0})
writer.add_summary(summary, i * batch_count + i)
print("step %d, Training accuracy %g" %( i, train_accuracy))
#print("step %d, COST: %g" %( i, avg_cost))
saver.save(sess, "C:/Users/Nikhil/Desktop/intern 3july/model1_first")
index_rec=np.arange(0,10000)
np.random.shuffle(index_rec)
swiped=index_rec[0:2000]
batch1= next_batch(swiped,X_test,Y_test)
batch1t=np.reshape(batch1[0],(batch_size,1,9,1))
print("test accuracy %g"%accuracy.eval(feed_dict={x: batch1t, y_: batch1[1], keep_prob: 1.0}))

Fit and predict in Tensorflow : Session restore for multiple functions

I started learning tensorflow one week ago and i am struggling a bit with the possibility of restoring a model.
I am constructing a class for a CNN (from stanford course code) where i want to have a run and a test function (first one to train the model, second one to make a prediction on another set of datas).
The function run is working correctly, mais i have a hard time restoring the model to make the prediction in function test (here i have a problem shape and i guess it comes from the way I get the operation).
import os
import tensorflow as tf
from time import time
class Cnn:
def __init__(self, batch_size=128, skip_step=10, epochs=1, dropout_ratio=0.75):
self.batch_size = batch_size
self.skip_step = skip_step
self.epochs = epochs
self.dropout_ratio = dropout_ratio
self.x = tf.placeholder(tf.float32, [None, 784], name="X_placeholder")
self.images = tf.reshape(self.x, shape=[-1, 28, 28, 1])
self.y = tf.placeholder(tf.float32, [None, 10], name="Y_placeholder")
self.dropout = tf.placeholder(tf.float32, name="dropout")
self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
self.layers = [self.images]
self.n_layers = 0
def convolution(self, kernel_size, n_output, strides, scope_name):
previous_layer = self.layers[self.n_layers]
with tf.variable_scope(scope_name) as scope:
kernel_shape = [kernel_size[0], kernel_size[1], previous_layer.get_shape()[3], n_output]
kernel = tf.get_variable("kernels", kernel_shape,
initializer=tf.truncated_normal_initializer())
biases = tf.get_variable("biases", [n_output],
initializer=tf.random_normal_initializer())
convolution_2d = tf.nn.conv2d(previous_layer, kernel, strides=strides,
padding="SAME")
convolution = tf.nn.relu(convolution_2d + biases, name=scope.name)
self.layers.append(convolution)
self.n_layers += 1
return convolution
def pool(self, size, strides, scope_name, padding="SAME"):
previous_layer = self.layers[self.n_layers]
with tf.variable_scope(scope_name):
pool = tf.nn.max_pool(previous_layer, ksize=size, strides=strides,
padding=padding)
self.layers.append(pool)
self.n_layers += 1
return pool
def fully_connected(self, n_input, n_output, scope_name, relu=False, dropout=True):
previous_layer = self.layers[self.n_layers]
with tf.variable_scope(scope_name):
weights = tf.get_variable("weights", [n_input, n_output],
initializer=tf.truncated_normal_initializer())
biases = tf.get_variable("biases", [n_output],
initializer=tf.random_normal_initializer())
pool_reshaped = tf.reshape(previous_layer, [-1, n_input])
logits = tf.add(tf.matmul(pool_reshaped, weights), biases, name="logits")
fc = logits
if relu:
fc = tf.nn.relu(fc, name="relu")
if dropout:
fc = tf.nn.dropout(fc, self.dropout, name="dropout")
self.layers.append(fc)
self.n_layers += 1
if relu is False and dropout is False:
self.logits = tf.add(tf.matmul(pool_reshaped, weights), biases, name="logits")
self.predictions = tf.nn.softmax(logits, name="predictions")
return fc
def set_loss(self, scope_name):
previous_layer = self.layers[self.n_layers]
with tf.name_scope(scope_name):
entropy = tf.nn.softmax_cross_entropy_with_logits(logits=previous_layer, labels=self.y)
loss = tf.reduce_mean(entropy, name='loss')
self.layers.append(loss)
self.n_layers += 1
self.loss = loss
def set_optimizer(self):
previous_layer = self.layers[self.n_layers]
optimizer = tf.train.AdamOptimizer(0.001).minimize(previous_layer, global_step=self.global_step)
self.layers.append(optimizer)
self.n_layers += 1
self.optimizer = optimizer
def run(self, train_x, train_y):
sess = tf.Session()
with sess.as_default():
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
writer = tf.summary.FileWriter('./my_graph/mnist', sess.graph)
ckpt = tf.train.get_checkpoint_state(os.path.dirname('results/checkpoint'))
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
initial_step = self.global_step.eval()
start_time = time()
n_batches = int(train_x.shape[0] / self.batch_size)
total_correct_preds = 0
total_loss = 0.0
current_position = 0
for index in range(initial_step, int(n_batches * self.epochs)): # train the model n_epochs times
x_batch = train_x[current_position: current_position + self.batch_size, :]
y_batch = train_y[current_position: current_position + self.batch_size, :]
feed_dict = {self.x: x_batch, self.y: y_batch, self.dropout: self.dropout_ratio}
_, loss_batch, logits_batch = sess.run([self.optimizer, self.loss, self.logits],
feed_dict=feed_dict)
print(logits_batch.shape)
total_loss += loss_batch
preds = tf.nn.softmax(logits_batch)
correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(y_batch, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
total_correct_preds += sess.run(accuracy)
if (index + 1) % self.skip_step == 0:
print('Average loss at step {}: {:5.1f}'.format(index + 1, total_loss / self.skip_step))
total_loss = 0.0
saver.save(sess, 'results/mnist-convnet', index)
current_position += self.batch_size
print("Optimization Finished!") # should be around 0.35 after 25 epochs
print("Total time: {0} seconds".format(time() - start_time))
print("Accuracy {0}".format(total_correct_preds / train_x.shape[0]))
def test(self, val_x, val_y):
checkpoint_file = tf.train.latest_checkpoint("results/")
graph = tf.Graph()
with graph.as_default():
sess = tf.Session()
with sess.as_default():
saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
saver.restore(sess, checkpoint_file)
self.x = graph.get_operation_by_name("X_placeholder").outputs[0]
self.y = graph.get_operation_by_name("Y_placeholder").outputs[0]
self.dropout = graph.get_operation_by_name("dropout").outputs[0]
self.global_step = graph.get_operation_by_name("global_step").outputs[0]
self.logits = graph.get_operation_by_name("logits").outputs[0]
current_position = 0
n_batches = int(val_x.shape[0] / self.batch_size)
total_correct_preds = 0
for i in range(n_batches):
x_batch = val_x[current_position: current_position + self.batch_size]
y_batch = val_y[current_position: current_position + self.batch_size]
feed_dict = {self.x: x_batch, self.y: y_batch, self.dropout: self.dropout_ratio}
logits_batch = sess.run([self.logits], feed_dict=feed_dict)
preds = tf.nn.softmax(logits_batch)
# correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(y_batch, 1))
"""
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
total_correct_preds += sess.run(accuracy)
current_position += self.batch_size
print("Accuracy {0}".format(total_correct_preds / val_x.shape[0]))
"""
and testing it like this (sorry if there is a lot of code) :
from cnn import Cnn
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
tf.set_random_seed(1)
N_CLASSES = 10
# load datas
mnist = input_data.read_data_sets("mnist", one_hot=True)
train_x, train_y = mnist.train.images, mnist.train.labels
val_x, val_y = mnist.test.images, mnist.test.labels
model = Cnn(batch_size=128, skip_step=10, epochs=0.25, dropout_ratio=0.75)
model.convolution(kernel_size=[5, 5], strides=[1, 1, 1, 1], n_output=32, scope_name="conv1")
model.pool(size=[1, 2, 2, 1], strides=[1, 2, 2, 1], scope_name="pool1")
model.convolution(kernel_size=[5, 5], strides=[1, 1, 1, 1], n_output=64, scope_name="conv2")
model.pool(size=[1, 2, 2, 1], strides=[1, 2, 2, 1], scope_name="pool2")
input_features = 7 * 7 * 64
model.fully_connected(n_input=input_features, n_output=1024, scope_name="fc", relu=True, dropout=True)
model.fully_connected(n_input=1024, n_output=N_CLASSES, scope_name="softmax_linear", dropout=False)
model.set_loss(scope_name="loss")
model.set_optimizer()
model.run(train_x=train_x, train_y=train_y)
model.test(val_x, val_y)
I would also appreciate any comments regarding the better way to create a class like this (in particular the handling of the placeholders and the sessions)
Thanks !
Nicolas

failed sess.run error “Cannot feed value of shape (50, 2352) for Tensor 'Placeholder:0', which has shape '(?, 784)'”

Please Help me...
I learning to tensorflow using my own data based on tutorial expert.
following my code:
#datasets define
NUM_CLASSES = 65535
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE*IMAGE_SIZE*1
#read datasets
with open(FLAGS.train, 'r') as f: # train.txt
train_image = []
train_label = []
num = 0
for line in f:
if num == 500:
break
line = line.rstrip()
l = line.split(',')
print(l[0])
img = cv2.imread(l[0])
img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
train_image.append(img.flatten().astype(np.float32)/255.0)
tmp = np.zeros(NUM_CLASSES)
tmp[int(l[1])] = 1
train_label.append(tmp)
num += 1
train_image = np.asarray(train_image)
train_label = np.asarray(train_label)
train_len = len(train_image)
def inference(images_placeholder, keep_prob):
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
x_images = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 1])
with tf.name_scope('conv1') as scope:
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_images, W_conv1) + b_conv1)
with tf.name_scope('pool1') as scope:
h_pool1 = max_pool_2x2(h_conv1)
with tf.name_scope('conv2') as scope:
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
with tf.name_scope('pool2') as scope:
h_pool2 = max_pool_2x2(h_conv2)
with tf.name_scope('fc1') as scope:
W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
with tf.name_scope('fc2') as scope:
W_fc2 = weight_variable([1024, NUM_CLASSES])
b_fc2 = bias_variable([NUM_CLASSES])
with tf.name_scope('softmax') as scope:
y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
return y_conv
#learn
with tf.Graph().as_default():
images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES))
keep_prob = tf.placeholder("float")
logits = inference(images_placeholder, keep_prob)
loss_value = loss(logits, labels_placeholder)
train_op = training(loss_value, FLAGS.learning_rate)
print("train_op =", train_op)
acc = accuracy(logits, labels_placeholder)
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.initialize_all_variables())
summary_op = tf.merge_all_summaries()
summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph_def)
if train_len % FLAGS.batch_size is 0:
train_batch = train_len/FLAGS.batch_size
else:
train_batch = (train_len/FLAGS.batch_size)+1
print("train_batch = %d",str(train_batch))
for step in range(FLAGS.max_steps):
for i in range(int(train_batch)):
batch = FLAGS.batch_size*i
batch_plus = FLAGS.batch_size*(i+1)
print("batch_plus =", batch_plus)
if batch_plus > train_len: batch_plus = train_len
sess.run(train_op, feed_dict={
images_placeholder: train_image[batch:batch_plus],
labels_placeholder: train_label[batch:batch_plus],
keep_prob: 0.5})
if step % 10 == 0:
train_accuracy = 0.0
for i in range(train_batch):
batch = FLAGS.batch_size*i
batch_plus = FLAGS.batch_size*(i+1)
if batch_plus > train_len: batch_plus = train_len
train_accuracy += sess.run(acc, feed_dict={
images_placeholder: train_image[batch:batch_plus],
labels_placeholder: train_label[batch:batch_plus],
keep_prob: 1.0})
if i is not 0: train_accuracy /= 2.0
#summary_str = sess.run(summary_op, feed_dict={
# images_placeholder: train_image,
# labels_placeholder: train_label,
# keep_prob: 1.0})
#summary_writer.add_summary(summary_str, step)
print("step %d, training accuracy %g",(step, train_accuracy))
if test_len % FLAGS.batch_size is 0:
test_batch = test_len/FLAGS.batch_size
else:
test_batch = (test_len/FLAGS.batch_size)+1
print("test_batch = ",str(test_batch))
test_accuracy = 0.0
for i in range(test_batch):
batch = FLAGS.batch_size*i
batch_plus = FLAGS.batch_size*(i+1)
if batch_plus > train_len: batch_plus = train_len
test_accuracy += sess.run(acc, feed_dict={
images_placeholder: test_image[batch:batch_plus],
labels_placeholder: test_label[batch:batch_plus],
keep_prob: 1.0})
if i is not 0: test_accuracy /= 2.0
print("test accuracy %g",(test_accuracy))
save_path = saver.save(sess, FLAGS.save_model)
but when I try to run it I gives me an error:
ValueError:Cannot feed value of shape (50, 2352) for Tensor 'Placeholder:0', which has shape '(?, 784)'
I feel like i'm overlooking something small but I don't see it.
EDIT: Sorry if you read my previous analysis, it was wrong.
2352 / 3 = 784, I think you have kept the 3 rgb color channels instead of a single pixel intensity (or the resize function has 3 color channels in output by default).
Quite a lot of the complexity of the mnist example is hidden in the data loading, see tensorflow/tensorflow/contrib/learn/python/learn/datasets/mnist.py to understand it better and make something similar for your case. They load images in a 4D tensor [index, x, y, depth] and that's how they can look at a minibatch taking a subset of the indexes.
Good luck !

Categories