I'm working on a classification program in which I'm training a model to predict whether an object is a nut or a screw. I created my own dataset since I could not find an existing one. I trained the model, but I'm not getting correct predictions: the probability values appear to go beyond 1, so basically I get garbage values.
I get this predicted value: [[9.990779e-01 9.220659e-04]]
#Training code
import dataset
import tensorflow as tf
import time
from datetime import timedelta
import math
import random
import numpy as np
import os
# Adding Seed so that random initialization is consistent
from numpy.random import seed
from tensorflow import set_random_seed
batch_size = 20
# Prepare input data
classes = os.listdir('training_set')
num_classes = len(classes)
# 20% of the data will automatically be used for validation
validation_size = 0.2
img_size = 128
num_channels = 3
train_path = 'training_set'
# We shall load all the training and validation images and labels into
# memory using openCV and use that during training
data = dataset.read_train_sets(train_path, img_size, classes,
print("Complete reading input data. Will Now print a snippet of it")
print("Number of files in Training-
print("Number of files in Validation-
session = tf.Session()
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size,
num_channels], name='x')
## labels
y_true = tf.placeholder(tf.float32, shape=[None, num_classes],
y_true_cls = tf.argmax(y_true, dimension=1)
##Network graph params
filter_size_conv1 = 3
num_filters_conv1 = 32
filter_size_conv2 = 3
num_filters_conv2 = 32
filter_size_conv3 = 3
num_filters_conv3 = 32
fc_layer_size = 128
def create_weights(shape):
    """Create a weight variable of the given shape.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.05.
    """
    initial_values = tf.truncated_normal(shape, stddev=0.05)
    return tf.Variable(initial_values)
def create_biases(size):
    """Create a bias variable with `size` entries, each initialised to 0.05."""
    initial_values = tf.constant(0.05, shape=[size])
    return tf.Variable(initial_values)
def create_convolutional_layer(input,
## We shall define the weights that will be trained using create_weights function.
weights = create_weights(shape=[conv_filter_size, conv_filter_size, num_input_channels, num_filters])
## We create biases using the create_biases function. These are also trained.
biases = create_biases(num_filters)
## Creating the convolutional layer
layer = tf.nn.conv2d(input=input,
strides=[1, 1, 1, 1],
layer += biases
## We shall be using max-pooling.
layer = tf.nn.max_pool(value=layer,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
## Output of pooling is fed to Relu which is the activation function for us.
layer = tf.nn.relu(layer)
return layer
def create_flatten_layer(layer):
    """Collapse every dimension after the first into a single feature axis.

    The feature count is read from the static shape of `layer` instead of
    being hard-coded, so it follows whatever the previous layer produced.
    """
    # Dimensions 1..3 of the static shape; the leading dimension is left to
    # the -1 in the reshape below.
    feature_dims = layer.get_shape()[1:4]
    flat_width = feature_dims.num_elements()
    return tf.reshape(layer, [-1, flat_width])
def create_fc_layer(input,
# Let's define trainable weights and biases.
weights = create_weights(shape=[num_inputs, num_outputs])
biases = create_biases(num_outputs)
# Fully connected layer takes input x and produces wx+b.Since, these are matrices, we use matmul function in Tensorflow
layer = tf.matmul(input, weights) + biases
if use_relu:
layer = tf.nn.relu(layer)
return layer
layer_conv1 = create_convolutional_layer(input=x,
layer_conv2 = create_convolutional_layer(input=layer_conv1,
layer_conv3 = create_convolutional_layer(input=layer_conv2,
layer_flat = create_flatten_layer(layer_conv3)
layer_fc1 = create_fc_layer(input=layer_flat,
layer_fc2 = create_fc_layer(input=layer_fc1,
y_pred = tf.nn.softmax(layer_fc2, name='y_pred')
y_pred_cls = tf.argmax(y_pred, dimension=1)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2,
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
def show_progress(epoch, feed_dict_train, feed_dict_validate, val_loss):
    """Print training/validation accuracy and validation loss for one epoch.

    Args:
        epoch: Zero-based epoch index; it is reported as ``epoch + 1``.
        feed_dict_train: Feed dict used to evaluate ``accuracy`` on training data.
        feed_dict_validate: Feed dict used to evaluate ``accuracy`` on validation data.
        val_loss: Already-computed validation loss to include in the message.
    """
    acc = session.run(accuracy, feed_dict=feed_dict_train)
    val_acc = session.run(accuracy, feed_dict=feed_dict_validate)
    # The template was previously split across two physical lines inside one
    # quoted string, which is an unterminated literal; adjacent literals are
    # used here so the message stays on one output line.
    msg = (
        "Training Epoch {0} --- Training Accuracy: {1:>6.1%}, "
        "Validation Accuracy: {2:>6.1%}, Validation Loss: {3:.3f}"
    )
    print(msg.format(epoch + 1, acc, val_acc, val_loss))
total_iterations = 0
saver = tf.train.Saver()
def train(num_iteration):
global total_iterations
for i in range(total_iterations,
total_iterations + num_iteration):
x_batch, y_true_batch, _, cls_batch =
x_valid_batch, y_valid_batch, _, valid_cls_batch =
feed_dict_tr = {x: x_batch,
y_true: y_true_batch}
feed_dict_val = {x: x_valid_batch,
y_true: y_valid_batch}
session.run(optimizer, feed_dict=feed_dict_tr)
if i % int(data.train.num_examples / batch_size) == 0:
val_loss = session.run(cost, feed_dict=feed_dict_val)
epoch = int(i / int(data.train.num_examples / batch_size))
show_progress(epoch, feed_dict_tr, feed_dict_val, val_loss)
saver.save(session, 'C:\\Nutsbolts\\nuts-screws-model')
total_iterations += num_iteration
#Prediction code
import tensorflow as tf
import numpy as np
import os,glob,cv2
import sys,argparse
# First, pass the path of the image
dir_path = 'C:\\nutsbolts\\testing_set\\nuts'
image_path= 'nuts11.jpg'
filename = dir_path +'/' +image_path
images = []
# Reading the image using OpenCV
image = cv2.imread(filename)
# Resizing the image to our desired size and preprocessing will be done
# exactly as done during training
image = cv2.resize(image, (image_size, image_size),0,0, cv2.INTER_LINEAR)
images = np.array(images, dtype=np.uint8)
images = images.astype('float32')
images = np.multiply(images, 1.0/255.0)
# The input to the network is of shape [None image_size image_size
# num_channels]. Hence we reshape.
x_batch = images.reshape(1, image_size,image_size,num_channels)
## Let us restore the saved model
sess = tf.Session()
# Step-1: Recreate the network graph. At this step only graph is created.
saver = tf.train.import_meta_graph('nuts-screws-model.meta')
# Step-2: Now let's load the weights saved using the restore method.
saver.restore(sess, tf.train.latest_checkpoint('./'))
# Accessing the default graph which we have restored
graph = tf.get_default_graph()
# Now, let's get hold of the op that we can be processed to get the output.
# In the original network y_pred is the tensor that is the prediction of the
y_pred = graph.get_tensor_by_name("y_pred:0")
## Let's feed the images to the input placeholders
x= graph.get_tensor_by_name("x:0")
y_true = graph.get_tensor_by_name("y_true:0")
y_test_images = np.zeros((1, len(os.listdir('testing_set'))))
### Creating the feed_dict that is required to be fed to calculate y_pred
feed_dict_testing = {x: x_batch, y_true: y_test_images}
result=sess.run(y_pred, feed_dict=feed_dict_testing)
# result is of this format [probabiliy_of_nuts probability_of_screws]
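9.990779e-01 is actually below 1. It is scientific notation: 9.990779e-01 means 9.990779 × 10^-1 = 0.9990779, and 9.220659e-04 means 0.0009220659, so the two probabilities sum to approximately 1 as expected.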
I am testing some TensorFlow code; I'm seeing this error:
AttributeError: module 'tensorflow' has no attribute 'variable_scope'
I am running TensorFlow version 2.1.0.
Here is the code that I am testing.
# imports
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Input data:
# For this tutorial we use the MNIST dataset, a dataset of handwritten digits. If you are into machine learning, you might have heard of it by now: MNIST is a standard benchmark dataset for deep learning. Another reason we use MNIST is that it is easily accessible through TensorFlow. If you want to know more about the MNIST dataset you can check Yann LeCun's website. We can easily import the dataset and see the size of the training, test and validation sets:
# Import MNIST data
# from tensorflow.examples.tutorials.mnist import input_data
#import tensorflow_datasets as tfds
# Construct a tf.data.Dataset
#mnist = tfds.load(name="mnist", split=tfds.Split.TRAIN)
mnist = tf.keras.datasets.mnist
#mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
#print("Size of:")
#print("- Training-set:\t\t{}".format(len(mnist.train.labels)))
#print("- Test-set:\t\t{}".format(len(mnist.test.labels)))
#print("- Validation-set:\t{}".format(len(mnist.validation.labels)))
# hyper-parameters
logs_path = "C:/Users/ryans/MNIST_data/logs/embedding/" # path to the folder that we want to save the logs for Tensorboard
learning_rate = 0.001 # The optimization learning rate
epochs = 10 # Total number of training epochs
batch_size = 100 # Training batch size
display_freq = 100 # Frequency of displaying the training results
# Network Parameters
# We know that MNIST images are 28 pixels in each dimension.
img_h = img_w = 28
# Images are stored in one-dimensional arrays of this length.
img_size_flat = img_h * img_w
# Number of classes, one class for each of 10 digits.
n_classes = 10
# number of units in the first hidden layer
h1 = 200
# Graph:
# Like before, we start by constructing the graph. But first we need to define some helper functions that we use repeatedly in our code.
# weight and bias wrappers
def weight_variable(name, shape):
Create a weight variable with appropriate initialization
:param name: weight name
:param shape: weight shape
:return: initialized weight variable
initer = tf.truncated_normal_initializer(stddev=0.01)
return tf.get_variable('W_' + name,
def bias_variable(name, shape):
Create a bias variable with appropriate initialization
:param name: bias variable name
:param shape: bias variable shape
:return: initialized bias variable
initial = tf.constant(0., shape=shape, dtype=tf.float32)
return tf.get_variable('b_' + name,
def fc_layer(x, num_units, name, use_relu=True):
    """Create a fully-connected layer.

    :param x: input from previous layer
    :param num_units: number of hidden units in the fully-connected layer
    :param name: layer name
    :param use_relu: boolean to add ReLU non-linearity (or not)
    :return: The output array
    """
    # TensorFlow 2.x does not expose `variable_scope` at the top level, so the
    # bare `tf.variable_scope` raised AttributeError; use the compat.v1 alias
    # like the rest of this script does.
    with tf.compat.v1.variable_scope(name):
        in_dim = x.get_shape()[1]
        W = weight_variable(name, shape=[in_dim, num_units])
        # NOTE(review): the summary calls are left as written; under TF 2.x
        # they may also need the tf.compat.v1.summary variants — confirm.
        tf.summary.histogram('W', W)
        b = bias_variable(name, [num_units])
        tf.summary.histogram('b', b)
        layer = tf.matmul(x, W)
        layer += b
        if use_relu:
            layer = tf.nn.relu(layer)
        return layer
# Now that we have our helper functions we can create our graph.
# Create graph
# Placeholders for inputs (x), outputs(y)
with tf.compat.v1.variable_scope('Input'):
x = tf.compat.v1.placeholder(tf.float32, shape=[None, img_size_flat], name='X')
tf.summary.image('input_image', tf.reshape(x, (-1, img_w, img_h, 1)), max_outputs=5)
y = tf.compat.v1.placeholder(tf.float32, shape=[None, n_classes], name='Y')
fc1 = fc_layer(x, h1, 'Hidden_layer', use_relu=True)
output_logits = fc_layer(fc1, n_classes, 'Output_layer', use_relu=False)
# Define the loss function, optimizer, and accuracy
with tf.compat.v1.variable_scope('Train'):
with tf.compat.v1.variable_scope('Loss'):
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=output_logits), name='loss')
tf.summary.scalar('loss', loss)
with tf.compat.v1.variable_scope('Optimizer'):
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, name='Adam-op').minimize(loss)
with tf.compat.v1.variable_scope('Accuracy'):
correct_prediction = tf.equal(tf.argmax(output_logits, 1), tf.argmax(y, 1), name='correct_pred')
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
tf.summary.scalar('accuracy', accuracy)
# Network predictions
cls_prediction = tf.argmax(output_logits, axis=1, name='predictions')
# Initializing the variables
init = tf.global_variables_initializer()
merged = tf.summary.merge_all()
# Session:
# Launch the graph (session)
sess = tf.InteractiveSession() # using InteractiveSession instead of Session to test network in separate cell
train_writer = tf.summary.FileWriter(logs_path, sess.graph)
num_tr_iter = int(mnist.train.num_examples / batch_size)
global_step = 0
for epoch in range(epochs):
print('Training epoch: {}'.format(epoch + 1))
for iteration in range(num_tr_iter):
batch_x, batch_y = mnist.train.next_batch(batch_size)
global_step += 1
# Run optimization op (backprop)
feed_dict_batch = {x: batch_x, y: batch_y}
_, summary_tr = sess.run([optimizer, merged], feed_dict=feed_dict_batch)
train_writer.add_summary(summary_tr, global_step)
if iteration % display_freq == 0:
# Calculate and display the batch loss and accuracy
loss_batch, acc_batch = sess.run([loss, accuracy],
print("iter {0:3d}:\t Loss={1:.2f},\tTraining Accuracy={2:.01%}".
format(iteration, loss_batch, acc_batch))
# Run validation after every epoch
feed_dict_valid = {x: mnist.validation.images, y: mnist.validation.labels}
loss_valid, acc_valid = sess.run([loss, accuracy], feed_dict=feed_dict_valid)
print("Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".
format(epoch + 1, loss_valid, acc_valid))
I think the code was designed for an earlier version of TensorFlow. I made a few small modifications to get the code to run on my laptop. Here's the part that I am struggling with.
# Placeholders for inputs (x), outputs(y)
with tf.compat.v1.variable_scope('Input'):
x = tf.compat.v1.placeholder(tf.float32, shape=[None, img_size_flat], name='X')
tf.summary.image('input_image', tf.reshape(x, (-1, img_w, img_h, 1)), max_outputs=5)
y = tf.compat.v1.placeholder(tf.float32, shape=[None, n_classes], name='Y')
fc1 = fc_layer(x, h1, 'Hidden_layer', use_relu=True)
output_logits = fc_layer(fc1, n_classes, 'Output_layer', use_relu=False)
The 'with' statement runs, but I am getting an error on this line:
fc1 = fc_layer(x, h1, 'Hidden_layer', use_relu=True)
I thought the change to 'tf.compat.v1' would overcome the issue of different TensorFlow versions, but apparently not.
I found the code sample here.
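Since `placeholder` was removed from the top-level API in TensorFlow 2.0, `tf.compat.v1.placeholder` must be used. Placeholders are also incompatible with eager execution, which is enabled by default in 2.x; this can be solved by calling tf.compat.v1.disable_eager_execution() before the first `with tf.compat.v1.variable_scope(...):`. Note that every TF1-only symbol needs the prefix, including any `variable_scope` call inside helper functions such as `fc_layer`: a bare `tf.variable_scope` is exactly what raises the reported AttributeError.
Conversely, eager execution can be turned back on by calling tf.compat.v1.enable_eager_execution().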
You may check https://www.tensorflow.org/guide/migrate
I have developed TensorFlow code that uses the Adam optimizer, saved the graph, exported the .pb model, and loaded it correctly. My problem is that when I feed it a new input image, I don't get the same result as the one given by this code:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img, array_to_img
import cv2
import tensorflow as tf
import numpy
import numpy as np
def get_image2(imgSrc):
    """Load the image at `imgSrc` and return it as a float array scaled by 1/255."""
    # The second positional argument is forwarded to load_img unchanged
    # (presumably the grayscale flag — confirm against the Keras version in use).
    pil_image = load_img(imgSrc, True)
    pixels = img_to_array(pil_image).astype(float)
    pixels *= 1./255.
    return pixels
def sobel2(image):
    """Convolve `image` with the module-level Sobel filters.

    Returns a ``(Gx, Gy)`` pair: the result of convolving with
    ``sobel_x_filter`` and with ``sobel_y_filter`` respectively, both using
    unit strides and SAME padding. The input is used as given; no extra
    batch or channel axis is added here.
    """
    conv_args = dict(strides=[1, 1, 1, 1], padding='SAME')
    grad_x = tf.nn.conv2d(image, sobel_x_filter, **conv_args)
    grad_y = tf.nn.conv2d(image, sobel_y_filter, **conv_args)
    return grad_x, grad_y
image = get_image2('1.jpg')
img_shape = image.shape
print img_shape
img_h, img_w,_= img_shape
sobel_x = tf.constant([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], tf.float32)
sobel_x_filter = tf.reshape(sobel_x, [3, 3, 1, 1])
sobel_y_filter = tf.transpose(sobel_x_filter, [1, 0, 2, 3])
input_img = tf.placeholder(tf.float32, shape=[1,img_shape[0],img_shape[1],img_shape[2]], name="input_img")
#input_img = tf.placeholder(tf.float32, [1, 512, 512, 1], name="input_img")
gain = tf.Variable(tf.constant(1, dtype=tf.float32, shape=[1,img_shape[0],img_shape[1],img_shape[2]]), name="gain")
offset = tf.Variable(tf.constant(0, dtype=tf.float32, shape=[1,img_shape[0],img_shape[1],img_shape[2]]), name="offset")
enhanced_img = tf.add(tf.multiply(input_img, gain), offset, name = "enahnced")
input_img_deriv_x, input_img_deriv_y = sobel2(input_img)
enhanced_img_deriv_x, enhanced_img_deriv_y = sobel2(enhanced_img)
white_img = tf.constant(1, dtype=tf.float32, shape=[1,img_shape[0],img_shape[1],img_shape[2]])
image_pixels_count = img_h * img_w
white_cost = tf.reduce_sum(tf.pow(enhanced_img - white_img, 2))
sobel_cost = tf.reduce_sum(tf.pow(enhanced_img_deriv_x - input_img_deriv_x, 2) +
tf.pow(enhanced_img_deriv_y - input_img_deriv_y,2))
cost = tf.add(white_cost, tf.multiply(0.2, sobel_cost), name = "cost") # + tf.reduce_sum(gain - 1) + tf.reduce_sum(offset)
# Parameters
learning_rate = 0.0001
training_epochs = 100
display_step = 5
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
image = image.reshape([-1,img_shape[0],img_shape[1],img_shape[2]])
#print image.shape
#print image.shape
feed = {input_img: image }
# Start training
with tf.Session() as sess:
#Run the initializer
# Fit all training data
for epoch in range(training_epochs):
sess.run([optimizer, cost], feed_dict = feed)
if (epoch+1) % display_step == 0:
gen_img = sess.run(enhanced_img, feed_dict = feed)
gen_img = np.squeeze(gen_img, axis=0)
gen_img *= 255
cv2.imwrite("result/output_2_{0}.png".format(epoch), gen_img)
I noticed that when I save the graph, the optimizer state is also saved, so when I load the model and feed it a new image it produces a wrong result, since it uses the saved values related to the image I used when I saved it.
How can I make the model run the optimizer for new images without using the saved parameters from the previous input?
Do you understand how the optimizer actually works ?
The goal of the optimizer is to update the model weights with respect to the gradient. Concerning Adam it has two inner variables which are updated during training, it's part of the adam algorithm. So this behavior is perfectly normal. If you want to "reset" adam variables, it's perfectly doable, however I highly doubt that it's what you want to do... Very rare situations require you to do this. Btw. if you reset adam state, you will break the whole logic of the optimizer.
If you try to evaluate a new image at inference time, the optimizer should not be run, and thus your model output should not be impacted by Adam or any other optimizer.
If you try to continue the training from a previously saved checkpoint, I would recommend that you keep the Adam state if the dataset is the same (not a transfer learning approach), and thus you should not reset Adam's variables.
Btw. if you really want to reset Adam, this is how you would do it (here `optimizer` must be the `tf.train.AdamOptimizer` object itself, not the training op returned by `.minimize(cost)`):
optimizer_reset_op = tf.variables_initializer(optimizer.variables())
import tensorflow as tf
import numpy as np
import os
import re
import PIL
def read_image_label_list(img_directory, folder_name):
    """Collect the cat image paths found in ``img_directory/folder_name``.

    Args:
        img_directory: Root directory that contains the data folders.
        folder_name: Name of the sub-folder to scan (e.g. "train" or "test").

    Returns:
        A ``(filenames, labels)`` tuple. ``filenames`` holds the full path of
        every entry whose name contains "cat", and is only filled when
        ``folder_name`` contains "train". ``labels`` is returned as built by
        the visible code (see the note below).
    """
    # NOTE(review): these label values are defined but never appended to
    # `labels` in the code as received — the lines that do so appear to be
    # missing. Restore them from the original before relying on `labels`.
    cat_label = 1
    dog_label = 0
    filenames = []
    labels = []
    # All entries in the requested folder.
    dir_list = os.listdir(os.path.join(img_directory, folder_name))
    for d in dir_list:
        if re.search("train", folder_name):
            if re.search("cat", d):  # True when the file name contains 'cat'
                # Use the function's own parameter; the previous code read the
                # module-level `img_dir`, silently ignoring `img_directory`.
                filenames.append(os.path.join(img_directory, folder_name, d))
    return filenames, labels
# Define convolutional layer
def conv_layer(input, channels_in, channels_out):
    """Apply a 5x5 convolution (stride 1, SAME padding), add a bias, then ReLU.

    The kernel ("weight_conv") and bias ("bias_conv") are obtained through
    tf.get_variable, so the caller is expected to wrap each call in its own
    variable scope.
    """
    kernel = tf.get_variable(
        "weight_conv",
        [5,5, channels_in, channels_out],
        initializer=tf.contrib.layers.xavier_initializer(),
    )
    bias = tf.get_variable(
        "bias_conv", [channels_out], initializer=tf.zeros_initializer()
    )
    convolved = tf.nn.conv2d(input, kernel, strides=[1,1,1,1], padding="SAME")
    return tf.nn.relu(convolved + bias)
# Define fully connected layer
def fc_layer(input, channels_in, channels_out):
    """Compute relu(input @ W + b) for a fully connected layer.

    The weight ("weight_fc") and bias ("bias_fc") are obtained through
    tf.get_variable, so the caller is expected to wrap each call in its own
    variable scope. ReLU is always applied to the result.
    """
    weight = tf.get_variable(
        "weight_fc",
        [channels_in, channels_out],
        initializer=tf.contrib.layers.xavier_initializer(),
    )
    bias = tf.get_variable(
        "bias_fc", [channels_out], initializer=tf.zeros_initializer()
    )
    affine = tf.matmul(input, weight) + bias
    return tf.nn.relu(affine)
# Define parse function to make input data to decode image into
def _parse_function(img_path, label):
    """Turn an (image path, label) pair into a decoded image and a one-hot label.

    The file is read and decoded with 3 channels, resized to 28x28 using
    nearest-neighbour interpolation, standardised per image and cast to
    float32. The label is one-hot encoded with depth 1.

    Returns:
        ``(img_decoded, label)`` as tensors.
    """
    img_file = tf.read_file(img_path)
    img_decoded = tf.image.decode_image(img_file, channels=3)
    img_decoded = tf.image.resize_images(img_decoded, (28, 28), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    img_decoded = tf.image.per_image_standardization(img_decoded)
    # The keyword is `dtype`; the previous `dty=` is not an argument of
    # tf.cast and raises TypeError when the dataset is mapped.
    img_decoded = tf.cast(img_decoded, dtype=tf.float32)
    # NOTE(review): depth 1 gives a single-column encoding, so the label cannot
    # distinguish two classes in a softmax loss; presumably depth should be
    # the number of classes — confirm together with the logits shape.
    label = tf.one_hot(label, 1)
    return img_decoded, label
# Define parameters
BATCH_SIZE_training = 64
learning_rate = 0.001
img_dir = 'C:/Users/tharu/PycharmProjects/cat_vs_dog/data'
batch_size = 128
# Define data
features, labels = read_image_label_list(img_dir, "train")
# Define dataset
dataset = tf.data.Dataset.from_tensor_slices((features, labels)) # Takes slices in 0th dimension
dataset = dataset.map(_parse_function)
dataset = dataset.batch(batch_size)
iterator = dataset.make_initializable_iterator()
# Get next batch of data from iterator
x, y = iterator.get_next()
# Create the network (use different variable scopes for reuse of variables)
with tf.variable_scope("conv1"):
conv_1 = conv_layer(x, 3, 32)
pool_1 = tf.nn.max_pool(conv_1, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")
with tf.variable_scope("conv2"):
conv_2 = conv_layer(pool_1, 32, 64)
pool_2 = tf.nn.max_pool(conv_2, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")
flattened = tf.contrib.layers.flatten(pool_2)
with tf.variable_scope("fc1"):
fc_1 = fc_layer(flattened, 7*7*64, 1024)
with tf.variable_scope("fc2"):
logits = fc_layer(fc_1, 1024, 1)
# Define loss function
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf.cast(y, dtype=tf.int32)))
# Define optimizer
train = tf.train.AdamOptimizer(learning_rate).minimize(loss)
with tf.Session() as sess:
# Initialize all the variables
# Train the network
for i in range(EPOCHS):
# Initialize iterator so that it starts at beginning of training set for each epoch
print("EPOCH", i)
while True:
_, epoch_loss = sess.run([train, loss])
except tf.errors.OutOfRangeError: # Error given when out of data
if i % 2 == 0:
# [train_accuaracy] = sess.run([accuracy])
# print("Step ", i, "training accuracy = %{}".format(train_accuaracy))
I've spent a few hours trying to figure out systematically why I've been getting 0 loss when I run this model.
Features = list of file locations for each image (e.g. ['\data\train\cat.0.jpg', /data\train\cat.1.jpg])
Labels = [Batch_size, 1] one_hot vector
Initially I thought it was because there was something wrong with my data. But I've viewed the data after being resized and the images seem fine.
Then I tried a few different loss functions because I thought maybe I'm misunderstanding what the TensorFlow function softmax_cross_entropy does, but that didn't fix anything.
I've tried running just the 'logits' section to see what the output is. This is just a small sample and the numbers seem fine to me:
[0. ]
Surely then the softmax_cross_entropy function should be able to compute this loss given that the corresponding labels are 0 or 1? I'm not sure if I'm missing something. Any help would be greatly appreciated.
As documented:
logits and labels must have the same shape, e.g. [batch_size, num_classes] and the same dtype (either float16, float32, or float64).
Since you mentioned your label is "[Batch_size, 1] one_hot vector", I would assume both your logits and labels have shape [Batch_size, 1]. This will certainly lead to zero loss. Conceptually speaking, you have only 1 class (num_classes=1), so you cannot be wrong (loss=0).
So at least for your labels, you should transform them: tf.one_hot(indices=labels, depth=num_classes). Your prediction logits should also have shape [batch_size, num_classes].
Alternatively, you can use sparse_softmax_cross_entropy_with_logits, where:
A common use case is to have logits of shape [batch_size, num_classes] and labels of shape [batch_size]. But higher dimensions are supported.
I am trying to reconstruct the input images of my database using a stacked autoencoder in TensorFlow. If I use the MNIST database, I can reconstruct the input images correctly. But when I use my own database, I can't reconstruct the input images correctly. Here is my code in Python:
from __future__ import division, print_function, absolute_import
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image
from sklearn.utils import shuffle
from sklearn.cross_validation import train_test_split
import glob
import scipy.misc as ms
import matplotlib.cm as cm
path1 = 'gestures dataset\dataset'
listimg = os.listdir(path1)
num_sample = np.size(listimg)
for j in range(class_no):
lastindex +=i
for i in range(len(labels)):
for image in glob.glob('gestures dataset/dataset/*.ppm'):
img = np.array(Image.open(image).convert('L'))
img = ms.imresize(img, (50, 50), 'nearest')
im_array = np.array(img)
imageread.append(im_array.flatten()) #m*nn
data ,labels=shuffle(originalimage,labels,random_state=2)
test_si ze=0.2,random_state=4)
learning_rate = 0.01
training_epochs = 1000
batch_size = 256
display_step = 1
examples_to_show = 5
n_hidden_1 = 50 # 1st layer num features
n_hidden_2 = 30 # 2nd layer num features
n_hidden_3 = 20
n_input = 2500 # MNIST data input (img shape: 28*28)
X = tf.placeholder("float", [None, n_input])
weights = {
'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1,n_hidden_2])),
'encoder_h3': tf.Variable(tf.random_normal([n_hidden_2,n_hidden_3])),
'decoder_h1': tf.Variable(tf.random_normal([n_hidden_3,n_hidden_2])),
'decoder_h2': tf.Variable(tf.random_normal([n_hidden_2,n_hidden_1])),
'decoder_h3': tf.Variable(tf.random_normal([n_hidden_1, n_input]))
biases = {
'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),
'encoder_b3': tf.Variable(tf.random_normal([n_hidden_3])),
'decoder_b1': tf.Variable(tf.random_normal([n_hidden_2])),
'decoder_b2': tf.Variable(tf.random_normal([n_hidden_1])),
'decoder_b3': tf.Variable(tf.random_normal([n_input]))
# Building the encoder
def encoder(x):
# Encoder Hidden layer with sigmoid activation #1
layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
#Encoder Hidden layer with sigmoid activation #2
layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1,
layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2,
return layer_3
# Building the decoder
def decoder(x):
# Decoder Hidden layer with sigmoid activation #1
layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
# Decoder Hidden layer with sigmoid activation #2
layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1,
layer_3 =tf.nn.sigmoid(tf.add(tf.matmul(layer_2,
return layer_3
# Construct model
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)
# Prediction
y_pred = decoder_op
# Targets (Labels) are the input data.
y_true = X
# Define loss and optimizer, minimize the squared error
cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
for epoch in range(training_epochs):
_, c = sess.run([optimizer, cost], feed_dict={X: x_train})
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch+1),
"cost=", "{:.9f}".format(c))
print("Optimization Finished!")
# Applying encode and decode over test set
encode_decode = sess.run(
y_pred, feed_dict={X: x_train[:examples_to_show]})
# Compare original images with their reconstructions
f, a = plt.subplots(2, 5, figsize=(10, 2))
for i in range(examples_to_show):
a[0][i].imshow(np.reshape(x_test[i], (50, 50)),cmap=cm.Greys_r)
a[1][i].imshow(np.reshape(encode_decode[i],(50, 50)),cmap=cm.Greys_r)
Here are my input images and the corresponding reconstructed images. The first row shows the input images and the second row shows the reconstructed images.
reconstructed images
My question is how can i remove the noise from the reconstructed images?
I know that this is a very broad question, but I have asked many other questions and I have still been unable to properly implement a simple dynamic-k max pooling convolutional neural network as described in this paper. Currently, I am trying to modify the code from this tutorial. I believe I have successfully implemented the dynamic-k part. However, my main problem is because the k value is different for each input, the tensors that are produced are different shapes. I have tried countless things to try and fix this (which is why you may see some funny reshaping), but I can't figure out how. I think that you'd need to pad each tensor to get them all to be the size of the biggest one, but I can't seem to get that to work. Here is my code (I am sorry, it is generally rather sloppy).
# train.py
import datetime
import time
import numpy as np
import os
import tensorflow as tf
from env.src.sentiment_analysis.dcnn.text_dcnn import TextDCNN
from env.src.sentiment_analysis.cnn import data_helpers as data_helpers
from tensorflow.contrib import learn
# Model Hyperparameters
tf.flags.DEFINE_integer("embedding_dim", 128, "Dimensionality of character embedding (default: 128)")
tf.flags.DEFINE_string("filter_sizes", "3,4,5", "Comma-separated filter sizes (default: '3,4,5')")
tf.flags.DEFINE_integer("num_filters", 128, "Number of filters per filter size (default: 128)")
tf.flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability (default: 0.5)")
tf.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularizaion lambda (default: 0.0)")
# Training parameters
tf.flags.DEFINE_integer("batch_size", 256, "Batch Size (default: 64)")
tf.flags.DEFINE_integer("num_epochs", 200, "Number of training epochs (default: 200)")
tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")
# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
tf.flags.DEFINE_string("positive_file", "../rotten_tomatoes/rt-polarity.pos", "Location of the rt-polarity.pos file")
tf.flags.DEFINE_string("negative_file", "../rotten_tomatoes/rt-polarity.neg", "Location of the rt-polarity.neg file")
FLAGS = tf.flags.FLAGS
for attr, value in sorted(FLAGS.__flags.items()):
print("{} = {}".format(attr.upper(), value))
# Data Preparation
# Load data
print("Loading data...")
x_text, y = data_helpers.load_data_and_labels(FLAGS.positive_file, FLAGS.negative_file)
# Build vocabulary
max_document_length = max([len(x.split(" ")) for x in x_text])
vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
x = np.array(list(vocab_processor.fit_transform(x_text)))
x_arr = np.array(x_text)
seq_lens = []
for s in x_arr:
seq_lens.append(len(s.split(" ")))
# Randomly shuffle data
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices]
# Split train/test set
x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
# Training
with tf.Graph().as_default():
session_conf = tf.ConfigProto(
sess = tf.Session(config=session_conf)
with sess.as_default():
dcnn = TextDCNN(
filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
# The training procedure
global_step = tf.Variable(0, name="global_step", trainable=False)
optimizer = tf.train.AdamOptimizer(1e-4)
grads_and_vars = optimizer.compute_gradients(dcnn.loss)
train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
# Output directory for models and summaries
timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
# Summaries for loss and accuracy
loss_summary = tf.scalar_summary("loss", dcnn.loss)
acc_summary = tf.scalar_summary("accuracy", dcnn.accuracy)
# Summaries for training
train_summary_op = tf.merge_summary([loss_summary, acc_summary])
train_summary_dir = os.path.join(out_dir, "summaries", "train")
train_summary_writer = tf.train.SummaryWriter(train_summary_dir, sess.graph)
# Summaries for devs
dev_summary_op = tf.merge_summary([loss_summary, acc_summary])
dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
dev_summary_writer = tf.train.SummaryWriter(dev_summary_dir, sess.graph)
# Checkpointing
checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
checkpoint_prefix = os.path.join(checkpoint_dir, "model")
# TensorFlow assumes this directory already exists so we need to create it
if not os.path.exists(checkpoint_dir):
saver = tf.train.Saver(tf.all_variables())
# Write vocabulary
vocab_processor.save(os.path.join(out_dir, "vocab"))
# Initialize all variables
def train_step(x_batch, y_batch):
    """
    A single training step.

    Runs one optimizer update on the batch, prints the batch loss and
    accuracy, and records them through the training summary writer.

    x_batch: A batch of X training values.
    y_batch: A batch of Y training values.
    Returns: None
    """
    feed_dict = {
        dcnn.input_x: x_batch,
        dcnn.input_y: y_batch,
        dcnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
    }
    # Execute train_op; the other fetches are only read back for reporting
    _, step, summaries, loss, accuracy = sess.run(
        [train_op, global_step, train_summary_op, dcnn.loss, dcnn.accuracy],
        feed_dict)
    # Print and save to disk loss and accuracy of the current training batch
    time_str = datetime.datetime.now().isoformat()
    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
    train_summary_writer.add_summary(summaries, step)
def dev_step(x_batch, y_batch, writer=None):
    """
    Evaluates a model on a dev set.

    No training op is run, and dropout is disabled by feeding a keep
    probability of 1.0.

    x_batch: A batch of X values to evaluate on.
    y_batch: A batch of Y values to evaluate on.
    writer: The writer to use to record the loss and accuracy; nothing is
        recorded when it is None.
    Returns: None
    """
    feed_dict = {
        dcnn.input_x: x_batch,
        dcnn.input_y: y_batch,
        dcnn.dropout_keep_prob: 1.0,
    }
    step, summaries, loss, accuracy = sess.run(
        [global_step, dev_summary_op, dcnn.loss, dcnn.accuracy],
        feed_dict)
    time_str = datetime.datetime.now().isoformat()
    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
    if writer:
        writer.add_summary(summaries, step)
# Pair every training input with its label and hand the pairs to the batch
# iterator together with the configured batch size and epoch count.
training_pairs = list(zip(x_train, y_train))
batches = data_helpers.batch_iter(training_pairs, FLAGS.batch_size, FLAGS.num_epochs)
# Main loop: one optimizer update per batch, with periodic dev evaluation
# and periodic checkpointing keyed on the global step.
for minibatch in batches:
    inputs, labels = zip(*minibatch)
    train_step(inputs, labels)
    step_now = tf.train.global_step(sess, global_step)
    due_for_eval = step_now % FLAGS.evaluate_every == 0
    if due_for_eval:
        dev_step(x_dev, y_dev, writer=dev_summary_writer)
    due_for_checkpoint = step_now % FLAGS.checkpoint_every == 0
    if due_for_checkpoint:
        saved_to = saver.save(sess, checkpoint_prefix, global_step=step_now)
        print("Saved model checkpoint to {}\n".format(saved_to))
And here is the actual DCNN class:
import tensorflow as tf
class TextDCNN(object):
    """
    A CNN for NLP tasks. Architecture is as follows:
    Embedding layer, conv layer, max-pooling and softmax layer
    """

    def __init__(self, sequence_lengths, sequence_length, num_classes, vocab_size, embedding_size, filter_sizes, num_filters):
        """
        Makes a new CNNClassifier

        sequence_lengths: Kept only so existing callers keep working; it is
            no longer used. Pooling is sized from sequence_length instead.
        sequence_length: The (padded) length of each sentence
        num_classes: Number of classes in the output layer (positive and negative would be 2 classes)
        vocab_size: The size of the vocabulary, needed to define the size of the embedding layer
        embedding_size: Dimensionality of the embeddings
        filter_sizes: Number of words the convolutional filters will cover, there will be num_filters for each size
        num_filters: The number of filters per filter size.
        Returns: A new CNNClassifier with the given parameters.
        """
        # Define the inputs and the dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Runs the operations on the CPU and organizes them into an embedding scope
        with tf.device("/cpu:0"), tf.name_scope("embedding"):
            # Embedding matrix: one row of embedding_size values per vocabulary id
            W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W")
            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
            # conv2d needs a 4D input, so add a trailing channel axis of size 1
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

        pooled_outputs = []
        for filter_size in filter_sizes:
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Conv layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                # W is the filter matrix
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Max-pool over the whole convolution output. A VALID conv
                # leaves sequence_length - filter_size + 1 rows, so a window
                # of exactly that height yields [batch, 1, 1, num_filters]
                # for every filter size. Sizing the window from any other
                # length leaves extra rows that a later reshape would fold
                # into the batch axis, which breaks the concat below.
                pool_height = sequence_length - filter_size + 1
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, pool_height, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all of the pooled features along the channel axis.
        # tf.concat is called with the axis first, i.e. the pre-1.0 TensorFlow
        # argument order, consistent with the other pre-1.0 calls in this file.
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(3, pooled_outputs)
        # Flatten to [batch, num_filters_total]; the batch axis stays dynamic
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Do raw predictions (no softmax)
        with tf.name_scope("output"):
            W = tf.Variable(tf.truncated_normal([num_filters_total, num_classes], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            # xw_plus_b(...) is just Wx + b matmul alias
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            # softmax_cross_entropy_with_logits(...) calculates cross-entropy loss;
            # keyword arguments keep logits and labels from being swapped
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses)

        # Calculate accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
I am using the Rotten Tomatoes sentiment labeled data set. The current error I am getting is this:
InvalidArgumentError (see above for traceback): input[1,0] mismatch: 5888 vs. 4864
[[Node: gradients/concat_grad/ConcatOffset = ConcatOffset[N=3, _device="/job:localhost/replica:0/task:0/cpu:0"](concat/concat_dim, gradients/concat_grad/ShapeN, gradients/concat_grad/ShapeN:1, gradients/concat_grad/ShapeN:2)]]
How can I fix this code so that all of the tensors are normalized to the same size after pooling (while keeping pooling dynamic) and so that the code runs to completion?
Sorry about all of the random commented out lines and prints and stuff, but I have tried extensively to make this work.
Although tensorflow doesn't provide k-max pooling directly, I think tf.nn.top_k might help you build that op.
There are three things to note here.
max-pooling and k-max pooling are two different operations.
max-pooling retrieves the maximum valued activation out of the pooling window while k-max pooling retrieves k maximum values from the pooling window.
TensorFlow doesn't provide an API for k-max pooling as of now. The
operation you are using now is max-pooling, not k-max
pooling.
As far as I know, TensorFlow does not provide functionality to handle pooling that results in matrices of different sizes. So, you may use bucketing to create batches of sentences of similar length and then use k-max pooling.