I am using TensorFlow version 1.5 on Windows 10. I am using the TensorFlow-Slim implementation of the Inception V4 network from its GitHub page, loading the pretrained weights and adding my own layers at the end to classify 120 different objects. My training dataset contains around 10,000 images, each of size 299×299×3. This is the complete code except for the lines containing the module imports and dataset paths.
with slim.arg_scope(inception_blocks_v4.inception_v4_arg_scope()):
X_input = tf.placeholder(tf.float32, shape = (None, image_size, image_size, 3))
Y_label = tf.placeholder(tf.float32, shape = (None, num_classes))
targets = convert_to_onehot(labels_dir, no_of_features = num_classes)
targets = tf.convert_to_tensor(targets, dtype = tf.float32)
images = glob.glob(images_file_path)
i = 0
for my_img in images:
image = mpimg.imread(my_img)[:, :, :3]
image = tf.convert_to_tensor(image, dtype = tf.float32)
logits, end_points = inception_blocks_v4.inception_v4(inputs = X_input, num_classes = pre_num_classes, is_training = True, create_aux_logits= False)
pretrained_weights = slim.assign_from_checkpoint_fn(ckpt_dir, slim.get_model_variables('InceptionV4'))
with tf.Session() as sess:
#MY LAYERS, add bias as well
my_layer = slim.fully_connected(logits, 560, activation_fn=tf.nn.relu, scope='myLayer1', weights_initializer = tf.truncated_normal_initializer(stddev = 0.001), weights_regularizer=slim.l2_regularizer(0.00005),biases_initializer = tf.truncated_normal_initializer(stddev=0.001), biases_regularizer=slim.l2_regularizer(0.00005))
my_layer = slim.dropout(my_layer, keep_prob = 0.6, scope = 'myLayer2')
my_layer = slim.fully_connected(my_layer, num_classes,activation_fn = tf.nn.relu,scope= 'myLayer3', weights_initializer = tf.truncated_normal_initializer(stddev=0.001), weights_regularizer=slim.l2_regularizer(0.00005), biases_initializer = tf.truncated_normal_initializer(stddev=0.001), biases_regularizer=slim.l2_regularizer(0.00005))
my_layer_logits = slim.fully_connected(my_layer, num_classes, activation_fn=None,scope='myLayer4')
loss = tf.losses.softmax_cross_entropy(onehot_labels = Y_label, logits = my_layer_logits)
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
train_op = optimizer.minimize(loss)
batch_size = 8
with tf.Session() as sess:
for i in range(100):
images, labels = tf.train.batch([Images, targets], batch_size = batch_size, num_threads = 1, capacity = (4*batch_size), enqueue_many=True)
print (images) #To check their shape
print (labels)
train_op.run(feed_dict = {X_input:images.eval(session = sess) ,Y_label:labels.eval(session = sess)})
print (i)
I used the print(i) statement to keep track of how many epochs are done. After running the script for more than 3 hours, not even a single epoch of training is completed. It seems that it gets stuck at train_op.run() step. I don't know what is the problem.
I am trying to train an encoder-decoder model on multispectral images that have 9 channels, but the code I am running downloads pretrained ResNet101 weights, which were trained on 3-channel images.
Input Given by me:
net_input = tf.placeholder(tf.float32,shape=[None,None,None,9])
net_output = tf.placeholder(tf.float32,shape=[None,None,None,num_classes])
code for getting pretrained weights for Resnet101:
if args.model == "ResNet101" or args.model == "ALL":
subprocess.check_output(['wget','http://download.tensorflow.org/models/resnet_v2_101_2017_04_14.tar.gz', "-P", "models"])
subprocess.check_output(['tar', '-xvf', 'models/resnet_v2_101_2017_04_14.tar.gz', "-C", "models"])
subprocess.check_output(['rm', 'models/resnet_v2_101_2017_04_14.tar.gz'])
except Exception as e:
error that i am getting is:
Invalid argument: Assign requires shapes of both tensors to match. lhs shape= [7,7,9,64] rhs
shape= [7,7,3,64]
what can be the solution here?
If you do not want to reduce the input from 9 channels to 3, you need to change the input layer and the second layer of the ResNet architecture from 3 to 9 channels and add the final layers for inference. Note that you will then have to train the network again.
Here is a full code, as an example, you just have to change channels to 9:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
path_to_zip = tf.keras.utils.get_file('cats_and_dogs.zip', origin=_URL, extract=True)
PATH = os.path.join(os.path.dirname(path_to_zip), 'cats_and_dogs_filtered')
train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')
IMG_SIZE = (160, 160)
train_dataset = tf.keras.utils.image_dataset_from_directory(train_dir,
validation_dataset = tf.keras.utils.image_dataset_from_directory(validation_dir,
class_names = train_dataset.class_names
val_batches = tf.data.experimental.cardinality(validation_dataset)
test_dataset = validation_dataset.take(val_batches // 5)
validation_dataset = validation_dataset.skip(val_batches // 5)
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)
test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE)
preprocess_input = tf.keras.applications.ResNet50.preprocess_input
# Create the base model from the pre-trained model ResNet50
############### HERE YOU CHANGE TO 9 CHANNELS ###############
IMG_SHAPE = IMG_SIZE + (channels,)
base_model = tf.keras.applications.ResNet50(input_shape=IMG_SHAPE,
image_batch, label_batch = next(iter(train_dataset))
feature_batch = base_model(image_batch)
base_model_config = base_model.get_config()
base_model_config['layers'][0]["config"]["batch_input_shape"]=(None, 160, 160, channels)
base_model_config['layers'][1]["config"]["padding"]=((channels,channels), (channels,channels))
inputs = tf.keras.Input(shape=(160, 160, channels))
x = base_model(inputs, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(inputs, outputs)
base_learning_rate = 0.0001
model.fit(x=train_dataset, epochs=2)
If there is any shape mismatch in the middle of the way, you know how to change it: base_model_config['layers'][x]....=........
There are a few resources about this idea, such as a blog post about transferring a ResNet on RGB data to multi-channel images here, and a relevant Colab Notebook. Below is a working example based on those resources:
import numpy as np
import tensorflow as tf
def tile_kernels(kernel, out_channels, batch_dim=-2):
    """Build extra channel kernels from the mean of the existing ones.

    The kernel is averaged along ``batch_dim`` and the resulting slice is
    repeated ``out_channels`` times along that same axis. With the default
    ``batch_dim=-2`` and a 4-D convolution kernel this averages over the
    second-to-last axis (presumably the input-channel axis of an
    ``(h, w, in_channels, filters)`` kernel -- confirm against the caller).

    Args:
        kernel: Array of weights to average.
        out_channels: Number of copies of the mean slice to produce. ``0``
            yields an array that is empty along ``batch_dim``.
        batch_dim: Axis to average over and then repeat along.

    Returns:
        An array shaped like ``kernel`` except that ``batch_dim`` has length
        ``out_channels``.
    """
    # keepdims leaves the averaged axis in place with length 1, so this works
    # for any batch_dim instead of reshaping to a hard-coded slice shape.
    mean_slice = np.mean(kernel, axis=batch_dim, keepdims=True)
    # Repeat along the averaged axis explicitly rather than relying on
    # np.tile's implicit left-padding of the reps tuple.
    return np.repeat(mean_slice, out_channels, axis=batch_dim)
def reshape_model_input(model_orig, custom_model, input_channels):
    """Copy weights from ``model_orig`` into ``custom_model`` in place.

    Every layer of ``model_orig`` that has weights and whose name also appears
    in ``custom_model``'s config has its weights copied across. The layer named
    by the third config entry of ``custom_model`` is treated specially: its
    kernels are widened from 3 to ``input_channels`` input channels by
    appending ``input_channels - 3`` mean kernels from ``tile_kernels``.

    Args:
        model_orig: Source model; must expose ``layers``, each with ``name``
            and ``get_weights()``.
        custom_model: Target model; must expose ``get_config()`` and
            ``get_layer(name)``. It is modified by reference.
        input_channels: Total number of input channels of ``custom_model``.

    Returns:
        None. ``custom_model`` is updated through ``set_weights``.
    """
    conf = custom_model.get_config()
    # NOTE(review): index 2 is assumed to be the first convolution (after the
    # input and padding layers) -- confirm for architectures other than the
    # one used in the accompanying example.
    layer_to_modify = conf["layers"][2]["config"]["name"]
    layer_names = {layer_conf["name"] for layer_conf in conf["layers"]}

    for layer in model_orig.layers:
        if layer.name not in layer_names:
            continue
        weights = layer.get_weights()
        if not weights:
            # Layers without weights (padding, pooling, ...) need no copy.
            continue

        target_layer = custom_model.get_layer(layer.name)
        if layer.name == layer_to_modify:
            kernels, biases = weights
            extra_kernels = tile_kernels(kernels, input_channels - 3)
            # Append the synthesized kernels along the same axis that
            # tile_kernels averages over by default (-2).
            kernels_extra_channels = np.concatenate((kernels, extra_kernels), axis=-2)
            target_layer.set_weights([kernels_extra_channels, biases])
        else:
            # All other layers have identical shapes in both models.
            target_layer.set_weights(weights)
if __name__ == "__main__":
    from tensorflow.keras.applications import ResNet50V2

    # Load the pretrained backbone; any other way of obtaining the original
    # model works as well.
    resnet50 = ResNet50V2(weights='imagenet', include_top=False)

    config = resnet50.get_config()
    img_height = ...  # placeholder: fill in the real image height
    img_width = ...  # placeholder: fill in the real image width
    input_channels = 7

    # Change the batch input shape so the rebuilt model accepts the new
    # channel count.
    config["layers"][0]["config"]["batch_input_shape"] = (None, img_height, img_width, input_channels)
    custom_resnet = tf.keras.models.Model.from_config(config)

    # Modify the custom model by reference.
    reshape_model_input(resnet50, custom_resnet, input_channels)

    # Verify that a forward pass with the new shape works. The dummy input
    # uses (height, width) order to match batch_input_shape above.
    custom_resnet(np.zeros((1, img_height, img_width, input_channels)))
This process iterates over each layer in the original model and sets the corresponding weights in the custom model. To produce the n additional input-channel kernels for the first convolution (n = 4 in this example, since it uses 7 total channels; for 9 channels n would be 6), the mean is taken across the 3 RGB channels and then replicated (as can be seen in the tile_kernels function). Another aggregation function could be used instead, such as the max, min, or median. If you don't want any of the weights from the original model (that is, you need only the architecture and no pretraining), just modifying the original model's configuration and creating a new model from it gives a randomly initialized model:
resnet50 = ...
config = resnet50.get_config()
img_height = ...
img_width = ...
input_channels = ...
config["layers"][0]["config"]["batch_input_shape"] = (None, img_height, img_width, input_channels)
custom_resnet = tf.keras.models.Model.from_config(config)
I'm working on a classification program in which I train a model to predict whether an object is a nut or a screw. I created my own dataset because I could not find an existing one. I trained my model, but I'm not getting correct predictions: the probability values seem to go beyond 1, so I am basically getting garbage values.
I get this predicted value: [[9.990779e-01 9.220659e-04]]
#Training code
import dataset
import tensorflow as tf
import time
from datetime import timedelta
import math
import random
import numpy as np
import os
# Adding Seed so that random initialization is consistent
from numpy.random import seed
from tensorflow import set_random_seed
batch_size = 20
# Prepare input data
classes = os.listdir('training_set')
num_classes = len(classes)
# 20% of the data will automatically be used for validation
validation_size = 0.2
img_size = 128
num_channels = 3
train_path = 'training_set'
# We shall load all the training and validation images and labels into
memory using openCV and use that during training
data = dataset.read_train_sets(train_path, img_size, classes,
print("Complete reading input data. Will Now print a snippet of it")
print("Number of files in Training-
print("Number of files in Validation-
session = tf.Session()
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size,
num_channels], name='x')
## labels
y_true = tf.placeholder(tf.float32, shape=[None, num_classes],
y_true_cls = tf.argmax(y_true, dimension=1)
##Network graph params
filter_size_conv1 = 3
num_filters_conv1 = 32
filter_size_conv2 = 3
num_filters_conv2 = 32
filter_size_conv3 = 3
num_filters_conv3 = 32
fc_layer_size = 128
def create_weights(shape):
    """Return a variable of the given shape drawn from a truncated normal (stddev 0.05)."""
    initial_values = tf.truncated_normal(shape, stddev=0.05)
    return tf.Variable(initial_values)
def create_biases(size):
    """Return a 1-D variable of length ``size`` with every entry set to 0.05."""
    initial_values = tf.constant(0.05, shape=[size])
    return tf.Variable(initial_values)
def create_convolutional_layer(input,
## We shall define the weights that will be trained using create_weights function.
weights = create_weights(shape=[conv_filter_size, conv_filter_size, num_input_channels, num_filters])
## We create biases using the create_biases function. These are also trained.
biases = create_biases(num_filters)
## Creating the convolutional layer
layer = tf.nn.conv2d(input=input,
strides=[1, 1, 1, 1],
layer += biases
## We shall be using max-pooling.
layer = tf.nn.max_pool(value=layer,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
## Output of pooling is fed to Relu which is the activation function for us.
layer = tf.nn.relu(layer)
return layer
def create_flatten_layer(layer):
    """Reshape ``layer`` to 2-D, keeping the first axis and merging axes 1-3.

    The width of the flattened axis is read from the layer's static shape
    rather than hard-coded.
    """
    # Product of the static sizes of axes 1, 2 and 3.
    flat_width = layer.get_shape()[1:4].num_elements()
    # -1 lets the first axis be inferred at run time.
    return tf.reshape(layer, [-1, flat_width])
def create_fc_layer(input,
# Let's define trainable weights and biases.
weights = create_weights(shape=[num_inputs, num_outputs])
biases = create_biases(num_outputs)
# Fully connected layer takes input x and produces wx+b.Since, these are matrices, we use matmul function in Tensorflow
layer = tf.matmul(input, weights) + biases
if use_relu:
layer = tf.nn.relu(layer)
return layer
layer_conv1 = create_convolutional_layer(input=x,
layer_conv2 = create_convolutional_layer(input=layer_conv1,
layer_conv3 = create_convolutional_layer(input=layer_conv2,
layer_flat = create_flatten_layer(layer_conv3)
layer_fc1 = create_fc_layer(input=layer_flat,
layer_fc2 = create_fc_layer(input=layer_fc1,
y_pred = tf.nn.softmax(layer_fc2, name='y_pred')
y_pred_cls = tf.argmax(y_pred, dimension=1)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2,
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
def show_progress(epoch, feed_dict_train, feed_dict_validate, val_loss):
acc = session.run(accuracy, feed_dict=feed_dict_train)
val_acc = session.run(accuracy, feed_dict=feed_dict_validate)
msg = "Training Epoch {0} --- Training Accuracy: {1:>6.1%}, Validation
Accuracy: {2:>6.1%}, Validation Loss: {3:.3f}"
print(msg.format(epoch + 1, acc, val_acc, val_loss))
total_iterations = 0
saver = tf.train.Saver()
def train(num_iteration):
global total_iterations
for i in range(total_iterations,
total_iterations + num_iteration):
x_batch, y_true_batch, _, cls_batch =
x_valid_batch, y_valid_batch, _, valid_cls_batch =
feed_dict_tr = {x: x_batch,
y_true: y_true_batch}
feed_dict_val = {x: x_valid_batch,
y_true: y_valid_batch}
session.run(optimizer, feed_dict=feed_dict_tr)
if i % int(data.train.num_examples / batch_size) == 0:
val_loss = session.run(cost, feed_dict=feed_dict_val)
epoch = int(i / int(data.train.num_examples / batch_size))
show_progress(epoch, feed_dict_tr, feed_dict_val, val_loss)
saver.save(session, 'C:\\Nutsbolts\\nuts-screws-model')
total_iterations += num_iteration
#Prediction code
import tensorflow as tf
import numpy as np
import os,glob,cv2
import sys,argparse
# First, pass the path of the image
dir_path = 'C:\\nutsbolts\\testing_set\\nuts'
image_path= 'nuts11.jpg'
filename = dir_path +'/' +image_path
images = []
# Reading the image using OpenCV
image = cv2.imread(filename)
# Resizing the image to our desired size and preprocessing will be done
exactly as done during training
image = cv2.resize(image, (image_size, image_size),0,0, cv2.INTER_LINEAR)
images = np.array(images, dtype=np.uint8)
images = images.astype('float32')
images = np.multiply(images, 1.0/255.0)
#The input to the network is of shape [None image_size image_size
num_channels]. Hence we reshape.
x_batch = images.reshape(1, image_size,image_size,num_channels)
## Let us restore the saved model
sess = tf.Session()
# Step-1: Recreate the network graph. At this step only graph is created.
saver = tf.train.import_meta_graph('nuts-screws-model.meta')
# Step-2: Now let's load the weights saved using the restore method.
saver.restore(sess, tf.train.latest_checkpoint('./'))
# Accessing the default graph which we have restored
graph = tf.get_default_graph()
# Now, let's get hold of the op that we can be processed to get the output.
# In the original network y_pred is the tensor that is the prediction of the
y_pred = graph.get_tensor_by_name("y_pred:0")
## Let's feed the images to the input placeholders
x= graph.get_tensor_by_name("x:0")
y_true = graph.get_tensor_by_name("y_true:0")
y_test_images = np.zeros((1, len(os.listdir('testing_set'))))
### Creating the feed_dict that is required to be fed to calculate y_pred
feed_dict_testing = {x: x_batch, y_true: y_test_images}
result=sess.run(y_pred, feed_dict=feed_dict_testing)
# result is of this format [probabiliy_of_nuts probability_of_screws]
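9.990779e-01 is actually below 1. The "e-01" suffix is scientific notation: 9.990779e-01 = 9.990779 × 10⁻¹ = 0.9990779 (about 99.9%), and 9.220659e-04 = 9.220659 × 10⁻⁴ ≈ 0.00092. The two values sum to 1, so they are valid probabilities.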
I am new to machine learning, and I am taking a bonus course at my university in which I have to train an LSTM model to generate captions. This is what I have read so far: Blogpost_about_lstms
And used this as reference: some_random_code
So what I want to achieve:
I have an Dataset which is structured like this:
output from an CNN with a Vector on size 2048 that holds some "features" of an image. And 5 Captions describing that image.
input: CNN vector + Captions
output: Caption (guess)
input: CNN vector
output: caption (guess)
So how can I use 2 Inputs (the CNN data and a Caption Sequence) to train to generate new captions only from an CNN input vector!
This is kinda tricky and I cannot grasp the theory in this. And Tensorflow is also quite a thing I have to say.
I have a normal Seq_2_Seq model in place that works. But now I am stuck :/
class Model(object):
def __init__(self, _input, is_training, hidden_size, vocab_size, num_layers,
dropout=config.trainer.dropout, init_scale=config.trainer.init_scale):
self.is_training = is_training
self.input_obj = _input
self.batch_size = _input.batch_size
self.num_steps = _input.num_steps
self.hidden_size = hidden_size
# create the word embeddings
with tf.device("/cpu:0"):
randomized = tf.random_uniform([vocab_size, hidden_size], -init_scale, init_scale)
print("randomized: ", randomized)
embedding = tf.Variable(randomized)
inputs = tf.nn.embedding_lookup(embedding, self.input_obj.input_data)
if is_training and dropout < 1:
inputs = tf.nn.dropout(inputs, dropout)
# set up the state storage / extraction
self.init_state = tf.placeholder(tf.float32, [num_layers, 2, self.batch_size, hidden_size])
state_per_layer_list = tf.unstack(self.init_state, axis=0)
rnn_tuple_state = tuple([tf.contrib.rnn.LSTMStateTuple(state_per_layer_list[idx][0], state_per_layer_list[idx][1])for idx in range(num_layers)])
# create an LSTM cell to be unrolled
print("Hidden size: ", hidden_size)
cell = tf.contrib.rnn.LSTMCell(hidden_size, forget_bias=config.trainer.forget_bias)
# add a dropout wrapper if training
if is_training and dropout < 1:
cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=dropout)
if num_layers > 1:
cell = tf.contrib.rnn.MultiRNNCell([cell for _ in range(num_layers)], state_is_tuple=True)
print("input: ", inputs)
output, self.state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32, initial_state=rnn_tuple_state)
# reshape to (batch_size * num_steps, hidden_size)
output = tf.reshape(output, [-1, hidden_size])
softmax_w = tf.Variable(tf.random_uniform([hidden_size, vocab_size], -init_scale, init_scale))
softmax_b = tf.Variable(tf.random_uniform([vocab_size], -init_scale, init_scale))
logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
# Reshape logits to be a 3-D tensor for sequence loss
logits = tf.reshape(logits, [self.batch_size, self.num_steps, vocab_size])
# Use the contrib sequence loss and average over the batches
loss = tf.contrib.seq2seq.sequence_loss(logits,
tf.ones([self.batch_size, self.num_steps], dtype=tf.float32),
# Update the cost
self.cost = tf.reduce_sum(loss)
# get the prediction accuracy
self.softmax_out = tf.nn.softmax(tf.reshape(logits, [-1, vocab_size]))
self.predict = tf.cast(tf.argmax(self.softmax_out, axis=1), tf.int32)
correct_prediction = tf.equal(self.predict, tf.reshape(self.input_obj.targets, [-1]))
self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
if not is_training:
self.learning_rate = tf.Variable(0.01, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5)
optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
self.train_op = optimizer.apply_gradients(zip(grads, tvars),
self.new_lr = tf.placeholder(tf.float32, shape=[])
self.lr_update = tf.assign(self.learning_rate, self.new_lr)
def assign_lr(self, session, lr_value):
session.run(self.lr_update, feed_dict={self.new_lr: lr_value})
I don't need a solution but some explanation how to move forward would be awesome!!
import tensorflow as tf
import numpy as np
import os
import re
import PIL
def read_image_label_list(img_directory, folder_name):
    """List the image files in a folder together with their class labels.

    Args:
        img_directory: Directory that contains ``folder_name``.
        folder_name: Name of the sub-folder to read (e.g. ``"train"`` or
            ``"test"``).

    Returns:
        A ``(filenames, labels)`` tuple. ``filenames`` holds the joined path
        of every entry in the folder. ``labels`` holds one label per file
        (1 when the file name contains ``"cat"``, otherwise 0), but only when
        ``folder_name`` contains ``"train"``; for any other folder it is
        empty.
    """
    # NOTE(review): the original snippet was truncated; the label-appending
    # logic below is reconstructed from its header comment ("Label associated
    # with each file") -- confirm against the intended behavior.
    cat_label = 1
    dog_label = 0

    # Use the img_directory argument rather than the module-level img_dir.
    folder_path = os.path.join(img_directory, folder_name)
    is_training_folder = re.search("train", folder_name) is not None

    filenames = []
    labels = []
    for entry in os.listdir(folder_path):
        filenames.append(os.path.join(folder_path, entry))
        if is_training_folder:
            labels.append(cat_label if re.search("cat", entry) else dog_label)
    return filenames, labels
# Define convolutional layer
def conv_layer(input, channels_in, channels_out):
    """Apply a 5x5, stride-1, SAME-padded convolution plus bias, then ReLU.

    The variables "weight_conv" and "bias_conv" are obtained with
    ``tf.get_variable``, so the caller's variable scope determines their
    full names.
    """
    kernel = tf.get_variable(
        "weight_conv",
        [5, 5, channels_in, channels_out],
        initializer=tf.contrib.layers.xavier_initializer(),
    )
    bias = tf.get_variable(
        "bias_conv",
        [channels_out],
        initializer=tf.zeros_initializer(),
    )
    feature_map = tf.nn.conv2d(input, kernel, strides=[1, 1, 1, 1], padding="SAME")
    return tf.nn.relu(feature_map + bias)
# Define fully connected layer
def fc_layer(input, channels_in, channels_out, use_relu=True):
    """Fully connected layer computing ``input @ W + b`` with optional ReLU.

    The variables "weight_fc" and "bias_fc" are obtained with
    ``tf.get_variable``, so the caller's variable scope determines their
    full names.

    Args:
        input: 2-D tensor whose last dimension is ``channels_in``.
        channels_in: Number of input features.
        channels_out: Number of output features.
        use_relu: When True (the default, matching the previous behavior)
            the result is passed through ReLU. Pass False for a layer that
            produces logits, so negative values are not clamped to zero
            before the loss is computed.

    Returns:
        The layer output tensor.
    """
    w_2 = tf.get_variable("weight_fc", [channels_in, channels_out], initializer=tf.contrib.layers.xavier_initializer())
    b_2 = tf.get_variable("bias_fc", [channels_out], initializer=tf.zeros_initializer())
    pre_activation = tf.matmul(input, w_2) + b_2
    if use_relu:
        return tf.nn.relu(pre_activation)
    return pre_activation
# Define parse function to make input data to decode image into
def _parse_function(img_path, label):
    """Load one image file and turn it into a model-ready (image, label) pair.

    The file at ``img_path`` is read, decoded with 3 channels, resized to
    28x28 with nearest-neighbour interpolation, standardized per image and
    cast to float32. ``label`` is one-hot encoded.

    Args:
        img_path: Path of the image file.
        label: Integer class label.

    Returns:
        ``(img_decoded, label)`` as tensors.
    """
    img_file = tf.read_file(img_path)
    img_decoded = tf.image.decode_image(img_file, channels=3)
    img_decoded = tf.image.resize_images(img_decoded, (28, 28), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    img_decoded = tf.image.per_image_standardization(img_decoded)
    # tf.cast's keyword is `dtype`; the previous `dty=` raised a TypeError.
    img_decoded = tf.cast(img_decoded, dtype=tf.float32)
    # NOTE(review): depth=1 encodes a single class. Paired with a single-logit
    # softmax cross-entropy the loss is always 0; a two-class setup presumably
    # needs depth=2 together with a 2-unit logits layer -- change both at once.
    label = tf.one_hot(label, 1)
    return img_decoded, label
# Define parameters
BATCH_SIZE_training = 64
learning_rate = 0.001
img_dir = 'C:/Users/tharu/PycharmProjects/cat_vs_dog/data'
batch_size = 128
# Define data
features, labels = read_image_label_list(img_dir, "train")
# Define dataset
dataset = tf.data.Dataset.from_tensor_slices((features, labels)) # Takes slices in 0th dimension
dataset = dataset.map(_parse_function)
dataset = dataset.batch(batch_size)
iterator = dataset.make_initializable_iterator()
# Get next batch of data from iterator
x, y = iterator.get_next()
# Create the network (use different variable scopes for reuse of variables)
with tf.variable_scope("conv1"):
conv_1 = conv_layer(x, 3, 32)
pool_1 = tf.nn.max_pool(conv_1, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")
with tf.variable_scope("conv2"):
conv_2 = conv_layer(pool_1, 32, 64)
pool_2 = tf.nn.max_pool(conv_2, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")
flattened = tf.contrib.layers.flatten(pool_2)
with tf.variable_scope("fc1"):
fc_1 = fc_layer(flattened, 7*7*64, 1024)
with tf.variable_scope("fc2"):
logits = fc_layer(fc_1, 1024, 1)
# Define loss function
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf.cast(y, dtype=tf.int32)))
# Define optimizer
train = tf.train.AdamOptimizer(learning_rate).minimize(loss)
with tf.Session() as sess:
# Initialize all the variables
# Train the network
for i in range(EPOCHS):
# Initialize iterator so that it starts at beginning of training set for each epoch
print("EPOCH", i)
while True:
_, epoch_loss = sess.run([train, loss])
except tf.errors.OutOfRangeError: # Error given when out of data
if i % 2 == 0:
# [train_accuaracy] = sess.run([accuracy])
# print("Step ", i, "training accuracy = %{}".format(train_accuaracy))
I've spent a few hours trying to figure out systematically why I've been getting 0 loss when I run this model.
Features = list of file locations for each image (e.g. ['\data\train\cat.0.jpg', /data\train\cat.1.jpg])
Labels = [Batch_size, 1] one_hot vector
Initially I thought it was because there was something wrong with my data. But I've viewed the data after being resized and the images seems fine.
Then I tried a few different loss functions because I thought I might be misunderstanding what the TensorFlow function softmax_cross_entropy does, but that didn't fix anything.
I've tried running just the 'logits' section to see what the output is. This is just a small sample and the numbers seem fine to me:
[0. ]
Surely then the softmax_cross_entropy function should be able to compute this loss given that the corresponding labels are 0 or 1? I'm not sure if I'm missing something. Any help would be greatly appreciated.
As documented:
logits and labels must have the same shape, e.g. [batch_size, num_classes] and the same dtype (either float16, float32, or float64).
Since you mentioned your label is a "[Batch_size, 1] one_hot vector", I assume both your logits and labels have shape [Batch_size, 1]. This will certainly lead to zero loss: the softmax over a single logit is always 1, so the cross-entropy is -log(1) = 0. Conceptually, you have only one class (num_classes=1), so your model cannot be wrong (loss=0).
So at least for your labels, you should transform them: tf.one_hot(indices=labels, depth=num_classes). Your prediction logits should also have shape [batch_size, num_classes].
Alternatively, you can use sparse_softmax_cross_entropy_with_logits, where:
A common use case is to have logits of shape [batch_size, num_classes] and labels of shape [batch_size]. But higher dimensions are supported.
I am currently trying to train my model to categorize the cifar-10 dataset. I read the data like this:
def convert_images(raw, channels=3, height=32, width=32):
    """Convert flat channel-first image rows into a channel-last float array.

    ``raw`` is copied to a float array, reshaped to
    ``(-1, channels, height, width)`` and transposed to
    ``(-1, height, width, channels)``.

    Args:
        raw: Array-like whose total size is a multiple of
            ``channels * height * width``.
        channels: Number of colour planes per image (default 3).
        height: Image height in pixels (default 32).
        width: Image width in pixels (default 32).

    Returns:
        A float array of shape ``(num_images, height, width, channels)``.

    Raises:
        ValueError: If ``raw`` cannot be reshaped to the requested geometry.
    """
    # np.array copies, so the result never aliases the caller's buffer.
    raw_float = np.array(raw, dtype=float)
    planar = raw_float.reshape([-1, channels, height, width])
    # Move the channel axis last: (N, C, H, W) -> (N, H, W, C).
    return planar.transpose([0, 2, 3, 1])
def load_data(filename):
data = unpickle(filename)
raw_images = data[b'data']
labels = np.array(data[b'labels'])
images = convert_images(raw_images)
return images, labels
def load_training_data():
images = np.zeros(shape=[50000,32,32,3], dtype = float)
labels = np.zeros(shape = [50000], dtype = int)
begin = 0
for i in range(5):
filename = "data_batch_" + str(i+1)
images_batch, labels_batch = load_data(filename)
num_images = len(images_batch)
end = begin + num_images
images[begin:end, :] = images_batch
labels[begin:end] = labels_batch
begin = end
return images, labels, OneHotEncoder(categorical_features=labels, n_values=10)
What this does is reshape the data so that it is a 4d array with 32x32x3 values for the pixels and rgb colors. I define my model like this (i first reshape X to be a row vector because the 4d array creates errors):
X = tf.placeholder(tf.float32, [None,32,32,3])
Y_labeled = tf.placeholder(tf.int32, [None])
data = load_training_data()
with tf.name_scope('dnn'):
XX = tf.reshape(X, [-1,3072])
hidden1 = tf.layers.dense(XX, 300, name = 'hidden1', activation = tf.nn.relu)
hidden2 = tf.layers.dense(hidden1, 200, name = 'hidden2', activation = tf.nn.relu)
hidden3 = tf.layers.dense(hidden2, 200, name = 'hidden3', activation = tf.nn.relu)
hidden4 = tf.layers.dense(hidden3, 100, name = 'hidden4', activation = tf.nn.relu)
logits = tf.layers.dense(hidden4, 10, name = 'outputs')
with tf.name_scope('loss'):
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = (Y_labeled), logits = logits)
loss = tf.reduce_mean(cross_entropy, name = 'loss')
learning_rate = 0.01
with tf.name_scope('train'):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
with tf.name_scope('eval'):
correct = tf.nn.in_top_k(logits,Y_labeled, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
batch_size = 100
n_epochs = 50
with tf.Session() as sess:
    # Variables must be initialized in this session before any op reads them.
    sess.run(init)

    num_examples = len(data[0])
    for epoch in range(n_epochs):
        for iteration in range(num_examples // batch_size):
            start = iteration * batch_size
            stop = start + batch_size
            batch_feed = {X: data[0][start:stop], Y_labeled: data[1][start:stop]}
            sess.run(training_op, feed_dict=batch_feed)

        # Evaluate on the last batch of the epoch. `loss` is a Tensor, so it
        # has to be run to obtain a number; printing it directly only shows
        # the tensor object.
        acc_train, loss_value = sess.run([accuracy, loss], feed_dict=batch_feed)
        print(epoch, "train accuracy:", acc_train, "loss", loss_value)
I want to define a simple model that has 4 hidden layers. When I run this it compiles with no errors and starts "training", but the accuracy is 0.0 and it does not print any losses. I am not sure if the error is in my calculation of accuracy and loss or in my definition of the model.
There seems to be a problem with the way you feed your labels. When you create the placeholder Y_labeled = tf.placeholder(tf.int32, [None, 10]), each label is expected to be a vector of dimension 10, but later, when you create the NumPy label array labels = np.zeros(shape = [50000], dtype = int), each label is a scalar.
This is why you get this error: the placeholder needs to be fed a tensor of shape (batch_size, 10), but you feed it one of shape (batch_size,).