I am trying to train an encoder-decoder model with multispectral images having 9 channels, but the code that I am running downloads pretrained ResNet101 weights that were trained on 3-channel images.
Input given by me:
net_input = tf.placeholder(tf.float32,shape=[None,None,None,9])
net_output = tf.placeholder(tf.float32,shape=[None,None,None,num_classes])
Code for getting the pretrained weights for ResNet101:
if args.model == "ResNet101" or args.model == "ALL":
    subprocess.check_output(['wget', 'http://download.tensorflow.org/models/resnet_v2_101_2017_04_14.tar.gz', "-P", "models"])
    try:
        subprocess.check_output(['tar', '-xvf', 'models/resnet_v2_101_2017_04_14.tar.gz', "-C", "models"])
        subprocess.check_output(['rm', 'models/resnet_v2_101_2017_04_14.tar.gz'])
    except Exception as e:
        print(e)
        pass
The error that I am getting is:
Invalid argument: Assign requires shapes of both tensors to match. lhs shape= [7,7,9,64] rhs shape= [7,7,3,64]
What can be the solution here?
If you do not want to change your input from 9 channels down to 3, you need to change the ResNet architecture so that its input (and therefore the first convolution layer) takes 9 channels instead of 3, and then add the final layers for inference. Note that you will have to train it again.
Here is full example code; you just have to change channels to 9:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf

_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
path_to_zip = tf.keras.utils.get_file('cats_and_dogs.zip', origin=_URL, extract=True)
PATH = os.path.join(os.path.dirname(path_to_zip), 'cats_and_dogs_filtered')
train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')
BATCH_SIZE = 32
IMG_SIZE = (160, 160)
train_dataset = tf.keras.utils.image_dataset_from_directory(train_dir,
                                                            shuffle=True,
                                                            batch_size=BATCH_SIZE,
                                                            image_size=IMG_SIZE)
validation_dataset = tf.keras.utils.image_dataset_from_directory(validation_dir,
                                                                 shuffle=True,
                                                                 batch_size=BATCH_SIZE,
                                                                 image_size=IMG_SIZE)
class_names = train_dataset.class_names
val_batches = tf.data.experimental.cardinality(validation_dataset)
test_dataset = validation_dataset.take(val_batches // 5)
validation_dataset = validation_dataset.skip(val_batches // 5)
AUTOTUNE = tf.data.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)
test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE)
preprocess_input = tf.keras.applications.resnet50.preprocess_input
# Create the base model from the pre-trained model ResNet50
############### HERE YOU CHANGE TO 9 CHANNELS ###############
channels = 3
IMG_SHAPE = IMG_SIZE + (channels,)
base_model = tf.keras.applications.ResNet50(input_shape=IMG_SHAPE,
                                            include_top=False,
                                            weights='imagenet')
image_batch, label_batch = next(iter(train_dataset))
feature_batch = base_model(image_batch)
print(feature_batch.shape)
base_model.summary()
## HERE IS WHERE THE MAGIC HAPPENS
base_model_config = base_model.get_config()
#### HERE THE CHANNELS WILL BE ALTERED TO 9 ####
base_model_config['layers'][0]["config"]["batch_input_shape"] = (None, 160, 160, channels)
# (The stem's ZeroPadding2D padding stays at its default ((3, 3), (3, 3)); it is unrelated to the channel count.)
#######################################################
# Rebuild the model from the modified config (its weights are re-initialized)
base_model = tf.keras.Model.from_config(base_model_config)
base_model.summary()
### HERE YOU ADD THE FINAL LAYERS FOR INFERENCE
inputs = tf.keras.Input(shape=(160, 160, channels))
x = base_model(inputs, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(inputs, outputs)
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])
model.fit(x=train_dataset, epochs=2)
If there is any shape mismatch anywhere along the way, you now know how to change it: base_model_config['layers'][x]....=........
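For example, a minimal sketch for locating the shape-bearing entries in the config before patching them (the field names vary by layer type, so this only prints what is actually present):

for i, layer_conf in enumerate(base_model_config['layers']):
    cfg = layer_conf['config']
    shape_keys = [k for k in cfg if 'shape' in k]
    if shape_keys:
        print(i, layer_conf['class_name'], {k: cfg[k] for k in shape_keys})
# then patch the offending entry, e.g.:
# base_model_config['layers'][0]['config']['batch_input_shape'] = (None, 160, 160, 9)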
There are a few resources about this idea, such as a blog post about transferring a ResNet on RGB data to multi-channel images here, and a relevant Colab Notebook. Below is a working example based on those resources:
import numpy as np
import tensorflow as tf

def tile_kernels(kernel, out_channels, batch_dim=-2):
    mean_1d = np.mean(kernel, axis=batch_dim).reshape(kernel[:, :, -1:, :].shape)
    tiled = np.tile(mean_1d, (out_channels, 1))
    return tiled

def reshape_model_input(model_orig, custom_model, input_channels):
    conf = custom_model.get_config()
    layer_to_modify = conf["layers"][2]["config"]["name"]
    layer_names = [conf['layers'][x]['name'] for x in range(len(conf['layers']))]
    for layer in model_orig.layers:
        if layer.name in layer_names:
            if layer.get_weights() != []:
                target_layer = custom_model.get_layer(layer.name)
                if layer.name == layer_to_modify:
                    kernels, biases = layer.get_weights()
                    kernels_extra_channels = np.concatenate((kernels,
                                                             tile_kernels(kernels, input_channels - 3)),
                                                            axis=-2)
                    target_layer.set_weights([kernels_extra_channels, biases])
                else:
                    target_layer.set_weights(layer.get_weights())

if __name__ == "__main__":
    from tensorflow.keras.applications import ResNet50V2
    resnet50 = ResNet50V2(weights='imagenet', include_top=False)  # load resnet50 here - can be done differently
    config = resnet50.get_config()
    img_height = ...
    img_width = ...
    input_channels = 7
    config["layers"][0]["config"]["batch_input_shape"] = (None, img_height, img_width, input_channels)  # change the batch input shape to handle the different channel dimensions
    custom_resnet = tf.keras.models.Model.from_config(config)
    reshape_model_input(resnet50, custom_resnet, input_channels)  # modify the custom model by reference
    custom_resnet(np.zeros((1, img_width, img_height, input_channels)))  # just verifying that predicting with the new shape works in the custom model
This process just iterates over each layer in the original model and sets the corresponding weights in the custom model. To produce the kernels for the additional n input channels (in your case, n = 4, as this example targets 7 total channels), the mean is taken across the 3 RGB kernel channels and then replicated (as can be seen in the tile_kernels function). Another aggregation function could be used instead, such as the max, min, median, etc.; a sketch of that swap follows after the snippet below. If you don't want any of the weights from the original model (that is, you don't need the pretraining, just the architecture), simply modifying the original model's configuration and creating a new model from it will produce a randomly initialized model:
resnet50 = ...
config = resnet50.get_config()
img_height = ...
img_width = ...
input_channels = ...
config["layers"][0]["config"]["batch_input_shape"] = (None, img_height, img_width, input_channels)
custom_resnet = tf.keras.models.Model.from_config(config)
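As mentioned above, the mean is only one choice of aggregation. Here is a minimal sketch (a variant of tile_kernels, not part of the original answer) using the channel-wise max instead:

def tile_kernels_max(kernel, out_channels, channel_axis=-2):
    # Reduce across the 3 RGB kernel channels, keeping the axis,
    # then replicate the result once per extra input channel.
    agg = np.max(kernel, axis=channel_axis, keepdims=True)  # (k, k, 1, filters)
    return np.tile(agg, (1, 1, out_channels, 1))            # (k, k, out_channels, filters)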
Related
I'm new to tf/python and have created a model that classifies text with a toxicity level (obscene, toxic, threat, etc.). This is what I have so far, and it does produce the summary, so I know the model is loading correctly. How do I pass text to the model to return a prediction? Any help would be much appreciated.
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
checkpoint_path = "tf_model/the_model/saved_model.pb"
checkpoint_dir = os.path.dirname(checkpoint_path)
new_model = tf.keras.models.load_model(checkpoint_dir)
# Check its architecture
new_model.summary()
inputs = [
    "tenserflow seems like it fits the bill but there are zero tutorials that outline how to reuse a model in a production environment "
]
predictions = new_model.predict(inputs)
print(predictions)
I get many error messages; some of the long-winded ones are as follows:
WARNING:tensorflow:Model was constructed with shape (None, 150) for input KerasTensor(type_spec=TensorSpec(shape=(None, 150), dtype=tf.float32, name='input_1'), name='input_1', description="created by layer 'input_1'"), but it was called on an input with incompatible shape (None, 1).
ValueError: Negative dimension size caused by subtracting 3 from 1 for '{{node model/conv1d/conv1d}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](model/conv1d/conv1d/ExpandDims, model/conv1d/conv1d/ExpandDims_1)' with input shapes: [?,1,1,256], [1,3,256,64].
This is the Python code used to create the model and test a prediction, which works perfectly:
import tensorflow as tf
import numpy as np
import pandas as pd
import os

TRAIN_DATA = "datasets/train.csv"
GLOVE_EMBEDDING = "embedding/glove.6B.100d.txt"
train = pd.read_csv(TRAIN_DATA)
train["comment_text"] = train["comment_text"].fillna("fillna")
x_train = train["comment_text"].str.lower()
y_train = train[["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]].values
max_words = 100000
max_len = 150
embed_size = 100
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=max_words, lower=True)
tokenizer.fit_on_texts(x_train)
x_train = tokenizer.texts_to_sequences(x_train)
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_len)
embeddings_index = {}
with open(GLOVE_EMBEDDING, encoding='utf8') as f:
    for line in f:
        values = line.rstrip().rsplit(' ')
        word = values[0]
        embed = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = embed
word_index = tokenizer.word_index
num_words = min(max_words, len(word_index) + 1)
embedding_matrix = np.zeros((num_words, embed_size), dtype='float32')
for word, i in word_index.items():
    if i >= max_words:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
input = tf.keras.layers.Input(shape=(max_len,))
x = tf.keras.layers.Embedding(max_words, embed_size, weights=[embedding_matrix], trainable=False)(input)
x = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(128, return_sequences=True, dropout=0.1,
                                                      recurrent_dropout=0.1))(x)
x = tf.keras.layers.Conv1D(64, kernel_size=3, padding="valid", kernel_initializer="glorot_uniform")(x)
avg_pool = tf.keras.layers.GlobalAveragePooling1D()(x)
max_pool = tf.keras.layers.GlobalMaxPooling1D()(x)
x = tf.keras.layers.concatenate([avg_pool, max_pool])
preds = tf.keras.layers.Dense(6, activation="sigmoid")(x)
model = tf.keras.Model(input, preds)
model.summary()
model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(lr=1e-3), metrics=['accuracy'])
batch_size = 128
checkpoint_path = "tf_model/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=5, monitor='val_loss'),
    tf.keras.callbacks.TensorBoard(log_dir='logs'),
    cp_callback
]
model.fit(x_train, y_train, validation_split=0.2, batch_size=batch_size,
          epochs=1, callbacks=callbacks, verbose=1)
latest = tf.train.latest_checkpoint(checkpoint_dir)
model.load_weights(latest)
# Save the entire model as a SavedModel.
model.save('tf_model/the_model')
predictions = model.predict(np.expand_dims(x_train[42], 0))
print(tokenizer.sequences_to_texts([x_train[42]]))
print(y_train[42])
print(predictions)
Final solution:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
checkpoint_path = "tf_model/the_model/saved_model.pb"
checkpoint_dir = os.path.dirname(checkpoint_path)
new_model = tf.keras.models.load_model(checkpoint_dir)
max_words = 100000
max_len = 150
# Check its architecture
# new_model.summary()
inputs = ["tenserflow seems like it fits the bill but there are zero tutorials that outline how to reuse a model in a production environment."]
# use the same tokenizer that was used to build the model
# (note: fitting a fresh Tokenizer on the inference inputs, as below, does not
# reproduce the training word indices; see the note after the next snippet)
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=max_words, lower=True)
tokenizer.fit_on_texts(inputs)
# pass string to tokenizer and that 'array' is passed to predict
sequence = tokenizer.texts_to_sequences(inputs) # same tokenizer which is used on train data.
sequence = tf.keras.preprocessing.sequence.pad_sequences(sequence, maxlen = max_len)
predictions = new_model.predict(sequence)
print(predictions)
# [[0.0365479 0.01275077 0.02102855 0.00647011 0.02302513 0.00406089]]
It needs to be processed in the same way. This can be done with:
inputs = ["tenserflow seems like it fits the bill but there are zero tutorials that outline how to reuse a model in a production environment"]
sequence = tokenizer.texts_to_sequences(inputs)  # same tokenizer which is used on train data.
sequence = tf.keras.preprocessing.sequence.pad_sequences(sequence, maxlen=max_len)
predictions = new_model.predict(sequence)
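To make "the same tokenizer" literal, the usual approach is to persist the fitted tokenizer at training time and reload it at inference time. A minimal sketch (the pickle path is hypothetical):

import pickle

# At training time, right after tokenizer.fit_on_texts(x_train):
with open('tf_model/tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

# At inference time, instead of fitting a new Tokenizer on the inputs:
with open('tf_model/tokenizer.pkl', 'rb') as f:
    tokenizer = pickle.load(f)
sequence = tokenizer.texts_to_sequences(inputs)
sequence = tf.keras.preprocessing.sequence.pad_sequences(sequence, maxlen=max_len)
predictions = new_model.predict(sequence)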
I have the following code trying to perform predictions on part of a ResNet model; however, I am getting an error.
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Flatten, Dense

def layer_input_shape(Model, layer_index):
    input_shape = np.array(Model.layers[layer_index - 1].output_shape)
    input_shape = np.ndarray.tolist(np.delete(input_shape, 0))
    return input_shape

def resnet50_Model(Model, trainable=True):
    input_shape = layer_input_shape(Model, 1)
    input = tf.keras.layers.Input(shape=input_shape)
    first_layer = Model.layers[0]
    first_layer.trainable = trainable
    out = first_layer(input)
    for i in range(1, 12):
        layer_i = Model.layers[i]
        layer_i.trainable = trainable
        out = layer_i(out)
    out = Conv2D(filters=2, kernel_size=2, strides=(2, 2), activation='relu')(out)
    out = Flatten()(out)
    out = Dense(units=2, activation='softmax')(out)
    result_model = tf.keras.models.Model(inputs=[input], outputs=out)
    return result_model
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image

img = '/content/elephant.jpg'
img = image.load_img(img, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
preds = resnet_skip_model.predict(x)
print('Predicted:', decode_predictions(preds, top=3)[0])
Getting the error below:
ValueError: `decode_predictions` expects a batch of predictions (i.e. a 2D array of shape (samples, 1000)). Found array with shape: (1, 3)
I added a two-unit output Dense layer so I could predict only two classes, but decode_predictions expects the last Dense layer to output the 1000 ImageNet classes; therefore I changed the units from two to 1000:
out = Dense(units=1000, activation='softmax')(out)
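Alternatively, since decode_predictions is hard-wired to the 1000 ImageNet classes, you can keep units=2 and decode the predictions by hand. A minimal sketch (the class names are hypothetical):

class_names = ['cat', 'dog']                  # hypothetical labels for the two output units
preds = resnet_skip_model.predict(x)          # shape (1, 2)
for idx in np.argsort(preds[0])[::-1]:
    print(class_names[idx], float(preds[0][idx]))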
import logging
logging.getLogger("tensorflow").setLevel(logging.DEBUG)
try:
    import tensorflow.compat.v2 as tf
except Exception:
    pass
tf.enable_v2_behavior()
from tensorflow import keras
import numpy as np
import pathlib
# Train and export the model
# Load MNIST dataset
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Normalize the input image so that each pixel value is between 0 to 1.
train_images = train_images / 255.0
test_images = test_images / 255.0
# Define the model architecture
model = keras.Sequential([
    keras.layers.InputLayer(input_shape=(28, 28)),
    keras.layers.Reshape(target_shape=(28, 28, 1)),
    keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation=tf.nn.relu),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
# Train the digit classification model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    train_images,
    train_labels,
    epochs=1,
    validation_data=(test_images, test_labels)
)
# Convert to a TensorFlow Lite model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
tflite_models_dir = pathlib.Path("/tmp/mnist_tflite_models/")
tflite_models_dir.mkdir(exist_ok=True, parents=True)
tflite_model_file = tflite_models_dir/"mnist_model.tflite"
tflite_model_file.write_bytes(tflite_model)
# Convert using quantization
converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
mnist_train, _ = tf.keras.datasets.mnist.load_data()
images = tf.cast(mnist_train[0], tf.float32) / 255.0
mnist_ds = tf.data.Dataset.from_tensor_slices((images)).batch(1)
def representative_data_gen():
    for input_value in mnist_ds.take(100):
        yield [input_value]
converter.representative_dataset = representative_data_gen
tflite_model_quant = converter.convert()
tflite_model_quant_file = tflite_models_dir/"mnist_model_quant.tflite"
tflite_model_quant_file.write_bytes(tflite_model_quant)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
tflite_model_quant = converter.convert()
tflite_model_quant_file = tflite_models_dir/"mnist_model_quant_io.tflite"
tflite_model_quant_file.write_bytes(tflite_model_quant)
# Run the TensorFlow Lite models
interpreter = tf.lite.Interpreter(model_path=str(tflite_model_file))
interpreter.allocate_tensors()
interpreter_quant = tf.lite.Interpreter(model_path=str(tflite_model_quant_file))
interpreter_quant.allocate_tensors()
input_index_quant = interpreter_quant.get_input_details()[0]["index"]
output_index_quant = interpreter_quant.get_output_details()[0]["index"]
# # Load TFLite model and allocate tensors.
# interpreter = tf.lite.Interpreter(model_path=str(tflite_model_quant_file))
# interpreter.allocate_tensors()
# _________________________________ get_tensor______________________
# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# get details for each layer
all_layers_details = interpreter_quant.get_tensor_details()
# ___________________________________________________________________
weight1 = interpreter_quant.get_tensor(2)
weight2 = interpreter_quant.get_tensor(6)
# Example of modification (th is a threshold chosen by the user; it was not
# defined in the original snippet):
th = 0.5
weight1[weight1 > th] = 0
interpreter_quant.set_tensor(all_layers_details[2]['index'], weight1)
# Evaluate the models
# A helper function to evaluate the TF Lite model using "test" dataset.
def evaluate_model(interpreter):
    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]
    # Run predictions on every image in the "test" dataset.
    prediction_digits = []
    for test_image in test_images:
        # Pre-processing: add batch dimension and convert to float32 to match with
        # the model's input data format.
        test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
        interpreter.set_tensor(input_index, test_image)
        # Run inference.
        interpreter.invoke()
        # Post-processing: remove batch dimension and find the digit with highest
        # probability.
        output = interpreter.tensor(output_index)
        digit = np.argmax(output()[0])
        prediction_digits.append(digit)
    # Compare prediction results with ground truth labels to calculate accuracy.
    accurate_count = 0
    for index in range(len(prediction_digits)):
        if prediction_digits[index] == test_labels[index]:
            accurate_count += 1
    accuracy = accurate_count * 1.0 / len(prediction_digits)
    return accuracy
print(evaluate_model(interpreter))
# NOTE: Colab runs on server CPUs, and TensorFlow Lite currently
# doesn't have super optimized server CPU kernels. So this part may be
# slower than the above float interpreter. But for mobile CPUs, considerable
# speedup can be observed.
print(evaluate_model(interpreter_quant))
I am using Tensorflow Lite to train and quantize a simple network on MNIST. Here is an example from Tensorflow Lite documentation: https://www.tensorflow.org/lite/performance/post_training_integer_quant
However, in my method I want to modify and change some of the weight and model values after quantization and before testing the model. I know there are two commands, "get_tensor" and "set_tensor", to read and write tensors; however, it seems that "set_tensor" only works for loading and modifying the input. Is there any way that I can modify weights' values in TF-Lite? Thanks for your guidance. Here is the code: I got the tensor, called it weight1, and modified it. Then I wanted to assign it back to the quantized model. I used "set_tensor" but I got this error:
Process finished with exit code 138 (interrupted by signal 10: SIGBUS)
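One workaround, sketched below under the assumption that you can re-run the conversion: edit the weights in the float Keras model before converting, then convert again, instead of writing into the interpreter with set_tensor (which, as observed, is intended for input tensors). The threshold th is hypothetical:

# Assumes the float Keras `model` trained above is still available
conv = next(l for l in model.layers if isinstance(l, keras.layers.Conv2D))
kernels, biases = conv.get_weights()
th = 0.5                         # hypothetical threshold
kernels[kernels > th] = 0.0      # zero out the large weights
conv.set_weights([kernels, biases])
converter = tf.lite.TFLiteConverter.from_keras_model(model)
modified_tflite_model = converter.convert()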
import tensorflow as tf
import numpy as np
import os
import re
import PIL
def read_image_label_list(img_directory, folder_name):
    # Input:
    #  - Name of folder (test\\train)
    # Output:
    #  - List of names of files in folder
    #  - Label associated with each file
    cat_label = 1
    dog_label = 0
    filenames = []
    labels = []
    dir_list = os.listdir(os.path.join(img_directory, folder_name))  # List of all image names in 'folder_name' folder
    # Loop through all images in directory
    for i, d in enumerate(dir_list):
        if re.search("train", folder_name):
            if re.search("cat", d):  # If image filename contains 'Cat', then true
                labels.append(cat_label)
            else:
                labels.append(dog_label)
        filenames.append(os.path.join(img_directory, folder_name, d))
    return filenames, labels
# Define convolutional layer
def conv_layer(input, channels_in, channels_out):
    w_1 = tf.get_variable("weight_conv", [5, 5, channels_in, channels_out], initializer=tf.contrib.layers.xavier_initializer())
    b_1 = tf.get_variable("bias_conv", [channels_out], initializer=tf.zeros_initializer())
    conv = tf.nn.conv2d(input, w_1, strides=[1, 1, 1, 1], padding="SAME")
    activation = tf.nn.relu(conv + b_1)
    return activation

# Define fully connected layer
def fc_layer(input, channels_in, channels_out):
    w_2 = tf.get_variable("weight_fc", [channels_in, channels_out], initializer=tf.contrib.layers.xavier_initializer())
    b_2 = tf.get_variable("bias_fc", [channels_out], initializer=tf.zeros_initializer())
    activation = tf.nn.relu(tf.matmul(input, w_2) + b_2)
    return activation
# Define parse function to decode each input image
def _parse_function(img_path, label):
    img_file = tf.read_file(img_path)
    img_decoded = tf.image.decode_image(img_file, channels=3)
    img_decoded.set_shape([None, None, 3])
    img_decoded = tf.image.resize_images(img_decoded, (28, 28), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    img_decoded = tf.image.per_image_standardization(img_decoded)
    img_decoded = tf.cast(img_decoded, dtype=tf.float32)
    label = tf.one_hot(label, 1)
    return img_decoded, label
tf.reset_default_graph()
# Define parameters
EPOCHS = 10
BATCH_SIZE_training = 64
learning_rate = 0.001
img_dir = 'C:/Users/tharu/PycharmProjects/cat_vs_dog/data'
batch_size = 128
# Define data
features, labels = read_image_label_list(img_dir, "train")
# Define dataset
dataset = tf.data.Dataset.from_tensor_slices((features, labels)) # Takes slices in 0th dimension
dataset = dataset.map(_parse_function)
dataset = dataset.batch(batch_size)
iterator = dataset.make_initializable_iterator()
# Get next batch of data from iterator
x, y = iterator.get_next()
# Create the network (use different variable scopes for reuse of variables)
with tf.variable_scope("conv1"):
    conv_1 = conv_layer(x, 3, 32)
    pool_1 = tf.nn.max_pool(conv_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
with tf.variable_scope("conv2"):
    conv_2 = conv_layer(pool_1, 32, 64)
    pool_2 = tf.nn.max_pool(conv_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
flattened = tf.contrib.layers.flatten(pool_2)
with tf.variable_scope("fc1"):
    fc_1 = fc_layer(flattened, 7 * 7 * 64, 1024)
with tf.variable_scope("fc2"):
    logits = fc_layer(fc_1, 1024, 1)
# Define loss function
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf.cast(y, dtype=tf.int32)))
# Define optimizer
train = tf.train.AdamOptimizer(learning_rate).minimize(loss)
with tf.Session() as sess:
    # Initialize all the variables
    sess.run(tf.global_variables_initializer())
    # Train the network
    for i in range(EPOCHS):
        # Initialize iterator so that it starts at beginning of training set for each epoch
        sess.run(iterator.initializer)
        print("EPOCH", i)
        while True:
            try:
                _, epoch_loss = sess.run([train, loss])
            except tf.errors.OutOfRangeError:  # Error given when out of data
                if i % 2 == 0:
                    # [train_accuaracy] = sess.run([accuracy])
                    # print("Step ", i, "training accuracy = %{}".format(train_accuaracy))
                    print(epoch_loss)
                break
I've spent a few hours trying to figure out systematically why I've been getting 0 loss when I run this model.
Features = list of file locations for each image (e.g. ['data/train/cat.0.jpg', 'data/train/cat.1.jpg'])
Labels = [Batch_size, 1] one-hot vector
Initially I thought it was because there was something wrong with my data. But I've viewed the data after resizing and the images seem fine.
Then I tried a few different loss functions because I thought maybe I was misunderstanding what the TensorFlow function softmax_cross_entropy does, but that didn't fix anything.
I've tried running just the 'logits' section to see what the output is. This is just a small sample and the numbers seem fine to me:
[[0.06388957]
[0. ]
[0.16969752]
[0.24913025]
[0.09961276]]
Surely then the softmax_cross_entropy function should be able to compute this loss given that the corresponding labels are 0 or 1? I'm not sure if I'm missing something. Any help would be greatly appreciated.
As documented:
logits and labels must have the same shape, e.g. [batch_size, num_classes] and the same dtype (either float16, float32, or float64).
Since you mentioned your label is a "[Batch_size, 1] one_hot vector", I would assume both your logits and labels have shape [Batch_size, 1]. This will certainly lead to zero loss. Conceptually speaking, you have only 1 class (num_classes = 1), so your prediction cannot be wrong (loss = 0).
So, at least for your labels, you should transform them with tf.one_hot(indices=labels, depth=num_classes). Your prediction logits should then also have shape [batch_size, num_classes].
Alternatively, you can use sparse_softmax_cross_entropy_with_logits, where:
A common use case is to have logits of shape [batch_size, num_classes] and labels of shape [batch_size]. But higher dimensions are supported.
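A minimal sketch of both variants in the question's TF1 style (shapes only; num_classes = 2 is assumed):

num_classes = 2
logits = tf.placeholder(tf.float32, [None, num_classes])   # [batch_size, num_classes]
int_labels = tf.placeholder(tf.int32, [None])              # class indices, [batch_size]
# Option 1: one-hot labels + softmax_cross_entropy_with_logits
onehot_labels = tf.one_hot(indices=int_labels, depth=num_classes)
loss_dense = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=onehot_labels, logits=logits))
# Option 2: integer labels + sparse_softmax_cross_entropy_with_logits
loss_sparse = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=int_labels, logits=logits))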
I am using TensorFlow version 1.5 on Windows 10. I am using the TensorFlow-Slim model of the Inception V4 network, picked up from its GitHub page, using their pretrained weights and adding my own layers at the end to classify 120 different objects. The size of my training dataset is around 10,000 images, each 299*299*3. This is the complete code, except the lines containing the import modules and dataset paths.
tf.logging.set_verbosity(tf.logging.INFO)
with slim.arg_scope(inception_blocks_v4.inception_v4_arg_scope()):
    X_input = tf.placeholder(tf.float32, shape=(None, image_size, image_size, 3))
    Y_label = tf.placeholder(tf.float32, shape=(None, num_classes))
    targets = convert_to_onehot(labels_dir, no_of_features=num_classes)
    targets = tf.convert_to_tensor(targets, dtype=tf.float32)
    Images = []  # TO STORE THE RESIZED IMAGES IN THE FORM OF LIST TO PASS IT TO tf.train.batch()
    images = glob.glob(images_file_path)
    i = 0
    for my_img in images:
        image = mpimg.imread(my_img)[:, :, :3]
        image = tf.convert_to_tensor(image, dtype=tf.float32)
        Images.append(image)
    logits, end_points = inception_blocks_v4.inception_v4(inputs=X_input, num_classes=pre_num_classes, is_training=True, create_aux_logits=False)
    pretrained_weights = slim.assign_from_checkpoint_fn(ckpt_dir, slim.get_model_variables('InceptionV4'))
    with tf.Session() as sess:
        pretrained_weights(sess)
    # MY LAYERS, add bias as well
    my_layer = slim.fully_connected(logits, 560, activation_fn=tf.nn.relu, scope='myLayer1', weights_initializer=tf.truncated_normal_initializer(stddev=0.001), weights_regularizer=slim.l2_regularizer(0.00005), biases_initializer=tf.truncated_normal_initializer(stddev=0.001), biases_regularizer=slim.l2_regularizer(0.00005))
    my_layer = slim.dropout(my_layer, keep_prob=0.6, scope='myLayer2')
    my_layer = slim.fully_connected(my_layer, num_classes, activation_fn=tf.nn.relu, scope='myLayer3', weights_initializer=tf.truncated_normal_initializer(stddev=0.001), weights_regularizer=slim.l2_regularizer(0.00005), biases_initializer=tf.truncated_normal_initializer(stddev=0.001), biases_regularizer=slim.l2_regularizer(0.00005))
    my_layer_logits = slim.fully_connected(my_layer, num_classes, activation_fn=None, scope='myLayer4')
loss = tf.losses.softmax_cross_entropy(onehot_labels=Y_label, logits=my_layer_logits)
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
train_op = optimizer.minimize(loss)
batch_size = 8
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(100):
        images, labels = tf.train.batch([Images, targets], batch_size=batch_size, num_threads=1, capacity=(4 * batch_size), enqueue_many=True)
        print(images)  # To check their shape
        print(labels)
        train_op.run(feed_dict={X_input: images.eval(session=sess), Y_label: labels.eval(session=sess)})
        print(i)
I used the print(i) statement to keep track of how many iterations are done. After running the script for more than 3 hours, not even a single epoch of training has completed. It seems that it gets stuck at the train_op.run() step. I don't know what the problem is.
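For what it's worth, a common cause of exactly this hang is that tf.train.batch only builds queue ops; the queue runners are never started, and new ops are added inside the loop on every iteration. A minimal sketch of the usual restructuring, assuming Images and targets as built above:

images_batch, labels_batch = tf.train.batch([Images, targets], batch_size=batch_size,
                                            num_threads=1, capacity=(4 * batch_size),
                                            enqueue_many=True)  # build ONCE, outside the loop
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # start the queue runners
    for i in range(100):
        imgs, lbls = sess.run([images_batch, labels_batch])
        sess.run(train_op, feed_dict={X_input: imgs, Y_label: lbls})
        print(i)
    coord.request_stop()
    coord.join(threads)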