Decode prediction of custom keras model - python

some days ago I started with ML as I wanted to do a hcaptcha solver. I have everything ready, I just need to train a model that will classify the captcha images so I can send a request with the good answer and get the captcha token.
I've looked into some tutorials on how to train my own model with several classes. I have it the following way:
1 trainer folder, 1 validation folder and 1 testing folder. The trainer and validation folders each contain subfolders named airplane, truck, boat, train, ..., each one holding approximately 20 images. The testing folder holds some random images related to the classes I have.
I have trained the model and it seems like I'm getting an accuracy of 1. Then I take some of the random testing images and try to predict them using this saved model. It does its job and predicts them, returning an array of numbers. The thing is, I don't know how to decode those predictions, nor how to see the list of classes with their corresponding integers before predicting.
I'm super new on this so I'm sure anything will help :)
My code below:
import os
from keras.preprocessing import image
from keras.models import Sequential
from keras import layers
from keras.models import load_model
import numpy as np
# Directories holding the training, validation and hold-out test images.
trainer_path = "./img/trainer"
validator_path = "./img/validator"
testing_path = "./img/tester"

# Input resolution and training schedule.
WIDTH = HEIGHT = 128
BATCH = 30
EPOCHS = 15

# Both datasets yield integer labels (label_mode="int") in batches of BATCH.
# NOTE(review): image_size is documented as (height, width); the order used
# here is indistinguishable only because both values are 128 - confirm.
train_dataset = image.image_dataset_from_directory(
    directory=trainer_path,
    image_size=(WIDTH, HEIGHT),
    batch_size=BATCH,
    label_mode="int",
)
validator_dataset = image.image_dataset_from_directory(
    directory=validator_path,
    image_size=(WIDTH, HEIGHT),
    batch_size=BATCH,
    label_mode="int",
)
# Small CNN: two same-padded 3x3 convolutions, one max-pool, then a 10-unit
# dense head.
# NOTE(review): the final Dense layer has no activation, so the network emits
# raw scores rather than probabilities, while the string loss below is used
# with its default settings - confirm this pairing is intended.
model = Sequential()
model.add(layers.Input((WIDTH, HEIGHT, 3)))
model.add(layers.Conv2D(16, 3, padding="same"))
model.add(layers.Conv2D(32, 3, padding="same"))
model.add(layers.MaxPooling2D())
model.add(layers.Flatten())
model.add(layers.Dense(10))

model.compile(
    loss=["sparse_categorical_crossentropy"],
    optimizer="adam",
    metrics=["accuracy"],
)

# Train for EPOCHS epochs, validating on the validation dataset after each.
model_fit = model.fit(
    train_dataset,
    validation_data=validator_dataset,
    epochs=EPOCHS,
    verbose=2,
)
# Reload the saved model and print its raw output for every test image.
# NOTE(review): nothing shown here writes "./model"; presumably it was saved
# by an earlier run - confirm.
model = load_model("./model")
for file_name in os.listdir(testing_path):
    # target_size is a (height, width) pair; the channel count is not part
    # of it, so the former 3-tuple only worked because the extra entry was
    # never read.
    img = image.load_img(
        os.path.join(testing_path, file_name),
        target_size=(HEIGHT, WIDTH),
    )
    img_array = image.img_to_array(img)
    # The model expects a batch, so add a leading axis of size 1.
    img_batch = np.expand_dims(img_array, axis=0)
    prediction = model.predict(img_batch)
    print(prediction)
    print()
Output example:
[[ 875.5614 3123.8257 1521.7046 90.056526 335.5274
-785.3671 1075.9199 1105.3068 -14.917503 -3745.6494 ]]

You have to apply activation function on last Dense layer, if you want to classify the image it should be softmax (you will get probabilities for all classes), here is the link:
https://keras.io/api/layers/activations/
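As for the class names, they are sorted alphanumerically by subfolder name, so integer label i corresponds to the i-th name in that order. The dataset returned by image_dataset_from_directory exposes this list as train_dataset.class_names, and np.argmax(prediction, axis=-1) gives the index of the predicted class. You can also pass the class_names argument to fix the order yourself; here is the link to the arguments of this function: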
https://www.tensorflow.org/api_docs/python/tf/keras/utils/image_dataset_from_directory

Related

How to train transfer-learning model on custom dataset? ValueError: Shape must be rank 4

I am trying to build a transfer learning model to classify images. The images are a gray-scale (2D). previously I used image_dataset_from_directory method to read the images and there was no problem. However, I am trying to use a custom read function to have more control and access on the data such as knowing how many images in each class. When using this custom read function, I get an error (down below) while trying to train the model. I am not sure about what caused this error.
part1: reading the dataset
import numpy as np
import os
import tensorflow as tf
import cv2
from tensorflow import keras
# neural network
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers.experimental import preprocessing
IMG_WIDTH = 160
IMG_HEIGHT = 160
DATA_PATH = r"C:\Users\user\Documents\chest_xray"
TRAIN_DIR = os.path.join(DATA_PATH, 'train')


def create_dataset(img_folder):
    """Load every image below ``img_folder`` as a scaled grayscale array.

    Each sub-directory of ``img_folder`` is treated as one class; its name is
    used as the label of every file inside it.

    Returns:
        A pair ``(images, labels)`` of parallel lists: the float32 arrays
        (pixel values divided by 255) and the directory name each came from.
    """
    images = []
    labels = []
    for label in os.listdir(img_folder):
        label_dir = os.path.join(img_folder, label)
        for file_name in os.listdir(label_dir):
            # Flag 0 asks OpenCV for a single-channel (grayscale) image.
            pixels = cv2.imread(os.path.join(label_dir, file_name), 0)
            pixels = cv2.resize(
                pixels, (IMG_HEIGHT, IMG_WIDTH), interpolation=cv2.INTER_AREA
            )
            pixels = np.array(pixels).astype('float32') / 255
            images.append(pixels)
            labels.append(label)
    return images, labels
# Load the training images together with the directory name of each one.
img_data, class_name = create_dataset(TRAIN_DIR)
# Give every distinct class name an integer index.
target_dict = {name: index for index, name in enumerate(np.unique(class_name))}
target_dict
# Integer label of each image, in the same order as img_data.
target_val = [target_dict[name] for name in class_name]
This part produces a list of 5232 elements; each element is a NumPy array of size 160x160 (float32).
part 2: creating the model
def build_model():
    """Assemble and compile a frozen EfficientNetB7 with a one-unit head.

    The network takes ``(160, 160, 3)`` inputs, applies random rotation,
    translation, flip and contrast augmentation, runs the ImageNet-pretrained
    EfficientNetB7 (kept frozen) and finishes with global average pooling,
    batch normalisation, dropout and a single-unit dense layer. It is compiled
    with Adam (learning rate 1e-2) and binary cross-entropy on logits.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    inputs = tf.keras.Input(shape=(160, 160, 3))
    augmentation = Sequential(
        [
            preprocessing.RandomRotation(factor=0.15),
            preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1),
            preprocessing.RandomFlip(),
            preprocessing.RandomContrast(factor=0.1),
        ],
        name="img_augmentation",
    )
    augmented = augmentation(inputs)

    backbone = tf.keras.applications.EfficientNetB7(
        include_top=False,
        drop_connect_rate=0.4,
        weights='imagenet',
        input_tensor=augmented,
    )
    # Only the new head below is trained; the pretrained weights stay fixed.
    backbone.trainable = False

    features = tf.keras.layers.GlobalAveragePooling2D(name="avg_pool")(backbone.output)
    features = tf.keras.layers.BatchNormalization()(features)
    features = tf.keras.layers.Dropout(0.2, name="top_dropout")(features)
    outputs = tf.keras.layers.Dense(1, name="pred")(features)

    network = tf.keras.Model(inputs, outputs, name="EfficientNet")
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2),
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )
    return network


model = build_model()
part 3: train the model
# Train for five epochs on the arrays built from img_data and target_val.
# NOTE(review): img_data holds 2-D (160, 160) arrays while build_model()
# declares a (160, 160, 3) input - confirm the channel axis before fitting.
history = model.fit(x=np.array(img_data), y=np.array(target_val), epochs=5)
the error I get:
ValueError: Shape must be rank 4 but is rank 3 for '{{node
EfficientNet/img_augmentation/random_rotation_1/transform/ImageProjectiveTransformV3}} =
ImageProjectiveTransformV3[dtype=DT_FLOAT, fill_mode="REFLECT", interpolation="BILINEAR"]
(IteratorGetNext, EfficientNet/img_augmentation/random_rotation_1/rotation_matrix/concat,
EfficientNet/img_augmentation/random_rotation_1/transform/strided_slice,
EfficientNet/img_augmentation/random_rotation_1/transform/fill_value)' with input shapes:
[?,160,160], [?,8], [2], [].
The problem in the code is that OpenCV reads the image in grayscale format, but the grayscale format of the image returned is not (160,160,1) but (160,160).
Because of this fact, the error is thrown.
I managed to replicate your problem by testing it locally.
Say we randomly train on 12 samples.
Possible input formats:
#This one works
1. history = model.fit(x=np.random.rand(12,160,160,3), y=np.array([1,1,1,1,1,1,0,0,0,0,0,0]), epochs=5,verbose=1) WORKS
#This one works
2. history = model.fit(x=np.random.rand(12,160,160,1), y=np.array([1,1,1,1,1,1,0,0,0,0,0,0]), epochs=5,verbose=1) WORKS
#This one fails
3. history = model.fit(x=np.random.rand(12,160,160), y=np.array([1,1,1,1,1,1,0,0,0,0,0,0]), epochs=5,verbose=1) FAILS
(1) and (2) work.
(3) fails, yielding:
ValueError: Shape must be rank 4 but is rank 3 for '{{node
EfficientNet/img_augmentation/random_rotation_4/transform/ImageProjectiveTransformV2}} = ImageProjectiveTransformV2[dtype=DT_FLOAT, fill_mode="REFLECT", interpolation="BILINEAR"](IteratorGetNext,
EfficientNet/img_augmentation/random_rotation_4/rotation_matrix/concat,
EfficientNet/img_augmentation/random_rotation_4/transform/strided_slice)'
with input shapes: [?,160,160], [?,8], [2].
Therefore, ensure that your data format is in the shape (160,160,1) or (160,160,3).
As an alternative, after you read the image with OpenCV, you can use
image = np.expand_dims(image,axis=-1)
to programmatically insert the last axis (the grayscale channel).

Problem with integrating keras into a sklearn pipeline

I am using a wrapper from sklearn to find the best hyperparameters for my Keras model. Briefly, this model is a conv autoencoder and takes in data with the shape of (x,x,x). Keras wrapper seems to take data with the shape of (x,x). Since it is autoencoder model, the data would be in the shape of (x,x,x) and I think because of this reason, I am getting the following error ValueError: Invalid shape for y: (3744, 288, 1). How can I resolve this?
full code
"""
# Load libraries
"""
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from matplotlib import pyplot as plt
import numpy as np
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
# Set random seed
np.random.seed(0)
"""
## Load the data
"""
master_url_root = "https://raw.githubusercontent.com/numenta/NAB/master/data/"

# Series from the "artificialNoAnomaly" folder; used below as training data.
df_small_noise_url_suffix = "artificialNoAnomaly/art_daily_small_noise.csv"
df_small_noise_url = f"{master_url_root}{df_small_noise_url_suffix}"
df_small_noise = pd.read_csv(
    df_small_noise_url, index_col="timestamp", parse_dates=True
)

# Series from the "artificialWithAnomaly" folder.
df_daily_jumpsup_url_suffix = "artificialWithAnomaly/art_daily_jumpsup.csv"
df_daily_jumpsup_url = f"{master_url_root}{df_daily_jumpsup_url_suffix}"
df_daily_jumpsup = pd.read_csv(
    df_daily_jumpsup_url, index_col="timestamp", parse_dates=True
)
"""
## Prepare training data
"""
# Standardise the training series. The mean and standard deviation are kept
# so that test data can be scaled with the same statistics.
training_mean = df_small_noise.mean()
training_std = df_small_noise.std()
df_training_value = (df_small_noise - training_mean) / training_std
print("Number of training samples:", len(df_training_value))
"""
### Create sequences
Create sequences combining `TIME_STEPS` contiguous data values from the
training data.
"""
TIME_STEPS = 288


def create_sequences(values, time_steps=TIME_STEPS):
    """Stack overlapping windows of ``time_steps`` consecutive rows.

    Window ``i`` is ``values[i : i + time_steps]`` for every ``i`` in
    ``range(len(values) - time_steps)``; the windows are stacked along a new
    leading axis.

    NOTE(review): that range stops one short of the last full window (the one
    starting at ``len(values) - time_steps``) - confirm whether dropping it is
    intended.
    """
    window_starts = range(len(values) - time_steps)
    return np.stack([values[start:start + time_steps] for start in window_starts])
# Turn the normalised training series into overlapping windows of TIME_STEPS
# rows; the resulting array is both the input and the target of the autoencoder.
x_train = create_sequences(df_training_value.values)
print("Training input shape: ", x_train.shape)
"""
## Build a model
We will build a convolutional reconstruction autoencoder model. The model will
take input of shape `(batch_size, sequence_length, num_features)` and return
output of the same shape. In this case, `sequence_length` is 288 and
`num_features` is 1.
"""
# Create function returning a compiled network
def create_network(optimizer='Adam'):
    """Build and compile the 1-D convolutional reconstruction autoencoder.

    Args:
        optimizer: Optimizer identifier (a name such as ``'adam'`` or an
            optimizer instance) handed to ``model.compile``. A name uses that
            optimizer's default learning rate; pass an instance to choose
            another one.

    Returns:
        A compiled ``keras.Sequential`` whose input shape is taken from
        ``x_train`` and whose output has one filter per time step, trained
        with mean squared error and reporting mean absolute error.
    """
    model = keras.Sequential(
        [
            layers.Input(shape=(x_train.shape[1], x_train.shape[2])),
            # Encoder: two strided convolutions.
            layers.Conv1D(
                filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
            ),
            layers.Dropout(rate=0.2),
            layers.Conv1D(
                filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
            ),
            # Decoder: two strided transposed convolutions mirror the encoder.
            layers.Conv1DTranspose(
                filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
            ),
            layers.Dropout(rate=0.2),
            layers.Conv1DTranspose(
                filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
            ),
            layers.Conv1DTranspose(filters=1, kernel_size=7, padding="same"),
        ]
    )
    # Forward the requested optimizer so a hyper-parameter search can vary it;
    # `keras.optimizers.optimizer` is not an optimizer class and ignored the
    # argument entirely.
    model.compile(optimizer=optimizer, loss="mse", metrics=['mae'])
    return model
# Hyper-parameter tuning
# Wrap Keras model so it can be used by scikit-learn
# Expose the Keras model builder through the scikit-learn estimator interface.
CAE = KerasClassifier(build_fn=create_network, verbose=0)

# Candidate values for each hyperparameter.
epochs = [5, 10]
batches = [5, 10, 100]
optimizers = ['rmsprop', 'adam']
hyperparameters = {
    "optimizer": optimizers,
    "epochs": epochs,
    "batch_size": batches,
}

# 3-fold grid search over every combination of the values above.
grid = GridSearchCV(estimator=CAE, param_grid=hyperparameters, cv=3)

# The inputs double as the targets because this is a reconstruction model.
grid_result = grid.fit(x_train, x_train, validation_split=0.1)

# Report the best hyperparameter combination found.
print(grid_result.best_params_)
This is a special problem with KerasClassifier.fit(). If you look at its source code, you'd see that it throws error if y has >2 dimensions. Perhaps it is not aimed at autoencoders optimization :)
Your choices are:
1. subclass KerasClassifier and override fit() to remove this limitation
2. use another optimization engine (my preference would be optuna)
3. squeeze the extra dimension out at the end of the model and reduce the dimensions of y_train.
for 3) use these lines:
layers.Reshape((288,)) # add in the end of model constructor
y_train = x_train.reshape(x_train.shape[:-1]) # to match the above change
grid_result = grid.fit(x_train, y_train, validation_split=0.1) # feed y_train
There is one more, most elegant solution:
replace keras.wrappers.scikit_learn.KerasClassifier with keras.wrappers.scikit_learn.KerasRegressor. The latter is not checking dimensions of y.

TensorFlow batch tensors with different shapes

I am studying Neural Network and I have encountered what is probably a silly problem which I can't figure out. For my first ever network, I have to create a flower image classifier in Keras and TensorFlow using the oxford_flowers102 and the MobileNet pre-trained model from TensorFlow Hub.
The issue seems to be that the images are not resized to (224,224,3); instead they keep their original shapes, which differ from one another. However, according to my class material, my resizing code is correct, so I don't understand what is going on or what I am doing wrong.
Thank you very much for all your help.
# LOADING
# Download/load the dataset as (image, label) pairs together with its metadata.
dataset, dataset_info = tfds.load(
    'oxford_flowers102',
    with_info=True,
    as_supervised=True,
)
# Pull the three predefined splits out of the returned mapping.
training_set = dataset['train']
testing_set = dataset['test']
validation_set = dataset['validation']
# PROCESSING AND BATCHES
def normalize(img, lbl):
    """Return ``img`` as float32, resized to 224x224 and divided by 255.

    The label ``lbl`` is passed through unchanged.
    """
    as_float = tf.cast(img, tf.float32)
    resized = tf.image.resize(as_float, size=(224, 224))
    return resized / 255, lbl
batch_size = 64
# NOTE(review): each pipeline batches before `normalize` is mapped, so the
# elements are grouped at their original sizes - confirm this order.
training_batches = (
    training_set.cache()
    .shuffle(train_examples // 4)
    .batch(batch_size)
    .map(normalize)
    .prefetch(1)
)
validation_batches = (
    validation_set.cache()
    .batch(batch_size)
    .map(normalize)
    .prefetch(1)
)
testing_batches = (
    testing_set.cache()
    .batch(batch_size)
    .map(normalize)
    .prefetch(1)
)
# BUILDING THE NETWORK
# Pretrained MobileNet V2 feature extractor, kept frozen.
URL = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4"
mobile_net = hub.KerasLayer(URL, input_shape=(224, 224, 3))
mobile_net.trainable = False

# Classification head on top of the extracted features.
skynet = tf.keras.Sequential()
skynet.add(mobile_net)
skynet.add(tf.keras.layers.Dense(256, activation='relu'))
skynet.add(tf.keras.layers.Dropout(0.5))
skynet.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
# TRAINING THE NETWORK
skynet.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Stop once the validation loss has not improved for 5 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
Epochss = 25
# Validate on the batched, normalised pipeline (validation_batches); the raw
# validation_set yields single, unresized images and cannot be fed to the model.
history = skynet.fit(
    training_batches,
    epochs=Epochss,
    validation_data=validation_batches,
    callbacks=[early_stopping],
)
ERROR:
InvalidArgumentError: Cannot batch tensors with different shapes in component 0. First element had shape [590,501,3] and element 1 had shape [500,752,3].
[[node IteratorGetNext (defined at /opt/conda/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]] [Op:__inference_distributed_function_18246]
Function call stack:
distributed_function
The problem is that in your input pipeline you were batching your dataset before making your images the same size. Your normalize(img, lbl) function is only written to handle a single image, not a complete batch.
So in order to make your code run, you will have to make the following changes, you will have to call the map API before batch API as shown below.
batch_size = 64
# Resize and scale every image first so all elements share one shape, and
# only then group them into batches.
# The training pipeline keeps the shuffle step of the question's pipeline
# (buffer of train_examples // 4 elements) so training order is still random.
training_batches = (
    training_set.cache()
    .shuffle(train_examples // 4)
    .map(normalize)
    .batch(batch_size)
    .prefetch(1)
)
validation_batches = validation_set.cache().map(normalize).batch(batch_size).prefetch(1)
testing_batches = testing_set.cache().map(normalize).batch(batch_size).prefetch(1)

Keras conditional passing one model output to another model

I am trying to build a neural network in Keras which first checks whether the image shows a cat or a dog (base model).
If it is a dog, the image is passed through another model (sub-model-1).
If it is a cat, the image is passed through another model (sub-model-2).
The sub-models are small models specially trained to classify by breed: sub-model-1 classifies dogs into the various dog breeds, while sub-model-2 classifies cats into the various cat breeds.
The problem i am facing is :
I don't know how to add a conditional layer. Suppose the base model has 5 million neurons and each sub-model has 2 million neurons: an image passed through the base model should then go through only sub-model-1 or sub-model-2, so that in total only 7 million neurons are active when taking one image to the final output.
Any help, reference, everything would be appreciable.
Here is another solution which may train faster, run faster and use less RAM, give better performance, and be easier to use than the alternatives listed here.
Just use a single model with multiple outputs: a binary output (cat/dog), a cat breed output (multiclass), and a dog breed output (multiclass). During training, you can use a custom loss function to ignore the loss that corresponds to the wrong species (for example, ignore the cat breed output for dog images).
The benefits are:
Faster training: just one model to train.
Better performance: you can often get better performance when doing multi-task learning like this. That's because dog images and cat images have a lot in common, so it's helpful to train a single base neural network for both and then build specialized networks on top of that.
Less RAM and less compute: instead of having to go through two CNNs (one for the cat/dog detector and one for the breed), we just go through one (the base network). This largely compensates the unnecessary computations that we do by going through the dog breed classifier even when the cat/dog detector says it's a cat.
Easier to use: just one call to the model, and you get everything you need all at once. Moreover, if the cat/dog detector is not quite sure (for example it outputs a 50% probability), then you can at least have reasonable candidates for both cats and dogs.
Here's a working example. You just need to replace the data with your own. Note that there are three labels:
cat/dog: for example [0, 1, 1, 0] for dog, cat, cat, dog
cat breed: for example [-1, 2, 0, -1] for not-a-cat, 2nd cat class, 0th cat class, not-a-cat
dog breed: for example [3, -1, -1, 1] for 3rd dog class, not-a-dog, not-a-dog, 1st dog class
import numpy as np
import tensorflow as tf
from tensorflow import keras
# Fix both random generators so the example is reproducible.
np.random.seed(1)
tf.random.set_seed(1)

num_images = 200
num_cat_breeds, num_dog_breeds = 10, 15

# Random stand-in images and one overall breed id per image; ids below
# num_cat_breeds denote cats, the remaining ids denote dogs.
X_train = np.random.random([num_images, 32, 32, 3])
y_breed = np.random.randint(num_cat_breeds + num_dog_breeds, size=num_images)
y_is_cat = y_breed < num_cat_breeds

# Per-species breed labels, with -1 marking "not this species".
y_cat_breed = np.where(y_is_cat, y_breed, -1)
y_dog_breed = np.where(~y_is_cat, y_breed - num_cat_breeds, -1)
# Shared convolutional trunk that feeds all three heads.
base_model = keras.Sequential()
base_model.add(keras.layers.Conv2D(filters=32, kernel_size=3, activation="relu"))
base_model.add(keras.layers.Flatten())

# One small head per task: cat-vs-dog, cat breed, dog breed.
model_is_cat = keras.Sequential()
model_is_cat.add(keras.layers.Dense(1, activation="sigmoid"))

model_cat_breed = keras.Sequential()
model_cat_breed.add(keras.layers.Dense(num_cat_breeds, activation="softmax"))

model_dog_breed = keras.Sequential()
model_dog_breed.add(keras.layers.Dense(num_dog_breeds, activation="softmax"))

# Wire the trunk and the heads into a single three-output model.
image_input = keras.layers.Input(shape=[32, 32, 3])
z = base_model(image_input)
is_cat = model_is_cat(z)
cat_breed = model_cat_breed(z)
dog_breed = model_dog_breed(z)
model = keras.Model(
    inputs=[image_input],
    outputs=[is_cat, cat_breed, dog_breed],
)
def optional_crossentropy(y_true, y_pred):
    """Sparse categorical cross-entropy that ignores samples labelled -1.

    Args:
        y_true: Class index per sample, with -1 marking samples whose loss
            must not count. May have shape ``(batch,)`` or ``(batch, 1)``.
        y_pred: Predicted class probabilities, one row per sample.

    Returns:
        The per-sample loss, with 0 for every ignored sample.
    """
    # Flatten the labels so the mask below always has shape (batch,). A
    # (batch, 1) mask would broadcast against the (batch,) loss into a
    # (batch, batch) matrix instead of masking sample by sample.
    y_true = tf.reshape(y_true, [-1])
    is_not_ignored = tf.not_equal(y_true, tf.cast(-1, y_true.dtype))
    # Ignored labels are swapped for a valid class index (0) so the loss can
    # be computed; their contribution is zeroed by the mask afterwards.
    y_true_no_ignore = tf.where(is_not_ignored, y_true, tf.zeros_like(y_true))
    loss = keras.losses.sparse_categorical_crossentropy(y_true_no_ignore, y_pred)
    return loss * tf.cast(is_not_ignored, loss.dtype)
# One loss per output, in output order: cat/dog, cat breed, dog breed.
model.compile(
    optimizer="adam",
    loss=["binary_crossentropy", optional_crossentropy, optional_crossentropy],
)
model.fit(x=X_train, y=[y_is_cat, y_cat_breed, y_dog_breed], epochs=2)

# Predict the first two images; one array comes back per output.
(
    y_is_cat_pred,
    y_cat_breed_pred,
    y_dog_breed_pred,
) = model.predict(X_train[:2])
print(y_is_cat_pred)
print(y_cat_breed_pred)
print(y_dog_breed_pred)
Method 1.
You can define a Dense layer with static weights that sets some of the outputs to 0 based on the previous model's outputs. However, this is not how it is usually done.
Method 2.
This is what is actually done in practice.
#staticmethod
def animal_breed(image):
    """Sketch of two-stage inference: a base model picks a specific model.

    The base model's prediction for ``image`` selects one of two follow-up
    models, and that model's prediction is returned. This is illustrative
    pseudo-code rather than a runnable implementation.
    """
    # Just an example for getting some Models.
    def get_model(inputs):
        # NOTE(review): the first Dense layer is applied to the outer `image`
        # rather than to `inputs` - confirm this is intended.
        y = Dense(5)(image)
        y = Dense(5, name='final-1')(y)
        return Model(input=inputs, output=Dense(10)(y))
    # Define Base Model
    DogCatModel = get_model(
        inputs=image)
    result = DogCatModel.predict(image)
    # Get Base model on condition. Or load your model
    # from any other source.
    def get_specific(value, model1, model2):
        # Pick model1 when the first score is larger, otherwise model2.
        if value[0] > value[1]:
            return model1
        return model2
    # Just a mock of inserting the previous result.
    # In real work you would insert scalar results
    # into the last layers (after the CNN).
    # NOTE(review): this chained assignment first rebinds `inputs` to `result`
    # and then stores `result` into `inputs[0][0]` - confirm the intent.
    inputs = inputs[0][0] = result
    SpecificModel = get_specific(
        result, get_model(inputs), get_model(inputs)
    )
    return SpecificModel.predict(inputs)
Why is it so? You might expect something else, but in fact this is a common solution which is easy to scale. You will not usually use layers themselves to combine different models. It is also much easier to configure/freeze settings.
I suggest you train the cat/dog binary classification model independently, as well as the cat breed and dog breed models. Then, you can use a custom Keras model for inference. Here is a working example, you just need to load your own dataset, and adjust the model architectures to your liking.
import numpy as np
import tensorflow as tf
from tensorflow import keras
# Fix both random generators so the example is reproducible.
np.random.seed(1)
tf.random.set_seed(1)

num_images = 200
num_cat_breeds, num_dog_breeds = 10, 15

# Random stand-in images and one overall breed id per image; ids below
# num_cat_breeds denote cats, the remaining ids denote dogs.
X_train = np.random.random([num_images, 32, 32, 3])
y_breed = np.random.randint(num_cat_breeds + num_dog_breeds, size=num_images)
y_is_cat = y_breed < num_cat_breeds

# Breed labels restricted to the images of each species; dog ids are shifted
# so that they start at 0.
is_dog = ~y_is_cat
y_cat_breed = y_breed[y_is_cat]
y_dog_breed = y_breed[is_dog] - num_cat_breeds
# Binary cat-vs-dog detector, trained on every image.
model_cat_or_dog = keras.Sequential()
model_cat_or_dog.add(keras.layers.Conv2D(filters=32, kernel_size=3, activation="relu"))
model_cat_or_dog.add(keras.layers.Flatten())
model_cat_or_dog.add(keras.layers.Dense(1, activation="sigmoid"))
model_cat_or_dog.compile(optimizer="adam", loss="binary_crossentropy")
model_cat_or_dog.fit(X_train, y_is_cat, epochs=2)

# Cat breed classifier, trained on the cat images only.
model_cat_breed = keras.Sequential()
model_cat_breed.add(keras.layers.Conv2D(filters=32, kernel_size=3, activation="relu"))
model_cat_breed.add(keras.layers.Flatten())
model_cat_breed.add(keras.layers.Dense(num_cat_breeds, activation="softmax"))
model_cat_breed.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
model_cat_breed.fit(X_train[y_is_cat], y_cat_breed, epochs=2)

# Dog breed classifier, trained on the dog images only.
model_dog_breed = keras.Sequential()
model_dog_breed.add(keras.layers.Conv2D(filters=32, kernel_size=3, activation="relu"))
model_dog_breed.add(keras.layers.Flatten())
model_dog_breed.add(keras.layers.Dense(num_dog_breeds, activation="softmax"))
model_dog_breed.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
model_dog_breed.fit(X_train[~y_is_cat], y_dog_breed, epochs=2)
class BreedModel(keras.Model):
    """Inference wrapper that routes each image to the matching breed model.

    The cat/dog detector is run on the whole batch; images it scores above
    0.5 are sent to the cat breed model and the others to the dog breed model.
    """

    def __init__(self, model_cat_or_dog, model_cat_breed, model_dog_breed, **kwargs):
        """Store the three already-trained models.

        Args:
            model_cat_or_dog: Detector returning one cat probability per image.
            model_cat_breed: Classifier returning cat breed probabilities.
            model_dog_breed: Classifier returning dog breed probabilities.
            **kwargs: Forwarded to ``keras.Model``.
        """
        super().__init__(**kwargs)
        # Keep the trained models themselves. keras.models.clone_model()
        # creates new layers with freshly initialised weights, which would
        # throw away the training done before wrapping them.
        self.model_cat_or_dog = model_cat_or_dog
        self.model_cat_breed = model_cat_breed
        self.model_dog_breed = model_dog_breed

    def call(self, inputs):
        """Classify a batch of images.

        Implemented as ``call`` (the hook Keras invokes from ``__call__``), so
        ``model(images)`` keeps working while the usual Keras machinery stays
        in place.

        Returns:
            A tuple ``(y_is_cat, y_proba_is_cat, Y_proba_cat_breed,
            Y_proba_dog_breed)``: the boolean cat mask, the detector
            probabilities, and the breed probabilities for the images routed
            to each breed model.
        """
        y_proba_is_cat = self.model_cat_or_dog(inputs)
        # Drop only the trailing unit axis, so a batch holding a single image
        # still produces a rank-1 mask for boolean_mask.
        y_is_cat = tf.squeeze(y_proba_is_cat > 0.5, axis=-1)
        cat_images = tf.boolean_mask(inputs, y_is_cat)
        dog_images = tf.boolean_mask(inputs, tf.logical_not(y_is_cat))
        Y_proba_cat_breed = self.model_cat_breed(cat_images)
        Y_proba_dog_breed = self.model_dog_breed(dog_images)
        return y_is_cat, y_proba_is_cat, Y_proba_cat_breed, Y_proba_dog_breed
# Random stand-in test images with the same shape as the training images.
num_test_images = 50
X_test = np.random.random([num_test_images, 32, 32, 3])

# Combine the three trained models and classify the whole test batch at once.
model = BreedModel(
    model_cat_or_dog=model_cat_or_dog,
    model_cat_breed=model_cat_breed,
    model_dog_breed=model_dog_breed,
)
(
    y_is_cat,
    y_proba_is_cat,
    Y_proba_cat_breed,
    Y_proba_dog_breed,
) = model(X_test)

How do I structure a Keras model for a custom image regression problem?

I'm attempting to develop a regression model using Tensorflow 2 and the keras API using a custom data set of png images. However, I'm not entirely sure what layers I should be using and how. I put together what I thought was a very simple model as a starting point however when I attempt to train the model the loss and accuracy values printed out are consistently 0. This leads me to believe my loss calculations are not working but I have no idea why. Below is a snippet of my source code, the full project for which can be found here:
import tensorflow as tf
import os
import random
import pathlib
AUTOTUNE = tf.data.experimental.AUTOTUNE
TRAINING_DATA_DIR = r'specgrams'


def gen_model():
    """Build and compile a small dense network over 256x128 RGB inputs.

    The input is flattened, passed through a 64-unit ReLU layer and reduced to
    a single output unit.

    NOTE(review): the model is compiled with a categorical loss and an
    accuracy metric although it has one linear output - confirm this matches
    the intended task.
    """
    network = tf.keras.models.Sequential()
    network.add(tf.keras.layers.Flatten(input_shape=(256, 128, 3)))
    network.add(tf.keras.layers.Dense(64, activation='relu'))
    network.add(tf.keras.layers.Dense(1))
    network.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=tf.keras.optimizers.Adam(),
        metrics=['accuracy'],
    )
    return network
def fetch_batch(batch_size=1000):
    """Build a shuffled, endlessly repeating, batched (image, label) dataset.

    Every file in ``TRAINING_DATA_DIR`` is decoded as a 3-channel PNG, resized
    to 256x128 and scaled to [0, 1]. Its label is the third ``-``-separated
    field of the file name (without extension) divided by 200.

    Args:
        batch_size: Number of (image, label) pairs per batch.

    Returns:
        A ``tf.data.Dataset`` yielding batches of images and labels.

    Raises:
        ValueError: If the directory contains no files, or the third name
            field is not a number.
        IndexError: If a file name has fewer than three ``-``-separated fields.
    """
    all_image_paths = []
    all_image_labels = []
    for file in pathlib.Path(TRAINING_DATA_DIR).iterdir():
        # Parse the bare file name (stem) so neither the directory part nor
        # the extension length can shift the field positions.
        label = float(file.stem.split('-')[2]) / 200
        all_image_paths.append(os.path.abspath(file))
        all_image_labels.append(label)

    if not all_image_paths:
        raise ValueError(f"no training files found in {TRAINING_DATA_DIR!r}")

    def preprocess_image(path):
        # Read, decode, resize and scale one image file.
        img_raw = tf.io.read_file(path)
        image = tf.image.decode_png(img_raw, channels=3)
        image = tf.image.resize(image, [256, 128])
        image /= 255.0
        return image

    path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
    image_ds = path_ds.map(preprocess_image, num_parallel_calls=AUTOTUNE)
    label_ds = tf.data.Dataset.from_tensor_slices(all_image_labels)
    ds = tf.data.Dataset.zip((image_ds, label_ds))
    # A buffer as large as the dataset gives a full shuffle; the count is
    # already known, so the directory is not listed a second time.
    ds = ds.shuffle(buffer_size=len(all_image_paths))
    ds = ds.repeat()
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=AUTOTUNE)
    return ds
# Build the input pipeline and the model, then train.
ds = fetch_batch()
model = gen_model()
# The dataset repeats forever, so the epoch length is set explicitly.
model.fit(ds, epochs=1, steps_per_epoch=10)
The code above is supposed to read in some spectrograms stored as 256 x 128 px png files, convert them to tensors and fit them so a regression model to predict a value (in this case the BPM of the music used to generate the spectrogram). The image file names contain the BPM which is divided by 200 to produce a value between 0 and 1 as the label.
As stated before, this code does run successfully but after each training step the loss and accuracy values printed out are always exactly 0.00000 and do not change.
It's also worth noting that I actually want my model to predict multiple values, not just a single BPM value but this is a separate issue and as such I have posted a separate question for that here.
Now for the answer: a regression model requires a regression loss function such as 'mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error' or 'mean_squared_logarithmic_error'.
def gen_model():
    """Build and compile a dense regression network over 256x128 RGB inputs.

    The input is flattened, passed through two 512-unit ReLU layers and
    reduced to a single linear output. The model is trained with mean squared
    error and reports mean absolute error.

    Returns:
        The compiled ``tf.keras`` model.
    """
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(256, 128, 3)),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
    # Accuracy counts exact matches, which says nothing about a continuous
    # target; mean absolute error is reported instead.
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss='mean_squared_error',
                  metrics=['mean_absolute_error'])
    return model

Categories