Related
I built a Convolutional LSTM model to predict upcoming Bitcoin prices from past data on the Bitcoin close price and other features.
Let me jump straight to the code:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
import tensorflow.keras as keras
import keras_tuner as kt
from keras_tuner import HyperParameters as hp
from keras.models import Sequential
from keras.layers import InputLayer, ConvLSTM1D, LSTM, Flatten, RepeatVector, Dense, TimeDistributed
from keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
import keras.backend as K
from keras.losses import Huber
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
DIR = '../input/btc-features-targets'
SEG_DIR = '../input/segmented'
segmentized_features = os.listdir(SEG_DIR)
btc_train_features = []
for seg in segmentized_features:
    train_features = pd.read_csv(f'{SEG_DIR}/{seg}')
    train_features.set_index('date', inplace=True)
    btc_train_features.append(scaler.fit_transform(train_features.values))
btc_train_targets = pd.read_csv(f'{DIR}/btc_train_targets.csv')
btc_train_targets.set_index('date', inplace=True)
btc_test_features = pd.read_csv(f'{DIR}/btc_test_features.csv')
btc_tef1 = btc_test_features.iloc[:111]
btc_tef2 = btc_test_features.iloc[25:]
btc_tef1.set_index('date', inplace=True)
btc_tef2.set_index('date', inplace=True)
btc_test_targets = pd.read_csv(f'{DIR}/btc_test_targets.csv')
btc_test_targets.set_index('date', inplace=True)
btc_trt_log = np.log(btc_train_targets)
btc_tefs1 = scaler.fit_transform(btc_tef1.values)
btc_tefs2 = scaler.fit_transform(btc_tef2.values)
btc_tet_log = np.log(btc_test_targets)
scaled_train_features = []
for features in btc_train_features:
    shape = features.shape
    scaled_train_features.append(np.expand_dims(features, [0,3]))
shape_2 = btc_tefs1.shape
btc_tefs1 = np.expand_dims(btc_tefs1, [0,3])
shape_3 = btc_tefs2.shape
btc_tefs2 = np.expand_dims(btc_tefs2, [0,3])
btc_trt_log = btc_trt_log.values[0]
btc_tet_log = btc_tet_log.values[0]
def build(hp):
    model = keras.Sequential()
    # Input Layer
    model.add(InputLayer(input_shape=(111,32,1)))
    # ConvLSTM1D
    convLSTM_hp_filters = hp.Int(name='convLSTM_filters', min_value=32, max_value=512, step=32)
    convLSTM_hp_kernel_size = hp.Choice(name='convLSTM_kernel_size', values=[3,5,7])
    convLSTM_activation = hp.Choice(name='convLSTM_activation', values=['selu', 'relu'])
    model.add(ConvLSTM1D(filters=convLSTM_hp_filters,
                         kernel_size=convLSTM_hp_kernel_size,
                         padding='same',
                         activation=convLSTM_activation,
                         use_bias=True,
                         bias_initializer='zeros'))
    # Flatten
    model.add(Flatten())
    # RepeatVector
    model.add(RepeatVector(5))
    # LSTM
    LSTM_hp_units = hp.Int(name='LSTM_units', min_value=32, max_value=512, step=32)
    LSTM_activation = hp.Choice(name='LSTM_activation', values=['selu', 'relu'])
    model.add(LSTM(units=LSTM_hp_units, activation=LSTM_activation, return_sequences=True))
    # TimeDistributed Dense
    dense_units = hp.Int(name='dense_units', min_value=32, max_value=512, step=32)
    dense_activation = hp.Choice(name='dense_activation', values=['selu', 'relu'])
    model.add(TimeDistributed(Dense(units=dense_units, activation=dense_activation)))
    # Dense output layer (applied per timestep)
    model.add(Dense(1))
    # Set learning rate
    hp_learning_rate = hp.Choice(name='learning_rate', values=[1e-2, 1e-3, 1e-4])
    # Compile model
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss=Huber(),
                  metrics=[RootMeanSquaredError()])
    return model
tuner = kt.Hyperband(build,
                     objective=kt.Objective('root_mean_squared_error', direction='min'),
                     max_epochs=10,
                     factor=3)
early_stop = EarlyStopping(monitor='root_mean_squared_error', patience=5)
opt_hps = []
for train_features in scaled_train_features:
    tuner.search(train_features, btc_trt_log, epochs=50, callbacks=[early_stop])
    opt_hps.append(tuner.get_best_hyperparameters(num_trials=1)[0])
models, epochs = ([] for _ in range(2))
for hps in opt_hps:
    model = tuner.hypermodel.build(hps)
    models.append(model)
    history = model.fit(train_features, btc_trt_log, epochs=70, verbose=0)
    rmse = history.history['root_mean_squared_error']
    best_epoch = rmse.index(min(rmse)) + 1
    epochs.append(best_epoch)
hypermodel = tuner.hypermodel.build(opt_hps[0])
for train_features, epoch in zip(scaled_train_features, epochs):
    hypermodel.fit(train_features, btc_trt_log, epochs=epoch)
tp1 = hypermodel.predict(btc_tefs1).flatten()
tp2 = hypermodel.predict(btc_tefs2).flatten()
test_predictions = np.concatenate((tp1, tp2[86:]), axis=None)
The model's hyperparameters are tuned with keras_tuner. Because the notebook raised ResourceExhaustedError when training on the full features dataset, sequentially segmented datasets are used instead (and, according to a study that used a similar model architecture, training can be done efficiently with this approach).
The input dimension of each segmented dataset is (111,32,1).
No issues are reported until the last code block; the models train fine. Yet when .predict() is executed, the notebook raises an error stating that the dimension of the input features used for prediction is incompatible with the dimension of the input features used during training. I don't understand why this happens, since as far as I know the input dimensions of a test dataset for a DNN model do not have to be identical to those of the training dataset.
Even though all the price data from 2018 to early 2021 is used for training, predictions are only needed for the mid-2021 timeframe.
The dataset used for prediction has a dimension of (136,32,1).
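For clarity, this is essentially the mismatch (a minimal illustration with dummy arrays, not the actual notebook code; only the shapes matter):

probe_train_like = np.zeros((1, 111, 32, 1))   # the per-sample shape the model was built for
probe_test_like = np.zeros((1, 136, 32, 1))    # the shape of the prediction dataset
print(hypermodel.input_shape)                  # (None, 111, 32, 1)
hypermodel.predict(probe_train_like)           # runs
hypermodel.predict(probe_test_like)            # raises the incompatible-shape error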
I tried matching the dimension of this dataset to (111,32,1) through index slicing.
That, however, caused an issue with the output dimension: predictions should be made for 136 data points, but only 10 values were returned.
Are there any issues with the model configuration? I cannot interpret the current situation.
I am learning to work with categorical data by following Abhishek Thakur's entity-embedding code examples. He uses Visual Studio Code as his Python editor, while I am using a Google Colab notebook. The code is the same, but I am getting an error on a concatenation.
According to the TensorFlow documentation:
layers.Concatenate(axis = along which to concatenate)
The sample code uses:
x=layers.Concatenate()(outputs)
without specifying the axis along which the concatenation should happen. Using the code as is, I get the error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-15-8d7152b44077> in <module>()
----> 1 run(0)
5 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/merge.py in build(self, input_shape)
491 # Used purely for shape validation.
492 if not isinstance(input_shape[0], tuple) or len(input_shape) < 2:
--> 493 raise ValueError('A `Concatenate` layer should be called '
494 'on a list of at least 2 inputs')
495 if all(shape is None for shape in input_shape):
ValueError: A `Concatenate` layer should be called on a list of at least 2 inputs
I get the same error even when I add the argument axis = 1.
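For reference, here is a minimal standalone sketch (toy shapes, not my real columns) of what Concatenate expects: a Python list of at least two tensors, which is also what the error message says.

from tensorflow.keras import layers, Input
a = Input(shape=(4,))
b = Input(shape=(4,))
x = layers.Concatenate()([a, b])   # fine: a list of 2 tensors
# x = layers.Concatenate()([a])    # ValueError: needs a list of at least 2 inputs
# x = layers.Concatenate()(a)      # same error: a single tensor is not a list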
Here is my sample code:
import os
import gc
import joblib
import pandas as pd
import numpy as np
from sklearn import metrics, preprocessing
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras import Model
from tensorflow.keras.models import load_model
from tensorflow.keras import callbacks
from tensorflow.keras import backend as K
from tensorflow.keras import utils
def create_model(data, catcols):
    # init list of inputs for embeddings
    inputs = []
    # init list of outputs for embeddings
    outputs = []
    # loop over all categorical columns
    for c in catcols:
        # find the number of unique values in the column
        num_unique_values = int(data[c].nunique())
        # simple embedding-dimension calculator:
        # min size is half the number of unique values,
        # max size is 50. The size depends on the number of unique
        # categories too.
        embed_dim = int(min(np.ceil((num_unique_values)/2), 50))
        # simple keras input layer with size 1
        inp = layers.Input(shape=(1, ))
        # add embedding layer to raw input
        # embedding size is always 1 more than unique values in input
        out = layers.Embedding(num_unique_values + 1, embed_dim, name=c)(inp)
        # 1-d spatial dropout is the standard for embedding layers
        # you can use it in NLP tasks too
        out = layers.SpatialDropout1D(0.3)(out)
        # reshape the input to the dimension of embedding
        # this becomes our output layer for current feature
        out = layers.Reshape(target_shape=(embed_dim, ))(out)
        # add input to input list
        inputs.append(inp)
        # add out to output list
        outputs.append(out)
    # concatenate all output layers
    x = layers.Concatenate(axis=1)(outputs)
    # add a batchnorm layer
    x = layers.BatchNormalization()(x)
    # add dense layers with dropout
    x = layers.Dense(300, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dense(300, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    x = layers.BatchNormalization()(x)
    y = layers.Dense(2, activation="softmax")(x)
    # create final model
    model = Model(inputs=inputs, outputs=y)
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model
from sklearn import metrics, preprocessing
def run(fold):
    df = pd.read_csv('/content/drive/My Drive/train_folds.csv')
    features = [f for f in df.columns if f not in ("id", "target", "kfold")]
    for col in features:
        # fill the NaN values with the string "NONE"; fillna is applied
        # one column at a time since it is not tested against a list of columns
        df.loc[:, col] = df[col].astype(str).fillna("NONE")
    for feat in features:
        lbl_enc = preprocessing.LabelEncoder()  # create the label encoder
        df.loc[:, feat] = lbl_enc.fit_transform(df[feat].values)  # label encode all columns
    # split the data
    # training data: every fold except the one passed as the argument
    df_train = df[df.kfold != fold].reset_index(drop=True)
    # validation data: the fold passed as the argument is reserved for validation
    df_valid = df[df.kfold == fold].reset_index(drop=True)
    # create the model
    model = create_model(df, features)
    # our features are lists of lists
    xtrain = [df_train[features].values[:, k] for k in range(len(features))]
    xvalid = [df_valid[features].values[:, k] for k in range(len(features))]
    # retrieve target values
    ytrain = df_train.target.values
    yvalid = df_valid.target.values
    # convert target columns to categories (binarization)
    ytrain_cat = utils.to_categorical(ytrain)
    yvalid_cat = utils.to_categorical(yvalid)
    # fit the model
    model.fit(xtrain, ytrain_cat, validation_data=(xvalid, yvalid_cat), verbose=1, batch_size=1024, epochs=3)
    # generate validation predictions
    valid_preds = model.predict(xvalid)[:, 1]
    # metrics
    print(metrics.roc_auc_score(yvalid, valid_preds))
    # save memory by clearing the session
    K.clear_session()
I ran the code:
run(0)
I get an error using gradient visualization with transfer learning in TF 2.0. The gradient visualization works on a model that does not use transfer learning.
When I run my code I get the error:
assert str(id(x)) in tensor_dict, 'Could not compute output ' + str(x)
AssertionError: Could not compute output Tensor("block5_conv3/Identity:0", shape=(None, 14, 14, 512), dtype=float32)
When I run the code below, it errors out. I think there's an issue with the naming conventions, or with how the inputs and outputs of the base model, vgg16, are connected to the layers I'm adding. I'd really appreciate your help!
"""
Broken example when grad_model is created.
"""
!pip uninstall tensorflow
!pip install tensorflow==2.0.0
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt
IMAGE_PATH = '/content/cat.3.jpg'
LAYER_NAME = 'block5_conv3'
model_layer = 'vgg16'
CAT_CLASS_INDEX = 281
imsize = (224,224,3)
img = tf.keras.preprocessing.image.load_img(IMAGE_PATH, target_size=(224, 224))
plt.figure()
plt.imshow(img)
img = tf.io.read_file(IMAGE_PATH)
img = tf.image.decode_jpeg(img)
img = tf.cast(img, dtype=tf.float32)
# img = tf.keras.preprocessing.image.img_to_array(img)
img = tf.image.resize(img, (224,224))
img = tf.reshape(img, (1, 224,224,3))
input = layers.Input(shape=(imsize[0], imsize[1], imsize[2]))
base_model = tf.keras.applications.VGG16(include_top=False, weights='imagenet',
input_shape=(imsize[0], imsize[1], imsize[2]))
# base_model.trainable = False
flat = layers.Flatten()
dropped = layers.Dropout(0.5)
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
fc1 = layers.Dense(16, activation='relu', name='dense_1')
fc2 = layers.Dense(16, activation='relu', name='dense_2')
fc3 = layers.Dense(128, activation='relu', name='dense_3')
prediction = layers.Dense(2, activation='softmax', name='output')
for layr in base_model.layers:
    if ('block5' in layr.name):
        layr.trainable = True
    else:
        layr.trainable = False
x = base_model(input)
x = global_average_layer(x)
x = fc1(x)
x = fc2(x)
x = prediction(x)
model = tf.keras.models.Model(inputs = input, outputs = x)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
loss='binary_crossentropy',
metrics=['accuracy'])
This portion of the code is where the error lies. I'm not sure of the correct way to specify the inputs and outputs.
# Create a graph that outputs target convolution and output
grad_model = tf.keras.models.Model(inputs = [model.input, model.get_layer(model_layer).input],
outputs=[model.get_layer(model_layer).get_layer(LAYER_NAME).output,
model.output])
print(model.get_layer(model_layer).get_layer(LAYER_NAME).output)
# Get the score for target class
with tf.GradientTape() as tape:
    conv_outputs, predictions = grad_model(img)
    loss = predictions[:, 1]
The section below is for plotting a heatmap of gradcam.
print('Prediction shape:', predictions.get_shape())
# Extract filters and gradients
output = conv_outputs[0]
grads = tape.gradient(loss, conv_outputs)[0]
# Apply guided backpropagation
gate_f = tf.cast(output > 0, 'float32')
gate_r = tf.cast(grads > 0, 'float32')
guided_grads = gate_f * gate_r * grads
# Average gradients spatially
weights = tf.reduce_mean(guided_grads, axis=(0, 1))
# Build a weighted map of filters according to gradient importance
cam = np.ones(output.shape[0:2], dtype=np.float32)
for index, w in enumerate(weights):
    cam += w * output[:, :, index]
# Heatmap visualization
cam = cv2.resize(cam.numpy(), (224, 224))
cam = np.maximum(cam, 0)
heatmap = (cam - cam.min()) / (cam.max() - cam.min())
cam = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
output_image = cv2.addWeighted(cv2.cvtColor(img.astype('uint8'), cv2.COLOR_RGB2BGR), 0.5, cam, 1, 0)
plt.figure()
plt.imshow(output_image)
plt.show()
I also asked this to the tensorflow team on github at https://github.com/tensorflow/tensorflow/issues/37680.
I figured it out. If you set up the model extending the vgg16 base model with your own layers, rather than inserting the base model into a new model like a layer, then it works.
First set up the model and be sure to declare the input_tensor.
inp = layers.Input(shape=(imsize[0], imsize[1], imsize[2]))
base_model = tf.keras.applications.VGG16(include_top=False, weights='imagenet', input_tensor=inp,
input_shape=(imsize[0], imsize[1], imsize[2]))
This way we don't have to include a line like x=base_model(inp) to show what input we want to put in. That's already included in tf.keras.applications.VGG16(...).
Instead of putting this vgg16 base model inside another model, it's easier to do gradcam by adding layers to the base model itself. I grab the output of the last layer of VGG16 (with the top removed), which is the pooling layer.
block5_pool = base_model.get_layer('block5_pool')
x = global_average_layer(block5_pool.output)
x = fc1(x)
x = prediction(x)
model = tf.keras.models.Model(inputs = inp, outputs = x)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
loss='binary_crossentropy',
metrics=['accuracy'])
Now, I grab the layer for visualization, LAYER_NAME='block5_conv3'.
# Create a graph that outputs target convolution and output
grad_model = tf.keras.models.Model(inputs = [model.input],
outputs=[model.output, model.get_layer(LAYER_NAME).output])
print(model.get_layer(LAYER_NAME).output)
# Get the score for target class
with tf.GradientTape() as tape:
    predictions, conv_outputs = grad_model(img)
    loss = predictions[:, 1]
print('Prediction shape:', predictions.get_shape())
# Extract filters and gradients
output = conv_outputs[0]
grads = tape.gradient(loss, conv_outputs)[0]
We (I plus a number of team members developing a project) ran into a similar problem with a Grad-CAM implementation we found in a tutorial.
That code didn't work with a model consisting of the VGG19 base model plus a few extra layers added on top of it. The problem was that the VGG19 base model was inserted as a "layer" inside our model, and the Grad-CAM code apparently didn't know how to deal with it; we were getting a "Graph disconnected..." error. After some debugging (carried out by another team member, not me) we managed to modify the original code so that it works for this kind of model that contains another model inside it. The idea is to pass the inner model as an extra argument to the GradCAM class. Since this may be helpful to others, I am including the modified code below (we also renamed the GradCAM class to My_GradCAM).
class My_GradCAM:
    def __init__(self, model, classIdx, inner_model=None, layerName=None):
        self.model = model
        self.classIdx = classIdx
        self.inner_model = inner_model
        if self.inner_model == None:
            self.inner_model = model
        self.layerName = layerName

    [...]

        gradModel = tensorflow.keras.models.Model(inputs=[self.inner_model.inputs],
                                                  outputs=[self.inner_model.get_layer(self.layerName).output,
                                                           self.inner_model.output])
Then the class can be instantiated by adding the inner model as the extra argument, e.g.:
cam = My_GradCAM(model, None, inner_model=model.get_layer("vgg19"), layerName="block5_pool")
I hope this helps.
Edit: Credit to Mirtha Lucas for doing the debugging and finding the solution.
After a lot of struggle, I condensed the way to draw the heat map when you are using transfer learning. Here is the Keras official tutorial.
The issue I encountered is that, when trying to draw the heat map from my model, the DenseNet can only be seen as a functional layer inside my model, so make_gradcam_heatmap cannot reach the layers that live inside that functional layer (it appears as the 5th layer of my model).
Therefore, to mirror the Keras official tutorial, I need to use only the DenseNet as the model for visualization. Here are the steps.
Take the inner model out of your model:
dense_model = dense_model.get_layer('densenet121')
Copy the weights from the dense model to your newly initialized model:
inputs = tf.keras.Input(shape=(224, 224, 3))
model = model_builder(weights="imagenet", include_top=True, input_tensor=inputs)
for layer, dense_layer in zip(model.layers[1:], dense_model.layers[1:]):
    layer.set_weights(dense_layer.get_weights())
relu = model.get_layer('relu')
x = tf.keras.layers.GlobalAveragePooling2D()(relu.output)
outputs = tf.keras.layers.Dense(5)(x)
model = tf.keras.models.Model(inputs = inputs, outputs = outputs)
Draw the heat map
preprocess_input = keras.applications.densenet.preprocess_input
img_array = preprocess_input(get_img_array(img_path, size=(224, 224)))
heatmap = make_gradcam_heatmap(img_array, model, 'bn')
plt.matshow(heatmap)
plt.show()
get_img_array, make_gradcam_heatmap and save_and_display_gradcam are kept unchanged. Follow the Keras tutorial and you are good to go.
I have the following Convolutional Neural Network (CNN) in Keras, but it keeps predicting class "1" for the test images, even though the training data is balanced. Any ideas on how I can solve this issue? Thanks.
from keras import layers
from keras import models
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
import cv2
import numpy as np
import os
train_directory = '/train'
validation_directory = '/valid'
test_directory = '/test'
results_directory = '/results'
correct_classification = 0
number_of_test_images = 0
labels = []
prediction_probabilities = []
model = models.Sequential()
model.add(layers.Conv2D(32,(3,3),activation='relu',input_shape=(512,512,3)))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Conv2D(64,(3,3),activation='relu'))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Conv2D(128,(3,3),activation='relu'))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Conv2D(256,(3,3),activation='relu'))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Conv2D(512,(3,3),activation='relu'))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Flatten())
model.add(layers.Dense(1024,activation='relu'))
model.add(layers.Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='rmsprop',metrics=['acc'])
train_data = ImageDataGenerator(rescale=1.0/255)
validation_data = ImageDataGenerator(rescale=1.0/255)
train_generator = train_data.flow_from_directory(train_directory,target_size=(512,512),batch_size=20,class_mode='binary')
validation_generator = validation_data.flow_from_directory(validation_directory,target_size=(512,512),batch_size=20,class_mode='binary')
history = model.fit_generator(train_generator,
steps_per_epoch=10,
epochs=10,
validation_data=validation_generator,
validation_steps=5)
model.save('my_model.h5')
for root, dirs, files in os.walk(test_directory):
    for file in files:
        img = cv2.imread(root + '/' + file)
        img = cv2.resize(img,(512,512),interpolation=cv2.INTER_AREA)
        img = np.expand_dims(img, axis=0)
        img = img/255.0
        if os.path.basename(root) == 'nevus':
            label = 1
        elif os.path.basename(root) == 'melanoma':
            label = 0
        labels.append(label)
        img_class = model.predict_classes(img)
        img_class_probability = model.predict(img)
        prediction_probability = img_class_probability[0]
        prediction_probabilities.append(prediction_probability)
        prediction = img_class[0]
        if prediction == label:
            correct_classification = correct_classification + 1
The output of your network is the cause of the constant prediction of "1". You need two output units in your final layer. A similar question was asked here, and I have quoted Matias's explanation below for convenience.
Softmax normalizes by the sum of exponential of each output. Since there is one output, the only possible output is 1.0.
For a binary classifier you can either use a sigmoid activation with the "binary_crossentropy" loss, or put two output units at the last layer, keep using softmax and change the loss to categorical_crossentropy.
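In terms of this question's code, a minimal sketch of the two options (only the final layer, the loss, and the label format change; the rest of the model stays as posted):

# Option 1: keep a single sigmoid unit with binary labels
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['acc'])

# Option 2: two softmax units with one-hot labels
# (the generators would then need class_mode='categorical')
model.add(layers.Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc'])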
I'm new to NN.
I built a NN for image understanding using a triplet loss method.
And I think that I'm missing some basic knowledge about how to use this method for predicting an image tag.
After I have built my model, how should I predict on a sample image?
Since my model's input is a triplet, what should that triplet be constructed from at prediction time?
As for the theory, I think I should somehow get the embedding for the test image and then use kNN with k=1 to find the nearest embedding, but I am clueless about how to do that in practice.
My code is running and generating the model:
import numpy as np
import random
import os
import imageio
import matplotlib.pyplot as plt
import pandas as pd
from time import time
import tensorflow as tf
tf.set_random_seed(1)
from PIL import Image
from keras.models import Model
from keras.layers import Input, Lambda, concatenate
from keras.optimizers import Adam
from keras import backend as K
from keras.layers import Conv2D, PReLU, Flatten, Dense
ALPHA = 0.2 # Triplet Loss Parameter
def get_triplets(features):
    df_features = pd.DataFrame(features)
    triplets = []
    for index, row in df_features.iterrows():
        same_tag = df_features.loc[df_features.iloc[:, -1] == row.iloc[-1]]
        same_tag_indexes = list(set(same_tag.index) - {index})
        diff_tag_indexes = list(set(df_features.index) - set(same_tag_indexes) - {index})
        anchor = row.iloc[0]
        anchor = anchor.reshape(-1, anchor.shape[0], anchor.shape[1], anchor.shape[2])
        pos = df_features.iloc[random.choice(same_tag_indexes), :].iloc[0]
        pos = pos.reshape(-1, pos.shape[0], pos.shape[1], pos.shape[2])
        neg = df_features.iloc[random.choice(diff_tag_indexes), :].iloc[0]
        neg = neg.reshape(-1, neg.shape[0], neg.shape[1], neg.shape[2])
        triplets.append(list(list([anchor, pos, neg])))
    return np.array(triplets)
def triplet_loss(x):
    anchor, positive, negative = tf.split(x, 3, axis=1)
    pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1)
    neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1)
    basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), ALPHA)
    loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0)
    return loss
# When fitting the model (i.e., model.fit()); use as an input [anchor_example,
# positive_example, negative_example] in that order and as an output zero.
# The reason to use the output as zero is that you are trying to minimize the
# triplet loss as much as possible and the minimum value of the loss is zero.
def create_embedding_network(input_shape):
    input_shape = Input(input_shape)
    x = Conv2D(32, (3, 3))(input_shape)
    x = PReLU()(x)
    x = Conv2D(64, (3, 3))(x)
    x = PReLU()(x)
    x = Flatten()(x)
    x = Dense(10, activation='softmax')(x)
    model = Model(inputs=input_shape, outputs=x)
    return model
anchor_embedding = None
# Builds an embedding for each example (i.e., positive, negative, anchor)
# Then calculates the triplet loss between their embedding.
# Then applies identity loss on the triplet loss value to minimize it on training.
def build_model(input_shape):
    global anchor_embedding
    # Standardizing the input shape order
    K.set_image_data_format('channels_last')
    positive_example = Input(shape=input_shape)
    negative_example = Input(shape=input_shape)
    anchor_example = Input(shape=input_shape)
    # Create a common network to share the weights across the different examples (+/-/anchor)
    embedding_network = create_embedding_network(input_shape)
    positive_embedding = embedding_network(positive_example)
    negative_embedding = embedding_network(negative_example)
    anchor_embedding = embedding_network(anchor_example)
    # loss = merge([anchor_embedding, positive_embedding, negative_embedding],
    #              mode=triplet_loss, output_shape=(1,))
    merged_output = concatenate([anchor_embedding, positive_embedding, negative_embedding])
    loss = Lambda(triplet_loss, (1,))(merged_output)
    model = Model(inputs=[anchor_example, positive_example, negative_example],
                  outputs=loss)
    model.compile(loss='mean_absolute_error', optimizer=Adam())
    return model
#start_time = time()
numOfPhotosPerTag = 10
#Change this line to your own drive path
baseDir = "C:/Intelligent systems/DNN/images/"
imagesHashtags = ["beer", "bigcity"]
imagesDir = [baseDir + str(x) for x in imagesHashtags]
images = ["/" + str(x) + ".jpg" for x in range(1, numOfPhotosPerTag + 1)]
allImages = []
for x in imagesDir:
    allImages += [x + loc for loc in images]
imageio.imread(allImages[0], pilmode="RGB").shape
data = []
for x in allImages:
    image = imageio.imread(x, pilmode="RGB")
    tag = x.split('/')[-2]
    data.append((image, tag))
data = np.array(data)
triplets = get_triplets(data)
model = build_model((256, 256, 3))
#model.fit(triplets, y=np.zeros(len(triplets)), batch_size=1)
for i in range(len(data)):
    model.fit(list(triplets[0]), y=[0], batch_size=1, verbose=10)
If you've trained your embedding_network properly, you now don't need to use triplets any more.
Basically, the whole point of the triplet-loss concept is to learn an embedding that is compatible with a pre-defined metric (usually just the Euclidean distance for instance), and then use this embedding for simple KNN classification as you mentioned.
So take your labeled data and pass all the points through the embedding_network.
You now have a set of points in a (low-dimensional?) space, in which "close points" are of the same class. Again, this depends on the data, how successful the training was, etc.
The natural thing to do then is to pass your test point through the same embedding_network and compare its distances to the labeled points in the embedding space.
KNN is then a viable solution for classification, but the real point is that your data has been transformed very non-linearly into a "comfortable" space in which many classical and simple methods will work more easily: clustering, classification, you name it.
Hope that helps, and good luck!
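For instance, a minimal sketch of that workflow (variable names like train_images, train_tags and test_images are placeholders, and it assumes you keep a reference to embedding_network, which is local to build_model in the code above):

from sklearn.neighbors import KNeighborsClassifier
# Embed the labeled images with the trained shared network
train_emb = embedding_network.predict(train_images)   # shape: (n_train, embedding_dim)
# 1-nearest-neighbour classification in the embedding space
knn = KNeighborsClassifier(n_neighbors=1).fit(train_emb, train_tags)
# Embed the query image(s) with the same network and classify them
test_emb = embedding_network.predict(test_images)
predicted_tags = knn.predict(test_emb)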
If you use the name= argument to tag the "normal" half of the model, you can extract the layers you need. We use the following code for this:
def triplet2normal(model, keep_str='pos', out='score'):
    """ take a triplet model, keep half of the model """
    new_out_layer_name = next(model.name for model in model.layers if keep_str in model.name and out in model.name)
    model_half = Model(inputs=[i for i in model.input if keep_str in i.name],
                       outputs=model.get_layer(new_out_layer_name).output
                       )
    return model_half
Where the model is any triplet model - the example below is for recommendation on e.g. the movielens set:
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Subtract, Activation
from tensorflow.keras.models import Model

# Input placeholders
positive_item_input = Input((1,), name='pos_item_input')
negative_item_input = Input((1,), name='neg_item_input')
user_input = Input((1,), name='pos_neg_user_input')
# Embedding layers for the items and for users
item_embedding_layer = Embedding(num_items, latent_dim, name='pos_neg_item_embedding', input_length=1)
user_embedding_layer = Embedding(num_users, latent_dim, name='pos_neg_user_embedding', input_length=1)
# Flatten the embedding layers
positive_item_embedding = Flatten(name='pos_item_embedded')(item_embedding_layer(positive_item_input))
negative_item_embedding = Flatten(name='neg_item_embedded')(item_embedding_layer(negative_item_input))
user_embedding = Flatten(name='pos_neg_user_embedded')(user_embedding_layer(user_input))
# Dot product - Matrix factorization
positive_scores = Dot(axes=1, name='positive_scores')([user_embedding, positive_item_embedding])
negative_scores = Dot(axes=1, name='negative_scores')([user_embedding, negative_item_embedding])
# Compare scores
delta_scores_1 = Subtract(name='delta_scores')([negative_scores, positive_scores])
loss = Activation('sigmoid')(delta_scores_1)
# Define model
model = Model(
inputs=[user_input, positive_item_input, negative_item_input],
outputs=loss,
)
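With the layer names above, extracting the "positive" half is then just (an illustrative call matching the defaults of triplet2normal):

# Keeps the inputs whose names contain 'pos' (the user input and the positive-item input)
# and cuts the model at the 'positive_scores' layer.
rank_model = triplet2normal(model, keep_str='pos', out='score')
rank_model.summary()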