I'm implementing an MLP with Keras and a custom loss function.
I notice model.compile() takes very much time: it seems doesn't end.
The loss that I passed to the compile() function is custom.
I'm also using another function that is used in the loss function.
This is my custom loss:
def get_top_one_probability(vector):
return (K.exp(vector) / K.sum(K.exp(vector)))
def custom_loss(groups_id_count, tf_session):
def listnet_loss(real_labels, predicted_labels):
losses = tf.Variable([[0.0]], tf.float32)
for group in groups_id_count:
start_range = 0
end_range = (start_range + group[1])
batch_real_labels = real_labels[start_range:end_range]
batch_predicted_labels = predicted_labels[start_range:end_range]
loss = -K.sum(get_top_one_probability(batch_real_labels)) * tf.math.log(get_top_one_probability(batch_predicted_labels))
losses = tf.concat([losses, loss], axis=0)
start_range = end_range
return K.mean(losses)
return listnet_loss
And this is the MLP code:
mlp = keras.models.Sequential()
# add input layer
mlp.add(
keras.layers.Dense(
units=training_dataset.shape[1],
input_shape = (training_dataset.shape[1], ),
kernel_initializer='glorot_uniform',
bias_initializer='zeros',
activation='tanh')
)
# add hidden layer
mlp.add(
keras.layers.Dense(
units=training_dataset.shape[1] + 10,
input_shape = (training_dataset.shape[1] + 10,),
kernel_initializer='glorot_uniform',
bias_initializer='zeros',
activation='relu')
)
# add output layer
mlp.add(
keras.layers.Dense(
units=1,
input_shape = (1, ),
kernel_initializer='glorot_uniform',
bias_initializer='zeros',
activation='softmax')
)
# define SGD optimizer
sgd_optimizer = keras.optimizers.SGD(
lr=0.01, decay=0.01, momentum=0.9, nesterov=True
)
# compile model
print('Compiling model...\n')
mlp.compile(
optimizer=sgd_optimizer,
loss=custom_loss(groups_id_count, tf.compat.v1.Session())
)
mlp.summary() # print model settings
# Training
with tf.device('/GPU:0'):
print('Start training')
mlp.fit(training_dataset, training_dataset_labels, epochs=50, verbose=2, batch_size=training_dataset.shape[0], workers=10)
Why the compile() function takes very very much time? Thanks in advance
Related
I got the following error: RuntimeError: You must compile your model before training/testing. Use model.compile(optimizer, loss). However, i have already compiled my model so i don't understand what the problem is.
`vgg = VGG16(weights='imagenet',include_top=False,input_shape=(224,224,3))
for layer in vgg.layers:
layer.trainable = False #making all the layers non-trainable
x = Flatten()(vgg.output) #flattening out the last layer
predictions = Dense(2,activation='sigmoid')(x) #Dense layer to predict wether there is pneumonia or not
model = Model(inputs=vgg.input, outputs=predictions)
early_stopping_callbacks = tensorflow.keras.callbacks.EarlyStopping(patience = 15,
restore_best_weights = True,
verbose = 1)
base_model1 = VGG16(include_top = False, weights = "imagenet", input_shape = (224, 224, 3), pooling = "max",
classes = 2)
#base_model1.load_weights("../input/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5")
base_model1.summary()
model2 = Sequential()
model2.add(base_model1)
model2.add(Flatten())
model2.add(Dense(128, activation = "relu"))
model2.add(Dense(64, activation = "relu"))
model2.add(Dense(32, activation = "relu"))
model2.add(Dense(1, activation = "sigmoid"))
# freeze the layers
for layer in base_model1.layers:
layer.trainable = False
model2.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])
history = model2.fit_generator(train_generator, epochs = EPOCH, validation_data = val_generator, steps_per_epoch = 10,
callbacks = [early_stopping_callbacks])
test_loss, test_accuracy = base_model1.evaluate(test_generator, steps = 50)
print("The testing accuracy is: ", test_accuracy * 100, "%")
print("The testing loss is: ", test_loss * 100, "%")`
base_model1 is my vgg16 model and test_generator is my test set
I was comparing loss for two simple MLP models with and without dropout on both TF/Keras and Pytorch frameworks (on Keras imdb dataset). But with PyTorch I am not getting the same results as I hoped for and was wondering perhaps what I am doing incorrectly.
# Keras - IMDB Dataset
model = Sequential()
model.add(Dense(16, activation = "relu", input_shape= (10000,)))
model.add(Dropout(0.5)) # comment out this line for no dropout model
model.add(Dense(16, activation = "relu"))
model.add(Dropout(0.5)) # comment out this line for no dropout model
model.add(Dense(1, activation = "sigmoid"))
model.compile(
optimizer = "rmsprop",
loss = "binary_crossentropy",
metrics = ["accuracy"]
)
history = model.fit(
X_train,
y_train,
epochs = 20,
batch_size = 512,
validation_data = (X_val, y_val)
)
The results I obtained in keras (Left figure without dropout and right with dropout)
# Pytorch - same IMDB dataset from keras
class MLP(nn.Module):
def __init__(self, in_dims, l1, l2, out_dims):
super(MLP, self).__init__()
self.fc1 = nn.Linear(in_dims, l1)
self.fc2 = nn.Linear(l1, l2)
self.fc3 = nn.Linear(l2, out_dims)
self.dropout = nn.Dropout(p=0.5)
def forward(self, X):
out = F.relu(self.fc1(X))
out = self.dropout(out) # comment out this line for no dropout model
out = F.relu(self.fc2(out))
out = self.dropout(out) # comment out this line for no dropout model
out = F.sigmoid(self.fc3(out))
return out
model = MLP(10000, 16, 16, 1)
optimizer = optim.RMSprop(model.parameters(), lr = 0.001)
criterion = nn.BCELoss()
min_val_loss = np.inf
losses = []
val_losses = []
accuracy = []
val_accuracy = []
for e in range(0,20):
running_loss = 0
for i,(X_train, y_train) in enumerate(train_loader):
yhat = model.forward(X_train)
loss = criterion(yhat.flatten(), y_train)
running_loss += loss.item()
optimizer.zero_grad()
loss.backward()
optimizer.step()
losses.append(running_loss / (i+1)) #note its i+1 since i starts from 0
model.eval()
with torch.no_grad():
running_val_loss = 0
for i,(X_val, y_val) in enumerate(val_loader):
yhat_val = model.forward(X_val)
val_loss = criterion(yhat_val.flatten(), y_val)
running_val_loss += val_loss.item()
val_losses.append(running_val_loss / (i + 1))
if val_loss < min_val_loss:
best_params = model.state_dict()
min_val_loss = val_loss
print(f"epochs : {e}, train_loss : {loss}, val_loss : {val_loss}")
Figure on the left is the result from no dropout model which has similar results to the keras model. However the one with dropout doesnot have the same behaviour.
I'm trying to write a custom training loop. After creating the model, I have added some extra trainable parameter to some layers of my model. I have used these extra parameters to update my original parameter on every forward pass. But when I'm calculating the gradient, it's giving None for the extra parameter that i have added last. Code is given below:
model = Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(1,1)))
model.add(Dense(1, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.layers[1].add_weight(name="x1", shape=(1,), initializer=tf.keras.initializers.Constant(value=1.0),trainable=True)
dataset = tf.data.Dataset.from_tensor_slices((feature, labels))
for i, (x_batch_train, y_batch_train) in enumerate(dataset):
with tf.GradientTape() as tape:
for par in model.layers[1].trainable_weights:
if "x1" in par.name:
bits = tf.convert_to_tensor(par)
for par in model.layers[1].trainable_weights:
if "kernel" in par.name:
par = bits + 1.0
x = model(x_batch_train, training = True)
loss = tf.keras.losses.SparseCategoricalCrossentropy(y_batch_train, x)
val = tape.gradient(loss, model.trainable_weights)
for v in val:
print(v)
Here, I have added one extra parameter called x1 and it's updating the kernel of Dense layer. But I'm getting None gradient for x1 parameter. The output is:
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
tf.Tensor([-0.], shape=(1,), dtype=float32)
None
tf.Tensor([[0. 0.]], shape=(1, 2), dtype=float32)
tf.Tensor([-0.5 0.5], shape=(2,), dtype=float32)
Why it's happening?
The problem is that the changes you are making to the layer's weights have no direct connection to the output of the model in the context of tf.GradientTape and are therefore not tracked. You could solve this with a simple custom layer:
import tensorflow as tf
class DenseLayer(tf.keras.layers.Layer):
def __init__(self, units=1):
super(DenseLayer, self).__init__()
self.units = units
def build(self, input_shape):
self.w = self.add_weight("kernel",
shape=[int(input_shape[-1]),
self.units], trainable=True)
self.b = self.add_weight(shape=(self.units,), initializer="zeros", trainable=True)
self.bits = self.add_weight(name="x1", shape=[int(input_shape[-1]),
self.units], initializer=tf.keras.initializers.ones(), trainable=True)
def call(self, inputs):
return tf.nn.relu(tf.matmul(inputs, (self.w + self.bits + 1.0)) + self.b)
dense_layer = DenseLayer(1)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(1,1)))
model.add(dense_layer)
model.add(tf.keras.layers.Dense(2, activation='softmax'))
print(model.summary())
dataset = tf.data.Dataset.from_tensor_slices((tf.random.normal((50, 1, 1)), tf.random.uniform((50, ), maxval=2, dtype=tf.int32))).batch(2)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
for i, (x_batch_train, y_batch_train) in enumerate(dataset):
with tf.GradientTape() as tape:
y = model(x_batch_train, training = True)
loss = loss_fn(y_batch_train, y)
val = tape.gradient(loss, model.trainable_weights)
for v in val:
print(v)
optimizer.apply_gradients(zip(val, model.trainable_variables))
Your idea is good I didn't extend from the last answer but this question is asked once about the custom layer and that you can do it for lstm by training as model.fit( ... )
It is not about the Gradient Tape.
[ Sample - Dense ]:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Class / Function
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class MyDenseLayer(tf.keras.layers.Layer):
def __init__(self, num_outputs, num_add):
super(MyDenseLayer, self).__init__()
self.num_outputs = num_outputs
self.num_add = num_add
def build(self, input_shape):
self.kernel = self.add_weight("kernel",
shape=[int(input_shape[-1]),
self.num_outputs])
def call(self, inputs):
temp = tf.add( inputs, self.num_add )
temp = tf.matmul(temp, self.kernel)
return temp
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
tf.keras.layers.InputLayer(input_shape=( 32, 32, 4 )),
tf.keras.layers.Normalization(mean=3., variance=2.),
tf.keras.layers.Normalization(mean=4., variance=6.),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
tf.keras.layers.MaxPooling2D((2, 2)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Reshape((128, 225)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96, return_sequences=True, return_state=False)),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(96)),
])
layer = MyDenseLayer(10, 5)
model.add(layer)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(192, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
model.summary()
[ Output ]:
I'm trying to implement a bespoke loss calculation model but keep getting hit with recognition failures. eg: "ValueError: Unknown loss function: root_mean_squared_error_fraction. Please ensure this object is passed to the custom_objects argument."
The function "root_mean_squared_fraction" exists and is functioning, I know this because I call it elsewhere outside of the Keras model and it functions as expected. I'm clearly not understanding something about injecting this into the model definition and would appreciate any advice? Thanks.
from keras.models import load_model
from keras import backend as K
def root_mean_squared_error_fraction(y_true, y_pred):
return K.sqrt(K.mean(K.square((y_pred - y_true)/y_true)))
This is my model routine which does work when the =rmsef in the model.compile is replaced with ='mse':
def ResModel(Svect, Xvect, Yvect, dtrain):
model = Sequential()
model.add(LSTM(64, activation='relu',\
input_shape=(Xvect.shape[1], Xvect.shape[2]),\
return_sequences=True))
model.add(LSTM(32, activation='relu', return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(Yvect.shape[1]))
rmsef = root_mean_squared_error_fraction
model.compile(optimizer = "adam", loss = rmsef )
bmfile = 'bestmodel.h5'
earlystop = EarlyStopping(monitor='val_loss', mode='auto',\
verbose=0, patience=mpat, min_delta=0.005)
chkpoint = ModelCheckpoint(bmfile, monitor = 'val_loss', mode = 'auto',\
save_best_only = True )
history = model.fit(Xvect, Yvect, epochs=mcycl, batch_size=32,\
validation_split=dsplit, verbose=0,\
callbacks = [earlystop, chkpoint] )
saved = load_model('bestmodel.h5')
score = saved.evaluate(Xvect, Yvect, verbose=0)
print("Epoch: %04d yeilded best fit with overall loss of: %0.4f "\
% ((earlystop.stopped_epoch + 1), score ) )
Yvect = descalevector(Svect, saved.predict(Xvect),dtrain )
return Yvect, score
The idea is to train a CNN on a cosine similarity matrix of the hidden states of two bilstms.
I try to get the following code working, but it is failing giving the error message:
Graph disconnected: cannot obtain value for tensor
Tensor("bidirectional_4/concat:0", shape=(?, ?, 100), dtype=float32)
at layer "input_11". The following previous layers were accessed without issue: []
The code to train the model is the following:
def train_model(self, sentences_pair, is_similar,
embedding_meta_data_skt, embedding_meta_data_tib ,
model_save_directory='./'):
tokenizer_skt = embedding_meta_data_skt['tokenizer']
tokenizer_tib = embedding_meta_data_tib['tokenizer']
embedding_matrix_skt = embedding_meta_data_skt['embedding_matrix']
embedding_matrix_tib = embedding_meta_data_tib['embedding_matrix']
train_data_x1, train_data_x2, train_labels, leaks_train, \
val_data_x1, val_data_x2, val_labels, leaks_val = create_train_dev_set(tokenizer_skt, sentences_pair,
is_similar, self.max_sequence_length,
self.validation_split_ratio)
nb_words_skt = len(tokenizer_skt.word_index) + 1
nb_words_tib = len(tokenizer_tib.word_index) + 1
# Creating word embedding layer
embedding_layer_skt = Embedding(nb_words_skt, self.embedding_dim, weights=[embedding_matrix_skt],
input_length=self.max_sequence_length, trainable=False)
embedding_layer_tib = Embedding(nb_words_tib, self.embedding_dim, weights=[embedding_matrix_tib],
input_length=self.max_sequence_length, trainable=False)
# Creating LSTM Encoder
lstm_layer = Bidirectional(LSTM(self.number_lstm_units, dropout=self.rate_drop_lstm, recurrent_dropout=self.rate_drop_lstm,return_sequences=True))
# Creating LSTM Encoder layer for First Sentence
sequence_1_input = Input(shape=(self.max_sequence_length,), dtype='int32')
embedded_sequences_1 = embedding_layer_skt(sequence_1_input)
skt_lstm = lstm_layer(embedded_sequences_1)
# Creating LSTM Encoder layer for Second Sentence
sequence_2_input = Input(shape=(self.max_sequence_length,), dtype='int32')
embedded_sequences_2 = embedding_layer_tib(sequence_2_input)
tib_lstm = lstm_layer(embedded_sequences_2)
A_input = keras.Input(tensor=skt_lstm)
B_input = keras.Input(tensor=tib_lstm)
dist_output = keras.layers.Lambda(pairwise_cosine_sim)([skt_lstm,tib_lstm,A_input,B_input])
dist_output = Reshape((40,40,1))(dist_output)
input_shape = (40,40,1)
cnn_model = Conv2D(128, (2, 2), input_shape=input_shape)(dist_output)
cnn_model = BatchNormalization(axis=-1)(cnn_model)
cnn_model = Activation('relu')(cnn_model)
cnn_model = Conv2D(164, (2, 2))(cnn_model)
cnn_model = BatchNormalization(axis=-1)(cnn_model)
cnn_model = Activation('relu')(cnn_model)
cnn_model = Conv2D(192,(3, 3))(cnn_model)
cnn_model = BatchNormalization(axis=-1)(cnn_model)
cnn_model = Activation('relu')(cnn_model)
cnn_model = Conv2D(192, (3, 3))(cnn_model)
cnn_model = BatchNormalization(axis=-1)(cnn_model)
cnn_model = Activation('relu')(cnn_model)
cnn_model = Conv2D(128, (3, 3))(cnn_model)
cnn_model = BatchNormalization(axis=-1)(cnn_model)
cnn_model = Activation('relu')(cnn_model)
cnn_model = MaxPooling2D(pool_size=(2,2))(cnn_model)
cnn_model = Dropout(0.40)(cnn_model)
cnn_model = Flatten()(cnn_model)
# Fully connected layer
cnn_model = Dense(256)(cnn_model)
cnn_model = BatchNormalization()(cnn_model)
cnn_model = Activation('relu')(cnn_model)
cnn_model = Dropout(0.5)(cnn_model)
cnn_model = Dense(num_classes)(cnn_model)
preds = Dense(1, activation='sigmoid')(cnn_model)
model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)
model.compile(loss=keras.losses.binary_crossentropy,
optimizer=keras.optimizers.Adam(lr=learning_rate),
metrics=['accuracy'])
#model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['acc'])
filepath="skt-tib-bs" + str(batch_size) + "-" + "{epoch:02d}-{val_acc:.2f}.hdf5"
checkpoint = ModelCheckpoint('skt-tib.h5', monitor='val_acc')
callbacks_list = [checkpoint]
model.fit([train_data_x1, train_data_x2, leaks_train], train_labels,validation_data=([val_data_x1, val_data_x2, leaks_val], val_labels),
batch_size=batch_size,
epochs=epochs,
verbose=1,
class_weight = class_weight,
callbacks = callbacks_list)
score = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
model.save(file_name)
The definition of the function calculating the pairwise cosine similarity is the following:
def l2_norm(x, axis=None):
square_sum = K.sum(K.square(x), axis=axis, keepdims=True)
norm = K.sqrt(K.maximum(square_sum, K.epsilon()))
return norm
def pairwise_cosine_sim(A_B):
A,B,A_tensor,B_tensor = A_B
A_mag = l2_norm(A, axis=2)
B_mag = l2_norm(B, axis=2)
num = K.batch_dot(A_tensor, K.permute_dimensions(B_tensor, (0,2,1)))
den = (A_mag * K.permute_dimensions(B_mag, (0,2,1)))
dist_mat = num / den
return dist_mat
I Have been trying for a couple of hours to fix it, but it seems to be no good. Somewhere the input and outputs are not connected, but I just can't figure out where the problem lies. Any suggestions on this?
Either remove A_input and B_input entirely as they are not input layers in the first place and use skt_lstm and tib_lstm directly instead of them, or if you would like to keep them pass them as the inputs of the model as well when you are defining the Model since they are actually input layers:
model = Model(inputs=[sequence_1_input, sequence_2_input, A_input, B_input], outputs=preds)
However, you don't need to pass any corresponding arrays for them when calling fit method as they will be fed using their corresponding tensors skt_lstm and tib_lstm (i.e. they will act as wrappers around these tensors).