Keras: Wrong Number of Training Epochs

I'm trying to build a class to quickly initialize and train an autoencoder for rapid prototyping. One thing I'd like to be able to do is quickly adjust the number of epochs I train for. However, it seems like no matter what I do, the model trains each layer for 100 epochs! I'm using the tensorflow backend.
Here is the code from the two offending methods.
def pretrain(self, X_train, nb_epoch = 10):
    data = X_train
    for ae in self.pretrains:
        ae.fit(data, data, nb_epoch = nb_epoch)
        ae.layers[0].output_reconstruction = False
        ae.compile(optimizer='sgd', loss='mse')
        data = ae.predict(data)
.........
def fine_train(self, X_train, nb_epoch):
    weights = [ae.layers[0].get_weights() for ae in self.pretrains]
    dims = self.dims
    encoder = containers.Sequential()
    decoder = containers.Sequential()
    ## add special input encoder
    encoder.add(Dense(output_dim = dims[1], input_dim = dims[0],
                      weights = weights[0][0:2], activation = 'linear'))
    ## add the rest of the encoders
    for i in range(1, len(dims) - 1):
        encoder.add(Dense(output_dim = dims[i+1],
                          weights = weights[i][0:2], activation = self.act))
    ## add the decoders from the end
    decoder.add(Dense(output_dim = dims[len(dims) - 2], input_dim = dims[len(dims) - 1],
                      weights = weights[len(dims) - 2][2:4], activation = self.act))
    for i in range(len(dims) - 2, 1, -1):
        decoder.add(Dense(output_dim = dims[i - 1],
                          weights = weights[i-1][2:4], activation = self.act))
    ## add the output layer decoder
    decoder.add(Dense(output_dim = dims[0],
                      weights = weights[0][2:4], activation = 'linear'))
    masterAE = AutoEncoder(encoder = encoder, decoder = decoder)
    masterModel = models.Sequential()
    masterModel.add(masterAE)
    masterModel.compile(optimizer = 'sgd', loss = 'mse')
    masterModel.fit(X_train, X_train, nb_epoch = nb_epoch)
    self.model = masterModel
Any suggestions on how to fix the problem would be appreciated. My original suspicion was that it was something to do with tensorflow, so I tried running with the theano backend but encountered the same problem.
Here is a link to the full program.

Following the Keras doc, the fit method uses a default of 100 training epochs (nb_epoch=100):
fit(X, y, batch_size=128, nb_epoch=100, verbose=1, callbacks=[], validation_split=0.0, validation_data=None, shuffle=True, show_accuracy=False, class_weight=None, sample_weight=None)
I'm not sure how you are running these methods, but following the "Typical usage" from the original code, you should be able to run something like this (adjusting the variable num_epoch as required):
#Typical usage:
num_epoch = 10
ae = JPAutoEncoder(dims)
ae.pretrain(X_train, nb_epoch = num_epoch)
ae.train(X_train, nb_epoch = num_epoch)
ae.predict(X_val)
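In other words, if one of these methods ends up calling fit somewhere without an explicit nb_epoch (for example a plain ae.fit(data, data)), the nb_epoch=100 default from the signature above takes over, which would explain every layer training for 100 epochs.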

Related

ValueError: No gradients provided for any variable: ['tf_deberta_v2_for_sequence_classification_1/deberta/embeddings/word_embeddings

I am trying to fine-tune a transformer model for text classification, but I am having trouble training the model. I have tried many things, but none of them seem to work. I have also tried solutions from other questions, but they didn't work either. I am using the 'microsoft/deberta-v3-base' model for fine-tuning. Here's my code:
train_dataset = Dataset.from_pandas(df_tr[['text', 'label']]).class_encode_column("label")
val_dataset = Dataset.from_pandas(df_tes[['text', 'label']]).class_encode_column("label")
train_tok_dataset = train_dataset.map(tokenizer_func, batched=True, remove_columns=('text'))
val_tok_dataset = val_dataset.map(tokenizer_func, batched=True, remove_columns=('text'))

from transformers import TFAutoModelForSequenceClassification

model = TFAutoModelForSequenceClassification.from_pretrained(config.model_name, num_labels=3)
transformer_model = TFAutoModelForSequenceClassification.from_pretrained(config.model_name, output_hidden_states=True)

input_ids = tf.keras.Input(shape=(config.max_len, ), dtype='int32')
attention_mask = tf.keras.Input(shape=(config.max_len, ), dtype='int32')

transformer = transformer_model([input_ids, attention_mask])
hidden_states = transformer[1]  # get output_hidden_states
#print(hidden_states)
hidden_states_size = 4  # count of the last states
hiddes_states_ind = list(range(-hidden_states_size, 0, 1))
selected_hiddes_states = tf.keras.layers.concatenate(tuple([hidden_states[i] for i in hiddes_states_ind]))

# Now we can use selected_hiddes_states as we want
output = tf.keras.layers.Dense(128, activation='relu')(selected_hiddes_states)
output = tf.keras.layers.Flatten()(output)
output = tf.keras.layers.Dense(3, activation='softmax')(output)
model = tf.keras.models.Model(inputs=[input_ids, attention_mask], outputs=output)

from transformers import create_optimizer
import tensorflow as tf

batch_size = 8
num_epochs = config.epochs
#batches_per_epoch = len(tokenized_tweets["train"]) // batch_size
total_train_steps = int(num_steps * num_epochs)
optimizer, schedule = create_optimizer(init_lr=2e-5, num_warmup_steps=0, num_train_steps=num_steps/2)
model.compile(optimizer=optimizer)

with tf.device('GPU:0'):
    model.fit(x=[np.array(train_tok_dataset["input_ids"]), np.array(train_tok_dataset["attention_mask"])],
              y=tf.keras.utils.to_categorical(y_train, num_classes=3),
              validation_data=([np.array(val_tok_dataset["input_ids"]), np.array(val_tok_dataset["attention_mask"])], tf.keras.utils.to_categorical(y_test, num_classes=3)),
              epochs=config.epochs, class_weight={0: 0.57, 1: 0.18, 2: 0.39})
It seems like a small issue, but I am new to tensorflow and transformers so I couldn't sort it out myself.
I would say it's probably due to the fact that you are not adding a loss to the compilation, so no gradient can be computed with respect to it:
model.compile(optimizer=optimizer)
^^^^^^^^^^^^^^^^^^^^---- no "loss = tf.keras.losses...
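A minimal sketch of the fix, assuming the one-hot targets built with to_categorical in the question (softmax output with 3 classes), would be to pass an explicit loss, and optionally a metric, when compiling:
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])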
Maybe you're just missing an = on the right side of validation_data.
model.fit(
    x=[np.array(...), np.array(...)],
    y=tf.keras.utils.to_categorical(...),
    validation_data=([np.array(...), np.array(...)], tf.keras.utils.to_categorical(...)),
    ...
)

StellarGraph PaddedGraphGenerator - how to provide specific training, validation and test sets

I'm trying to train a basic Graph Neural Network using the StellarGraph library, in particular starting from the example provided in [0].
The example works fine, but now I would like to repeat the same exercise, removing the N-fold cross-validation and providing specific training, validation and test sets. I'm trying to do so with the following code:
# One hot encoding
graph_training_set_labels_encoded = pd.get_dummies(graphs_training_set_labels, drop_first=True)
graph_validation_set_labels_encoded = pd.get_dummies(graphs_validation_set_labels, drop_first=True)

graphs = graphs_training_set + graphs_validation_set

# Graph generator preparation
generator = PaddedGraphGenerator(graphs=graphs)

train_gen = generator.flow([x for x in range(0, len(graphs_training_set))],
                           targets=graph_training_set_labels_encoded,
                           batch_size=batch_size)

valid_gen = generator.flow([x for x in range(len(graphs_training_set),
                                              len(graphs_training_set) + len(graphs_validation_set))],
                           targets=graph_validation_set_labels_encoded,
                           batch_size=batch_size)

# Stopping criterium
es = EarlyStopping(monitor="val_loss",
                   min_delta=0,
                   patience=20,
                   restore_best_weights=True)

# Model definition
gc_model = GCNSupervisedGraphClassification(layer_sizes=[64, 64],
                                            activations=["relu", "relu"],
                                            generator=generator,
                                            dropout=dropout_value)

x_inp, x_out = gc_model.in_out_tensors()
predictions = Dense(units=32, activation="relu")(x_out)
predictions = Dense(units=16, activation="relu")(predictions)
predictions = Dense(units=1, activation="sigmoid")(predictions)

# Creating Keras model and preparing it for training
model = Model(inputs=x_inp, outputs=predictions)
model.compile(optimizer=Adam(adam_value), loss=binary_crossentropy, metrics=["acc"])

# GNN Training
history = model.fit(train_gen, epochs=num_epochs, validation_data=valid_gen, verbose=0, callbacks=[es])

# Calculate performance on the validation data
test_metrics = model.evaluate(valid_gen, verbose=0)
valid_acc = test_metrics[model.metrics_names.index("acc")]

print(f"Test Accuracy model = {valid_acc}")
Where graphs_training_set and graphs_validation_set are lists of StellarDiGraphs.
I am able to run this piece of code, but it gives NaN as the result. What could be the problem?
This is the first time I am using StellarGraph, and in particular PaddedGraphGenerator, so I think my mistake lies in the usage of that generator, but providing the training and validation sets in a different manner didn't produce better results.
Thank you in advance.
UPDATE: Fixed a typo in the code, as pointed out here (thanks to george123).
[0] https://stellargraph.readthedocs.io/en/stable/demos/graph-classification/gcn-supervised-graph-classification.html
I found a solution by digging into the StellarGraph documentation for PaddedGraphGenerator and the GCN neural network class GCNSupervisedGraphClassification. Furthermore, I found a similar question on the StellarGraph issue tracker which also points to the solution.
# Graph generator preparation
generator = PaddedGraphGenerator(graphs=graphs)

train_gen = generator.flow([x for x in range(0, num_graphs_for_training)],
                           targets=training_graphs_labels,
                           batch_size=35)

valid_gen = generator.flow([x for x in range(num_graphs_for_training, num_graphs_for_training + num_graphs_for_validation)],
                           targets=validation_graphs_labels,
                           batch_size=35)

# Stopping criterium
es = EarlyStopping(monitor="val_loss",
                   min_delta=0.001,
                   patience=10,
                   restore_best_weights=True)

# Model definition
gc_model = GCNSupervisedGraphClassification(layer_sizes=[64, 64],
                                            activations=["relu", "relu"],
                                            generator=generator,
                                            dropout=dropout_value)

x_inp, x_out = gc_model.in_out_tensors()
predictions = Dense(units=32, activation="relu")(x_out)
predictions = Dense(units=16, activation="relu")(predictions)
predictions = Dense(units=1, activation="sigmoid")(predictions)

# Let's create the Keras model and prepare it for training
model = Model(inputs=x_inp, outputs=predictions)
model.compile(optimizer=Adam(adam_value), loss=binary_crossentropy, metrics=["acc"])

# GNN Training
history = model.fit(train_gen, epochs=num_epochs, validation_data=valid_gen, verbose=1, callbacks=[es])

# Evaluate performance on the validation data
valid_metrics = model.evaluate(valid_gen, verbose=0)
valid_acc = valid_metrics[model.metrics_names.index("acc")]

# Define test set indices temporary vars
index_begin_test_set = num_graphs_for_training + num_graphs_for_validation
index_end_test_set = index_begin_test_set + num_graphs_for_testing
test_set_indices = [x for x in range(index_begin_test_set, index_end_test_set)]

# Evaluate performance on test set
generator_for_test_set = PaddedGraphGenerator(graphs=graphs)
test_gen = generator_for_test_set.flow(test_set_indices)
result = model.predict(test_gen)
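Since the model ends in a single sigmoid unit, result holds one probability per test graph; as an illustration (not part of the original answer), hard 0/1 labels can be obtained by thresholding at 0.5:
# result is the NumPy array returned by model.predict
predicted_labels = (result > 0.5).astype(int).ravel()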

MultiWorkerMirroredStrategy hanging before first epoch?

I'm trying to run a simple MNIST neural net on multiple cluster nodes (3 nodes with 1 GPU each), but it hangs before the first epoch prints anything. I'm able to get all the nodes to sync, but right before training starts (maybe inside the model.fit call) it just stops and doesn't do anything.
Any help is appreciated!
My TF_CONFIG looks like this:
TF_CONFIG='{"cluster": {"worker": ["ip1:88888", "ip2:88888", "ip3:88888"]}, "task": {"index": 0, 1, or 2, "type": "worker"}}' python fileName.py
And my code looks like this:
import tensorflow as tf
from tensorflow import keras
import time

def get_compiled_model():
    # Make a simple 2-layer densely-connected neural network.
    inputs = keras.Input(shape=(784,))
    x = keras.layers.Dense(256, activation="relu")(inputs)
    x = keras.layers.Dense(256, activation="relu")(x)
    outputs = keras.layers.Dense(10)(x)
    model = keras.Model(inputs, outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[keras.metrics.SparseCategoricalAccuracy()],
    )
    return model

def get_dataset():
    batch_size = 32
    num_val_samples = 10000
    # Return the MNIST dataset in the form of a `tf.data.Dataset`.
    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
    # Preprocess the data (these are Numpy arrays)
    x_train = x_train.reshape(-1, 784).astype("float32") / 255
    x_test = x_test.reshape(-1, 784).astype("float32") / 255
    y_train = y_train.astype("float32")
    y_test = y_test.astype("float32")
    # Reserve num_val_samples samples for validation
    x_val = x_train[-num_val_samples:]
    y_val = y_train[-num_val_samples:]
    x_train = x_train[:-num_val_samples]
    y_train = y_train[:-num_val_samples]
    return (
        tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size),
        tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(batch_size),
        tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size),
    )

# Create a MultiWorkerMirroredStrategy.
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
print("Number of devices: {}".format(strategy.num_replicas_in_sync))

# Open a strategy scope.
with strategy.scope():
    # Everything that creates variables should be under the strategy scope.
    # In general this is only model construction & `compile()`.
    model = get_compiled_model()

# Train the model on all available devices.
train_dataset, val_dataset, test_dataset = get_dataset()
start = time.time()
print("Fit")
model.fit(train_dataset, epochs=1, verbose=1, validation_data=val_dataset, steps_per_epoch=25)
end = time.time()
print("Time:", end - start)

# Test the model on all available devices.
model.evaluate(test_dataset)

Accuracy not growing across epochs on keras

I'm new to machine learning and deep learning, and I'm trying to classify texts from 5 categories using neural networks. For that, I built a dictionary to translate the words to indexes, ending up with an array of index lists. I also mapped the labels to integers, and did the padding and related preprocessing. The problem is that when I fit the model, the accuracy stays quite low (~0.20) and does not change across the epochs. I have tried changing a lot of parameters, like the size of the vocabulary, the number of neurons, the dropout probability, the optimizer parameters, etc. The key parts of the code are below.
# Arrays with indexes (that works fine)
X_train = tokens_to_indexes(tokenized_tr_mrp, vocab, return_vocab=False)
X_test, vocab_dict = tokens_to_indexes(tokenized_te_mrp, vocab)

# Labels to integers
labels_dict = {}
labels_dict['Alzheimer'] = 0
labels_dict['Bladder Cancer'] = 1
labels_dict['Breast Cancer'] = 2
labels_dict['Cervical Cancer'] = 3
labels_dict['Negative'] = 4
y_train = np.array([labels_dict[i] for i in y_tr])
y_test = np.array([labels_dict[i] for i in y_te])

# One-hot encoding of labels
from keras.utils import to_categorical
encoded_train = to_categorical(y_train)
encoded_test = to_categorical(y_test)

# Padding
max_review_length = 235
X_train_pad = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test_pad = sequence.pad_sequences(X_test, maxlen=max_review_length)

# Model
# Vocab size
top_words = len(list(vocab_dict.keys()))
# Neurone type
rnn = LSTM
# dropout
set_dropout = True
p = 0.2
# embedding size
embedding_vector_length = 64
# regularization strength
L = 0.0005
# Number of neurones
N = 50

# Model
model = Sequential()
# Embedding layer
model.add(Embedding(top_words,
                    embedding_vector_length,
                    embeddings_regularizer=regularizers.l1(l=L),
                    input_length=max_review_length
                    #,embeddings_constraint=UnitNorm(axis=1)
                    ))
# Dropout layer
if set_dropout:
    model.add(Dropout(p))
# Recurrent layer
model.add(rnn(N))
# Output layer
model.add(Dense(5, activation='softmax'))

# Compilation
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.001),
              metrics=['Accuracy'])

# Split training set for validation
X_tr, X_va, y_tr_, y_va = train_test_split(X_train_pad, encoded_train,
                                            test_size=0.3, random_state=2)

# Parameters
batch_size = 50
# N epochs
n_epocas = 20

best_val_acc = 0
best_val_loss = 1e20
best_i = 0
best_weights = []
acum_tr_acc = []
acum_tr_loss = []
acum_val_acc = []
acum_val_loss = []

# Training
for e in range(n_epocas):
    h = model.fit(X_tr, y_tr_,
                  batch_size=batch_size,
                  validation_data=(X_va, y_va),
                  epochs=1, verbose=1)
    acum_tr_acc = acum_tr_acc + h.history['accuracy']
    acum_tr_loss = acum_tr_loss + h.history['loss']
    val_acc = h.history['val_accuracy'][0]
    val_loss = h.history['val_loss'][0]
    acum_val_acc = acum_val_acc + [val_acc]
    acum_val_loss = acum_val_loss + [val_loss]
    # if val_acc > best_val_acc:
    if val_loss < best_val_loss:
        best_i = len(acum_val_acc) - 1
        best_val_acc = val_acc
        best_val_loss = val_loss
        best_weights = model.get_weights().copy()
    if len(acum_tr_acc) > 1 and (len(acum_tr_acc) + 1) % 1 == 0:
        if e > 1:
            clear_output()
The code you posted is really bad practice.
You can either train for n_epocas using your current method and add callbacks to get the best weights (e.g. ModelCheckpoint), or use tf.GradientTape. Calling model.fit() for one epoch at a time can lead to weird results, since your optimizer doesn't know which epoch it is at.
I suggest keeping your current code but training for n_epocas all in one go and reporting the results here (accuracy + loss).
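A minimal sketch of the callback approach (the checkpoint file name here is a placeholder, not from the original code):
from keras.callbacks import ModelCheckpoint

# Keep only the weights with the best validation loss (file name is a placeholder)
checkpoint = ModelCheckpoint('best_weights.h5', monitor='val_loss',
                             save_best_only=True, save_weights_only=True)

h = model.fit(X_tr, y_tr_,
              batch_size=batch_size,
              validation_data=(X_va, y_va),
              epochs=n_epocas, verbose=1,
              callbacks=[checkpoint])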
Someone gave me the solution. I just had to change this line:
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.001),
              metrics=['Accuracy'])
For this:
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.001),
              metrics=['acc'])
I also changed the lines in the final loop relating to accuracy. The one-hot encoding was necessary as well.
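For reference, with metrics=['acc'] the history keys become 'acc' and 'val_acc', so the accuracy bookkeeping in the training loop presumably changes to something like:
acum_tr_acc = acum_tr_acc + h.history['acc']
val_acc = h.history['val_acc'][0]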

Keras model doesn't learn at all

My model weights (I output them to weights_before.txt and weights_after.txt) are precisely the same before and after the training, i.e. the training doesn't change anything, there's no fitting happening.
My data look like this (I basically want the model to predict the sign of feature, result is 0 if feature is negative, 1 if positive):
,feature,zerosColumn,result
0,-5,0,0
1,5,0,1
2,-3,0,0
3,5,0,1
4,3,0,1
5,3,0,1
6,-3,0,0
...
Brief summary of my approach:
Load the data.
Split it column-wise to x (feature) and y (result), split these two row-wise to test and validation sets.
Transform these sets into TimeseriesGenerators (not necessary in this scenario but I want to get this setup working and I don't see any reason why it shouldn't).
Create and compile a simple Sequential model with a few Dense layers and softmax activation on its output layer, using binary_crossentropy as the loss function.
Train the model... nothing happens!
Complete code follows:
import keras
import pandas as pd
import numpy as np

np.random.seed(570)

TIMESERIES_LENGTH = 1
TIMESERIES_SAMPLING_RATE = 1
TIMESERIES_BATCH_SIZE = 1024
TEST_SET_RATIO = 0.2  # the portion of total data to be used as test set
VALIDATION_SET_RATIO = 0.2  # the portion of total data to be used as validation set
RESULT_COLUMN_NAME = 'feature'
FEATURE_COLUMN_NAME = 'result'

def create_network(csv_path, save_model):
    before_file = open("weights_before.txt", "w")
    after_file = open("weights_after.txt", "w")

    data = pd.read_csv(csv_path)
    data[RESULT_COLUMN_NAME] = data[RESULT_COLUMN_NAME].shift(1)
    data = data.dropna()
    x = data.ix[:, 1:2]
    y = data.ix[:, 3]

    test_set_length = int(round(len(x) * TEST_SET_RATIO))
    validation_set_length = int(round(len(x) * VALIDATION_SET_RATIO))
    x_train_and_val = x[:-test_set_length]
    y_train_and_val = y[:-test_set_length]
    x_train = x_train_and_val[:-validation_set_length].values
    y_train = y_train_and_val[:-validation_set_length].values
    x_val = x_train_and_val[-validation_set_length:].values
    y_val = y_train_and_val[-validation_set_length:].values

    train_gen = keras.preprocessing.sequence.TimeseriesGenerator(
        x_train,
        y_train,
        length=TIMESERIES_LENGTH,
        sampling_rate=TIMESERIES_SAMPLING_RATE,
        batch_size=TIMESERIES_BATCH_SIZE
    )
    val_gen = keras.preprocessing.sequence.TimeseriesGenerator(
        x_val,
        y_val,
        length=TIMESERIES_LENGTH,
        sampling_rate=TIMESERIES_SAMPLING_RATE,
        batch_size=TIMESERIES_BATCH_SIZE
    )

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(10, activation='relu', input_shape=(TIMESERIES_LENGTH, 1)))
    model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Dense(10, activation='relu'))
    model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(1, activation='softmax'))

    for item in model.get_weights():
        before_file.write("%s\n" % item)

    model.compile(
        loss=keras.losses.binary_crossentropy,
        optimizer="adam",
        metrics=[keras.metrics.binary_accuracy]
    )

    history = model.fit_generator(
        train_gen,
        epochs=10,
        verbose=1,
        validation_data=val_gen
    )

    for item in model.get_weights():
        after_file.write("%s\n" % item)

    before_file.close()
    after_file.close()

create_network("data/sign_data.csv", False)
Do you have any ideas?
The problem is that you are using softmax as the activation function of last layer. Essentially, softmax normalizes its input to make the sum of the elements to be one. Therefore, if you use it on a layer with only one unit (i.e. Dense(1,...)), then it would always output 1. To fix this, change the activation function of last layer to sigmoid which outputs a value in the range (0,1).
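In other words, only the activation of the final layer needs to change; a minimal sketch of the corrected output layer would be:
# single-unit output with sigmoid instead of softmax
model.add(keras.layers.Dense(1, activation='sigmoid'))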
