I'm working on a probabilistic forecast model using RNNs and want to log multiple runs with different parameters in TensorBoard to evaluate and compare them. I'm quite new to TensorBoard and couldn't really come up with a good way to organize my runs. I want to be able to sort through them in TensorBoard by parameter values, so currently I'm using this rather clunky approach:
tb = SummaryWriter(log_dir=f'runs/leakyrelu/cuda{cuda_id}/m_epochs{max_epochs}/lr{learning_rate}/'
                           f'bs{batch_size}/h_h{history_horizon}/f_h{forecast_horizon}/'
                           f'core_{core_net}/drop_fc{dropout_fc}/'
                           f'drop_core{dropout_core}')
Is there any smart way or convention on how to do this without creating mile-long filenames or directories kilometres deep?
It seems you are doing hyperparameter tuning with multiple parameters.
The best way to log such runs in TensorBoard is by using its HParams plugin.
Step 1: Imports
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
After that, you create an HParam object for each parameter whose values you want to vary, and you create a summary writer.
Step 2: Creating HParam objects and a summary writer
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([16, 32]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.1, 0.2))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd']))
METRIC_ACCURACY = 'accuracy'
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
    )
Your created objects will look something like this:
HP_NUM_UNITS
HParam(name='num_units', domain=Discrete([16, 32]), display_name=None, description=None)
Step 3: Create a function for training and testing
def train_test_model(hparams):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation=tf.nn.relu),
        tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
        tf.keras.layers.Dense(10, activation=tf.nn.softmax),
    ])
    model.compile(
        optimizer=hparams[HP_OPTIMIZER],
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    model.fit(x_train, y_train, epochs=1)  # Run with 1 epoch to speed things up for demo purposes
    _, accuracy = model.evaluate(x_test, y_test)
    return accuracy
In this function, hparams is a dictionary of the form:
{
    HParam Object 1: VALUE-FOR-THE-OBJECT,
    HParam Object 2: VALUE-FOR-THE-OBJECT,
    HParam Object 3: VALUE-FOR-THE-OBJECT,
}
The actual dictionary looks like this:
{HParam(name='num_units', domain=Discrete([16, 32]), display_name=None, description=None): 32,
HParam(name='dropout', domain=RealInterval(0.1, 0.2), display_name=None, description=None): 0.2,
HParam(name='optimizer', domain=Discrete(['adam', 'sgd']), display_name=None, description=None): 'sgd'}
Step 4: A function for logging to TensorBoard
def run(run_dir, hparams):
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        accuracy = train_test_model(hparams)
        tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)
Here, run_dir is a path for each individual run.
Step 5: Trying different parameter combinations
session_num = 0

for num_units in HP_NUM_UNITS.domain.values:
    for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
        for optimizer in HP_OPTIMIZER.domain.values:
            hparams = {
                HP_NUM_UNITS: num_units,
                HP_DROPOUT: dropout_rate,
                HP_OPTIMIZER: optimizer,
            }
            run_name = "run-%d" % session_num
            print('--- Starting trial: %s' % run_name)
            print({h.name: hparams[h] for h in hparams})
            run('logs/hparam_tuning/' + run_name, hparams)
            session_num += 1
Note: num_units will take only the two values 16 and 32 (its domain is Discrete), not every value between 16 and 32. Likewise, the loop above only tries the two endpoints of the dropout interval.
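If you want to explore more of the dropout range than just its two endpoints, one option (a small sketch using Python's random module, not part of the official tutorial) is to sample values from the interval before building the hparams dict:
import random

# sample a few dropout rates uniformly between the RealInterval's bounds
for _ in range(3):
    dropout_rate = random.uniform(HP_DROPOUT.domain.min_value,
                                  HP_DROPOUT.domain.max_value)
    print(round(dropout_rate, 3))  # some value between 0.1 and 0.2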
Your TensorBoard HParams dashboard will then show these runs in a tabular view and a scatter plot view (screenshots omitted).
You can also combine this with the TensorBoard callback in Keras by setting the callback's log directory to run_dir. For example:
def train_test_model(hparams, run_dir):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation=tf.nn.relu),
        tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
        tf.keras.layers.Dense(10, activation=tf.nn.softmax)
    ])
    model.compile(
        optimizer=hparams[HP_OPTIMIZER],
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    callbacks = [
        tf.keras.callbacks.TensorBoard(run_dir),
    ]
    model.fit(x_train, y_train, epochs=10, callbacks=callbacks)  # use fewer epochs to speed things up for demo purposes
    _, accuracy = model.evaluate(x_test, y_test)
    return accuracy
The steps above are useful if you want to log custom metrics, or a variety of metrics beyond the accuracy or loss you have defined in the compile method.
But if you don't need custom metrics and don't want to deal with summary writers, you can use Keras callbacks to simplify the process.
Complete code with callbacks, without summary writers:
# Creating Hparams
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([16, 32]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.1, 0.2))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd']))

# Creating train/test function
def train_test_model(hparams, run_dir):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation=tf.nn.relu),
        tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
        tf.keras.layers.Dense(10, activation=tf.nn.softmax)
    ])
    model.compile(
        optimizer=hparams[HP_OPTIMIZER],
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    callbacks = [
        tf.keras.callbacks.TensorBoard(run_dir),  # log metrics
        hp.KerasCallback(run_dir, hparams),       # log hparams
    ]
    model.fit(x_train, y_train, epochs=10, callbacks=callbacks)  # use fewer epochs to speed things up for demo purposes
    _, accuracy = model.evaluate(x_test, y_test)
    return accuracy

# Running different configurations
session_num = 0

for num_units in HP_NUM_UNITS.domain.values:
    for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
        for optimizer in HP_OPTIMIZER.domain.values:
            hparams = {
                HP_NUM_UNITS: num_units,
                HP_DROPOUT: dropout_rate,
                HP_OPTIMIZER: optimizer,
            }
            run_name = "run-%d" % session_num
            print('--- Starting trial: %s' % run_name)
            print({h.name: hparams[h] for h in hparams})
            train_test_model(hparams, 'logs/hparam_tuning/' + run_name)
            session_num += 1
Useful Links:
Hyperparameter Tuning with the HParams Dashboard
HParams demo using all possible HParam objects - official GitHub repo
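Side note: your original snippet uses PyTorch's SummaryWriter, and there is an equivalent on that side too: SummaryWriter.add_hparams logs a dictionary of hyperparameters together with final metrics, so you don't have to encode everything in the directory name. A minimal sketch (the metric name and best_val_loss are placeholders):
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='runs/hparam_tuning')
# one add_hparams call per run; the HParams tab then lets you sort and filter
# runs by these values instead of parsing them out of the directory path
writer.add_hparams(
    {'lr': learning_rate, 'bs': batch_size, 'drop_fc': dropout_fc},
    {'hparam/val_loss': best_val_loss},  # placeholder metric
)
writer.close()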
Related
I'm trying to train a basic graph neural network using the StellarGraph library, in particular starting from the example provided in [0].
The example works fine, but now I would like to repeat the same exercise removing the N-fold cross-validation and providing specific training, validation, and test sets. I'm trying to do so with the following code:
# One hot encoding
graph_training_set_labels_encoded = pd.get_dummies(graphs_training_set_labels, drop_first=True)
graph_validation_set_labels_encoded = pd.get_dummies(graphs_validation_set_labels, drop_first=True)

graphs = graphs_training_set + graphs_validation_set

# Graph generator preparation
generator = PaddedGraphGenerator(graphs=graphs)

train_gen = generator.flow([x for x in range(0, len(graphs_training_set))],
                           targets=graph_training_set_labels_encoded,
                           batch_size=batch_size)

valid_gen = generator.flow([x for x in range(len(graphs_training_set),
                                             len(graphs_training_set) + len(graphs_validation_set))],
                           targets=graph_validation_set_labels_encoded,
                           batch_size=batch_size)
# Stopping criterion
es = EarlyStopping(monitor="val_loss",
                   min_delta=0,
                   patience=20,
                   restore_best_weights=True)

# Model definition
gc_model = GCNSupervisedGraphClassification(layer_sizes=[64, 64],
                                            activations=["relu", "relu"],
                                            generator=generator,
                                            dropout=dropout_value)
x_inp, x_out = gc_model.in_out_tensors()
predictions = Dense(units=32, activation="relu")(x_out)
predictions = Dense(units=16, activation="relu")(predictions)
predictions = Dense(units=1, activation="sigmoid")(predictions)

# Creating Keras model and preparing it for training
model = Model(inputs=x_inp, outputs=predictions)
model.compile(optimizer=Adam(adam_value), loss=binary_crossentropy, metrics=["acc"])

# GNN Training
history = model.fit(train_gen, epochs=num_epochs, validation_data=valid_gen, verbose=0, callbacks=[es])

# Calculate performance on the validation data
test_metrics = model.evaluate(valid_gen, verbose=0)
valid_acc = test_metrics[model.metrics_names.index("acc")]
print(f"Test Accuracy model = {valid_acc}")
Where graphs_training_set and graphs_validation_set are lists of StellarDiGraphs.
I am able to run this piece of code, but it returns NaN as the result. What could be the problem?
Since this is the first time I am using StellarGraph, and in particular PaddedGraphGenerator, I think my mistake lies in the usage of that generator, but providing the training and validation sets in a different manner didn't produce better results.
Thank you in advance.
UPDATE: Fixed a typo in the code, as pointed out here (thanks to george123).
[0] https://stellargraph.readthedocs.io/en/stable/demos/graph-classification/gcn-supervised-graph-classification.html
I found a solution by digging into the StellarGraph documentation for PaddedGraphGenerator and for the graph-classification class GCNSupervisedGraphClassification. I also found a similar question on the StellarGraph issue tracker which points to the solution.
# Graph generator preparation
generator = PaddedGraphGenerator(graphs=graphs)

train_gen = generator.flow([x for x in range(0, num_graphs_for_training)],
                           targets=training_graphs_labels,
                           batch_size=35)

valid_gen = generator.flow([x for x in range(num_graphs_for_training, num_graphs_for_training + num_graphs_for_validation)],
                           targets=validation_graphs_labels,
                           batch_size=35)

# Stopping criterion
es = EarlyStopping(monitor="val_loss",
                   min_delta=0.001,
                   patience=10,
                   restore_best_weights=True)

# Model definition
gc_model = GCNSupervisedGraphClassification(layer_sizes=[64, 64],
                                            activations=["relu", "relu"],
                                            generator=generator,
                                            dropout=dropout_value)
x_inp, x_out = gc_model.in_out_tensors()
predictions = Dense(units=32, activation="relu")(x_out)
predictions = Dense(units=16, activation="relu")(predictions)
predictions = Dense(units=1, activation="sigmoid")(predictions)

# Let's create the Keras model and prepare it for training
model = Model(inputs=x_inp, outputs=predictions)
model.compile(optimizer=Adam(adam_value), loss=binary_crossentropy, metrics=["acc"])

# GNN Training
history = model.fit(train_gen, epochs=num_epochs, validation_data=valid_gen, verbose=1, callbacks=[es])

# Evaluate performance on the validation data
valid_metrics = model.evaluate(valid_gen, verbose=0)
valid_acc = valid_metrics[model.metrics_names.index("acc")]

# Define test set indices temporary vars
index_begin_test_set = num_graphs_for_training + num_graphs_for_validation
index_end_test_set = index_begin_test_set + num_graphs_for_testing
test_set_indices = [x for x in range(index_begin_test_set, index_end_test_set)]

# Evaluate performance on test set
generator_for_test_set = PaddedGraphGenerator(graphs=graphs)
test_gen = generator_for_test_set.flow(test_set_indices)
result = model.predict(test_gen)
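A possible follow-up (my own addition, assuming the usual 0.5 cut-off for a sigmoid output): model.predict returns probabilities, so to get hard class labels for the test set you can threshold them:
import numpy as np

# result has shape (num_graphs_for_testing, 1); threshold the sigmoid probabilities
predicted_labels = (result > 0.5).astype(int).ravel()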
I was trying to use the attention model described here in a simple bidirectional LSTM model. However, after adding the attention model, I got this error:
ValueError: Unknown initializer: GlorotUniform
To begin with, my code doesn't have any incompatibility issue from mixing TensorFlow in some parts and Keras in other parts of the code. I also tried every solution addressed in this post, but none of them worked for me. I should mention that my code worked with no issues before adding the attention model. So, I tried removing the attention part of the network structure line by line to see which line is causing the problem:
inputs = tf.keras.layers.Input(shape=(n_timesteps, n_features))
units = 50
activations = tf.keras.layers.Bidirectional(tf.compat.v1.keras.layers.CuDNNLSTM(units,
                                            return_sequences=True),
                                            merge_mode='concat')(inputs)
print(np.shape(activations))

# Implementation of attention
x1 = tf.keras.layers.Dense(1, activation='tanh')(activations)
print(np.shape(x1))
x1 = tf.keras.layers.Flatten()(x1)
print(np.shape(x1))
x1 = tf.keras.layers.Activation('softmax')(x1)
print(np.shape(x1))
x1 = tf.keras.layers.RepeatVector(units * 2)(x1)
print(np.shape(x1))
x1 = tf.keras.layers.Permute([2, 1])(x1)
print(np.shape(x1))
sent_representation = tf.keras.layers.Multiply()([activations, x1])
print(np.shape(sent_representation))
sent_representation = tf.keras.layers.Lambda(lambda xin: tf.keras.backend.sum(xin, axis=-2),
                                             output_shape=(units * 2,))(sent_representation)

# softmax for classification
x = tf.keras.layers.Dense(n_outputs, activation='softmax')(sent_representation)
model = tf.keras.models.Model(inputs=inputs, outputs=x)
I realized that it is the line with the Lambda function and tf.keras.backend.sum that is causing the error. So, after some searching, I decided to replace that line with the following:
sent_representation = tf.math.reduce_sum(sent_representation, axis=-2)
Now my code works. However, I am not quite sure if this substitution is correct. Am I doing this right?
Edit: Here are the next lines of the code; the problem occurs when I try to load the best model for testing:
optimizer = tf.keras.optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9)
model.compile(loss=lossFunction, optimizer=optimizer, metrics=['accuracy'])
print(model.summary())

# early stopping
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min',
                                      verbose=1, patience=20)
mc = tf.keras.callbacks.ModelCheckpoint('best_model.h5',
                                        monitor='val_accuracy', mode='max', verbose=1,
                                        save_best_only=True)
history = model.fit(trainX, trainy, validation_data=(valX, valy),
                    shuffle=True, epochs=epochs, verbose=0,
                    callbacks=[es, mc])

saved_model = tf.keras.models.load_model('best_model.h5',
                                         custom_objects={"GlorotUniform": tf.keras.initializers.glorot_uniform()})

# evaluate the model
_, train_acc = saved_model.evaluate(trainX, trainy, verbose=0)  # saved_model
_, val_acc = saved_model.evaluate(valX, valy, verbose=0)  # saved_model
_, accuracy = saved_model.evaluate(testX, testy, verbose=0)  # saved_model
print('Train: %.3f, Validation: %.3f, Test: %.3f' % (train_acc, val_acc, accuracy))

y_pred = saved_model.predict(testX, batch_size=64, verbose=1)
Do you see any problem in my code that might be the cause of the error I get when I use the Lambda layer?
The code you provided works for me without problems, both with tf.keras.backend.sum and with tf.math.reduce_sum.
The answer is that your substitution doesn't alter your network or what you are looking for. You can test it on your own and verify that tf.keras.backend.sum is equal to tf.math.reduce_sum:
X = np.random.uniform(0,1, (32,100,10)).astype('float32')
(tf.keras.backend.sum(X, axis=-2) == tf.reduce_sum(X, axis=-2)).numpy().all() # TRUE
I also suggest wrapping the operation in a Lambda layer, as shown in the sketch below.
EDIT: the usage of tf.reduce_sum or tf.keras.backend.sum, wrapped in a Lambda layer, doesn't raise an error when using a TF version >= 2.2.
When building a model, you should use layers only. If you want to use some TensorFlow ops (like tf.reduce_sum or tf.keras.backend.sum), you need to wrap them in a Keras Lambda layer. Without this the model can still work, but using Lambda is good practice in order to avoid future problems.
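For completeness, a minimal sketch of the wrapped version inside your attention block (using the same units variable as in your model):
# keep the reduction as a Keras layer rather than a bare TF op
sent_representation = tf.keras.layers.Lambda(
    lambda xin: tf.reduce_sum(xin, axis=-2),
    output_shape=(units * 2,))(sent_representation)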
I've read related threads but not been able to solve my problem.
I'm currently trying to get my model to run in order to classify 5000 different events, which all currently fall under the same category (so my "labels" dataset consists of 5000 1s).
I'm using one hot encoding for my labels data set:
labels = np.loadtxt("/content/drive/My Drive/5000labels1.csv")
from keras.utils import to_categorical
labels=to_categorical(labels) # convert labels to one-hot encoding
I then define my model like so:
inputs = keras.Input(shape=(29,29,1))
x=inputs
x = keras.layers.Conv2D(16, kernel_size=(3,3), name='Conv_1')(x)
x = keras.layers.LeakyReLU(0.1)(x)
x = keras.layers.MaxPool2D((2,2), name='MaxPool_1')(x)
x = keras.layers.Conv2D(16, kernel_size=(3,3), name='Conv_2')(x)
x = keras.layers.LeakyReLU(0.1)(x)
x = keras.layers.MaxPool2D((2,2), name='MaxPool_2')(x)
x = keras.layers.Conv2D(32, kernel_size=(3,3), name='Conv_3')(x)
x = keras.layers.LeakyReLU(0.1)(x)
x = keras.layers.MaxPool2D((2,2), name='MaxPool_3')(x)
x = keras.layers.Flatten(name='Flatten')(x)
x = keras.layers.Dense(64, name='Dense_1')(x)
x = keras.layers.ReLU(name='ReLU_dense_1')(x)
x = keras.layers.Dense(64, name='Dense_2')(x)
x = keras.layers.ReLU(name='ReLU_dense_2')(x)
outputs = keras.layers.Dense(4, activation='softmax', name='Output')(x)
model = keras.Model(inputs=inputs, outputs=outputs, name='VGGlike_CNN')
model.summary()
keras.utils.plot_model(model, show_shapes=True)
OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LR_ST)
model.compile(optimizer=OPTIMIZER,
              loss='categorical_crossentropy',
              metrics=['accuracy'],
              run_eagerly=False)

def lr_decay(epoch):
    if epoch < 10:
        return LR_ST
    else:
        return LR_ST * tf.math.exp(0.2 * (10 - epoch))

lr_scheduler = keras.callbacks.LearningRateScheduler(lr_decay)

model_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath='mycnn_best',
    monitor='val_accuracy',
    save_weights_only=True,
    save_best_only=True,
    save_freq='epoch')

callbacks = [lr_scheduler, model_checkpoint]

print('X_train.shape = ', X_train.shape)

history = model.fit(X_train, epochs=50,
                    validation_data=X_test, shuffle=True, verbose=1,
                    callbacks=callbacks)
I get the error: "No gradients provided for any variable: ['Conv_1_2/kernel:0', 'Conv_1_2/bias:0', 'Conv_2_2/kernel:0', 'Conv_2_2/bias:0', 'Conv_3_2/kernel:0', 'Conv_3_2/bias:0', 'Dense_1_2/kernel:0', 'Dense_1_2/bias:0', 'Dense_2_2/kernel:0', 'Dense_2_2/bias:0', 'Output_2/kernel:0', 'Output_2/bias:0']. "
From what I've read, it seems most likely due to a problem with the loss function, but I don't understand what the problem can be. Eventually I want the network to classify events into one of 4 categories, so I used categorical cross-entropy in order to get a probability associated with each category.
Can anyone help me? If needed I can provide a link to the google colab file of my original code.
Thanks in advance!
You are missing your targets; model.fit needs the labels as well as the inputs:
model.fit(X_train, y_train, ..., validation_data=(X_test, y_test))
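For example, a minimal sketch of wiring the labels in (train_test_split and the images array holding your 5000 events are assumptions about how your data is stored):
from sklearn.model_selection import train_test_split

# `images` is assumed to hold the 5000 events with shape (5000, 29, 29, 1)
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2)

history = model.fit(X_train, y_train, epochs=50,
                    validation_data=(X_test, y_test),
                    shuffle=True, verbose=1, callbacks=callbacks)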
I would like to keep track of the gradients over TensorBoard.
However, since session run statements are no longer a thing and the write_grads argument of tf.keras.callbacks.TensorBoard is deprecated, I would like to know how to keep track of gradients during training with Keras or TensorFlow 2.0.
My current approach is to create a new callback class for this purpose, but without success. Maybe someone else knows how to accomplish this kind of advanced stuff.
The test code is shown below, but it runs into errors regardless of whether I print a gradient value to the console or write it to TensorBoard.
import tensorflow as tf
from tensorflow.python.keras import backend as K

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu', name='dense128'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax', name='dense10')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

class GradientCallback(tf.keras.callbacks.Callback):
    console = True

    def on_epoch_end(self, epoch, logs=None):
        weights = [w for w in self.model.trainable_weights if 'dense' in w.name and 'bias' in w.name]
        loss = self.model.total_loss
        optimizer = self.model.optimizer
        gradients = optimizer.get_gradients(loss, weights)
        for t in gradients:
            if self.console:
                print('Tensor: {}'.format(t.name))
                print('{}\n'.format(K.get_value(t)[:10]))
            else:
                tf.summary.histogram(t.name, data=t)

file_writer = tf.summary.create_file_writer("./metrics")
file_writer.set_as_default()

# write_grads has been removed
tensorboard_cb = tf.keras.callbacks.TensorBoard(histogram_freq=1, write_grads=True)
gradient_cb = GradientCallback()

model.fit(x_train, y_train, epochs=5, callbacks=[gradient_cb, tensorboard_cb])
Printing bias gradients to the console (console parameter = True) leads to:
AttributeError: 'Tensor' object has no attribute 'numpy'
Writing to TensorBoard (console parameter = False) creates:
TypeError: Using a tf.Tensor as a Python bool is not allowed. Use if t is not None: instead of if t: to test if a tensor is defined, and use TensorFlow ops such as tf.cond to execute subgraphs conditioned on the value of a tensor.
To compute the gradients of the loss against the weights, use
with tf.GradientTape() as tape:
    predictions = model(x)            # x: a batch of inputs
    loss = loss_fn(y, predictions)    # y: the targets, loss_fn: your loss function
grads = tape.gradient(loss, model.trainable_weights)
This is (arguably poorly) documented on GradientTape.
We do not need to tape.watch the variable because trainable parameters are watched by default.
As a function (note that this version takes the gradient with respect to the input x rather than the weights), it can be written as
def gradient(model, x):
    x_tensor = tf.convert_to_tensor(x, dtype=tf.float32)
    with tf.GradientTape() as t:
        t.watch(x_tensor)
        loss = model(x_tensor)
    return t.gradient(loss, x_tensor).numpy()
Also have a look here: https://github.com/tensorflow/tensorflow/issues/31542#issuecomment-630495970
richardwth wrote a child class of the TensorBoard callback.
I adapted it as follows:
class ExtendedTensorBoard(tf.keras.callbacks.TensorBoard):
    def _log_gradients(self, epoch):
        writer = self._writers['train']

        with writer.as_default(), tf.GradientTape() as g:
            # here we use test data to calculate the gradients
            features, y_true = list(val_dataset.batch(100).take(1))[0]
            y_pred = self.model(features)  # forward-propagation
            loss = self.model.compiled_loss(y_true=y_true, y_pred=y_pred)  # calculate loss
            gradients = g.gradient(loss, self.model.trainable_weights)  # back-propagation

            # In eager mode, grads does not have name, so we get names from model.trainable_weights
            for weights, grads in zip(self.model.trainable_weights, gradients):
                tf.summary.histogram(
                    weights.name.replace(':', '_') + '_grads', data=grads, step=epoch)

        writer.flush()

    def on_epoch_end(self, epoch, logs=None):
        # This function overwrites on_epoch_end in tf.keras.callbacks.TensorBoard,
        # but we do need to run the original on_epoch_end, so here we use the super function.
        super(ExtendedTensorBoard, self).on_epoch_end(epoch, logs=logs)
        if self.histogram_freq and epoch % self.histogram_freq == 0:
            self._log_gradients(epoch)
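A minimal usage sketch (assuming val_dataset is a tf.data.Dataset of (features, labels), since _log_gradients reads it as a global):
# build the validation dataset the callback expects, then fit as usual
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))

ext_tensorboard = ExtendedTensorBoard(log_dir="./logs", histogram_freq=1)
model.fit(x_train, y_train, epochs=5, callbacks=[ext_tensorboard])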
My model weights (I output them to weights_before.txt and weights_after.txt) are precisely the same before and after the training, i.e. the training doesn't change anything, there's no fitting happening.
My data look like this (I basically want the model to predict the sign of feature, result is 0 if feature is negative, 1 if positive):
,feature,zerosColumn,result
0,-5,0,0
1,5,0,1
2,-3,0,0
3,5,0,1
4,3,0,1
5,3,0,1
6,-3,0,0
...
Brief summary of my approach:
Load the data.
Split it column-wise to x (feature) and y (result), split these two row-wise to test and validation sets.
Transform these sets into TimeseriesGenerators (not necessary in this scenario but I want to get this setup working and I don't see any reason why it shouldn't).
Create and compile simple Sequential model with few Dense layers and softmax activation on its output layer, use binary_crossentropy as loss function.
Train the model... nothing happens!
Complete code follows:
import keras
import pandas as pd
import numpy as np
np.random.seed(570)
TIMESERIES_LENGTH = 1
TIMESERIES_SAMPLING_RATE = 1
TIMESERIES_BATCH_SIZE = 1024
TEST_SET_RATIO = 0.2 # the portion of total data to be used as test set
VALIDATION_SET_RATIO = 0.2 # the portion of total data to be used as validation set
RESULT_COLUMN_NAME = 'feature'
FEATURE_COLUMN_NAME = 'result'
def create_network(csv_path, save_model):
    before_file = open("weights_before.txt", "w")
    after_file = open("weights_after.txt", "w")

    data = pd.read_csv(csv_path)
    data[RESULT_COLUMN_NAME] = data[RESULT_COLUMN_NAME].shift(1)
    data = data.dropna()

    x = data.ix[:, 1:2]
    y = data.ix[:, 3]

    test_set_length = int(round(len(x) * TEST_SET_RATIO))
    validation_set_length = int(round(len(x) * VALIDATION_SET_RATIO))

    x_train_and_val = x[:-test_set_length]
    y_train_and_val = y[:-test_set_length]
    x_train = x_train_and_val[:-validation_set_length].values
    y_train = y_train_and_val[:-validation_set_length].values
    x_val = x_train_and_val[-validation_set_length:].values
    y_val = y_train_and_val[-validation_set_length:].values

    train_gen = keras.preprocessing.sequence.TimeseriesGenerator(
        x_train,
        y_train,
        length=TIMESERIES_LENGTH,
        sampling_rate=TIMESERIES_SAMPLING_RATE,
        batch_size=TIMESERIES_BATCH_SIZE
    )

    val_gen = keras.preprocessing.sequence.TimeseriesGenerator(
        x_val,
        y_val,
        length=TIMESERIES_LENGTH,
        sampling_rate=TIMESERIES_SAMPLING_RATE,
        batch_size=TIMESERIES_BATCH_SIZE
    )

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(10, activation='relu', input_shape=(TIMESERIES_LENGTH, 1)))
    model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Dense(10, activation='relu'))
    model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(1, activation='softmax'))

    for item in model.get_weights():
        before_file.write("%s\n" % item)

    model.compile(
        loss=keras.losses.binary_crossentropy,
        optimizer="adam",
        metrics=[keras.metrics.binary_accuracy]
    )

    history = model.fit_generator(
        train_gen,
        epochs=10,
        verbose=1,
        validation_data=val_gen
    )

    for item in model.get_weights():
        after_file.write("%s\n" % item)

    before_file.close()
    after_file.close()

create_network("data/sign_data.csv", False)
Do you have any ideas?
The problem is that you are using softmax as the activation function of the last layer. Softmax normalizes its input so that its elements sum to one; therefore, if you use it on a layer with only one unit (i.e. Dense(1, ...)), it will always output 1. To fix this, change the activation function of the last layer to sigmoid, which outputs a value in the range (0, 1).
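You can see the effect with a quick numerical check, independent of the model above:
import numpy as np

logits = np.array([[-3.0], [0.0], [5.0]])  # a batch of single-unit outputs
softmax = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)
sigmoid = 1.0 / (1.0 + np.exp(-logits))
print(softmax.ravel())  # [1. 1. 1.] -- softmax over one unit is always 1
print(sigmoid.ravel())  # roughly [0.047 0.5 0.993]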