What causes tensorflow keras Conv1D to only run the 1st epoch?

Currently I am using TensorFlow to create a neural network with a 1D convolutional layer and a Dense layer to predict a single output value. The input array for the neural network is an array of 1500 samples; each sample is an array of 27x13 values.
I started training in the same manner as I did without the 1D conv layer, but the training stopped during the first epoch without warning.
I found that multiprocessing might be the cause and for that, I should turn multiprocessing off as discussed here: https://github.com/stellargraph/stellargraph/issues/1006
basically adding this to my keras model:
That did not change anything, after which I found that I should probably use a DataSet to bypass multiprocessing issues according to
Replace tf.keras.Sequence objects with tf.data.Dataset #1206
after struggling with the difference between
I found the following code to start executing the model.fit block again. As you might have guessed, it still stops running after the first epoch:
main loop started
Epoch 1/5
Can someone pinpoint the source of the halting of the program?
This is my code:
import random
import numpy as np
from keras import backend as K
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import load_model
from keras.callbacks import CSVLogger
def tfdata_generator(x, y, is_training, batch_size=BATCH_SIZE):
    """Build a batched, endlessly repeating ``tf.data.Dataset`` from ``(x, y)``.

    Args:
        x: Feature array; it is sliced along its first axis.
        y: Target array, sliced in lockstep with ``x``.
        is_training: When true, samples are shuffled before batching.
        batch_size: Number of samples per batch.

    Returns:
        A dataset yielding ``(x_batch, y_batch)`` pairs.

    Note:
        ``repeat()`` is called without a count, so the dataset never ends.
        Whatever consumes it must bound the epoch length itself (for
        ``model.fit`` that means passing ``steps_per_epoch``); otherwise the
        first epoch never finishes.
    """
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    if is_training:
        dataset = dataset.shuffle(1500)  # depends on sample size
    # Honour the caller-supplied batch size instead of the module constant.
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat()
    dataset = dataset.prefetch(1)
    return dataset
def main():
# Entry point of the example: creates random training data, builds and
# compiles a small Keras model, then saves it and loads it back.
# NOTE(review): indentation and the model.fit(...) call appear to have been
# lost from this excerpt — confirm against the original script.
print("main loop started")
X_train = np.random.randn(1500, 27, 13)
Y_train = np.random.randn(1500, 1)
training_set = tfdata_generator(X_train, Y_train, is_training=True)
data = np.random.randn(1500, 27, 13), Y_train
# NOTE(review): this rebinds training_set, discarding the pipeline returned by
# tfdata_generator above; from_tensors wraps the whole (X_train, Y_train) pair
# as one dataset element rather than slicing it per sample.
training_set = tf.data.Dataset.from_tensors((X_train, Y_train))
# NOTE(review): "\D" and "\C" are not recognised escape sequences; a raw
# string (r"...") would avoid the invalid-escape warning.
logstring = "C:\Documents\Conv1D"
csv_logger = CSVLogger((logstring + ".csv"), append=True, separator=';')
# Monitors 'val_loss'. NOTE(review): no validation data is visible in this
# excerpt — confirm one is passed to fit, otherwise the metric is unavailable.
early_stopper = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, min_delta=0.00001)
model = keras.Sequential()
model.add(layers.Dense(70, activation='relu', name="layer2"))
optimizer =keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss="mean_squared_error")
# WARNING:tensorflow:multiprocessing can interact badly with TensorFlow, causing nondeterministic deadlocks. For high performance data pipelines tf.data is recommended.
# NOTE(review): the next two lines look like leftover keyword arguments of the
# missing model.fit(...) call; as written they bind one-element tuples.
epochs = EPOCHS,
verbose = 2,
modelstring = "C:\Documents\Conv1D_finishedmodel"
model.save(modelstring, overwrite=True)
model = load_model(modelstring)


Convolutional LSTM Model Dimension Incompatibility when making predictions & prediction dimension issues

I structured a Convolutional LSTM model to predict the forthcoming Bitcoin price data, using the analyzed past data of the Bitcoin close price and other features.
Let me jump straight to the code:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
import tensorflow.keras as keras
import keras_tuner as kt
from keras_tuner import HyperParameters as hp
from keras.models import Sequential
from keras.layers import InputLayer, ConvLSTM1D, LSTM, Flatten, RepeatVector, Dense, TimeDistributed
from keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
import keras.backend as K
from keras.losses import Huber
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
DIR = '../input/btc-features-targets'
SEG_DIR = '../input/segmented'
segmentized_features = os.listdir(SEG_DIR)
btc_train_features = []
for seg in segmentized_features:
train_features = pd.read_csv(f'{SEG_DIR}/{seg}')
train_features.set_index('date', inplace=True)
btc_train_targets = pd.read_csv(f'{DIR}/btc_train_targets.csv')
btc_train_targets.set_index('date', inplace=True)
btc_test_features = pd.read_csv(f'{DIR}/btc_test_features.csv')
btc_tef1 = btc_test_features.iloc[:111]
btc_tef2 = btc_test_features.iloc[25:]
btc_tef1.set_index('date', inplace=True)
btc_tef2.set_index('date', inplace=True)
btc_test_targets = pd.read_csv(f'{DIR}/btc_test_targets.csv')
btc_test_targets.set_index('date', inplace=True)
btc_trt_log = np.log(btc_train_targets)
btc_tefs1 = scaler.fit_transform(btc_tef1.values)
btc_tefs2 = scaler.fit_transform(btc_tef2.values)
btc_tet_log = np.log(btc_test_targets)
scaled_train_features = []
for features in btc_train_features:
shape = features.shape
scaled_train_features.append(np.expand_dims(features, [0,3]))
shape_2 = btc_tefs1.shape
btc_tefs1 = np.expand_dims(btc_tefs1, [0,3])
shape_3 = btc_tefs2.shape
btc_tefs2 = np.expand_dims(btc_tefs2, [0,3])
btc_trt_log = btc_trt_log.values[0]
btc_tet_log = btc_tet_log.values[0]
def build(hp):
# Model-building function handed to kt.Hyperband: samples hyperparameters from
# `hp`, assembles a Sequential model and returns it.
# NOTE(review): the parameter `hp` shadows the module-level alias `hp`
# (HyperParameters) imported near the top of the script.
# NOTE(review): indentation and several statements appear to have been lost
# from this excerpt — the sections headed "Input Layer", "ConvLSTM1D",
# "Flatten", "RepeatVector", "TimeDistributed Dense_Output" and "Compile Model"
# contain no layer/compile calls here, so the values sampled for them are
# unused as shown. Confirm against the original notebook.
model = keras.Sequential()
# Input Layer
# ConvLSTM1D
convLSTM_hp_filters = hp.Int(name='convLSTM_filters', min_value=32, max_value=512, step=32)
convLSTM_hp_kernel_size = hp.Choice(name='convLSTM_kernel_size', values=[3,5,7])
convLSTM_activation = hp.Choice(name='convLSTM_activation', values=['selu', 'relu'])
# Flatten
# RepeatVector
LSTM_hp_units = hp.Int(name='LSTM_units', min_value=32, max_value=512, step=32)
LSTM_activation = hp.Choice(name='LSTM_activation', values=['selu', 'relu'])
model.add(LSTM(units=LSTM_hp_units, activation=LSTM_activation, return_sequences=True))
# TimeDistributed Dense
dense_units = hp.Int(name='dense_units', min_value=32, max_value=512, step=32)
dense_activation = hp.Choice(name='dense_activation', values=['selu', 'relu'])
model.add(TimeDistributed(Dense(units=dense_units, activation=dense_activation)))
# TimeDistributed Dense_Output
# Set Learning Rate
hp_learning_rate = hp.Choice(name='learning_rate', values=[1e-2, 1e-3, 1e-4])
# Compile Model
return model
tuner = kt.Hyperband(build,
objective=kt.Objective('root_mean_squared_error', direction='min'),
early_stop = EarlyStopping(monitor='root_mean_squared_error', patience=5)
opt_hps = []
for train_features in scaled_train_features:
tuner.search(train_features, btc_trt_log, epochs=50, callbacks=[early_stop])
models, epochs = ([] for _ in range(2))
for hps in opt_hps:
model = tuner.hypermodel.build(hps)
history = model.fit(train_features, btc_trt_log, epochs=70, verbose=0)
rmse = history.history['root_mean_squared_error']
best_epoch = rmse.index(min(rmse)) + 1
hypermodel = tuner.hypermodel.build(opt_hps[0])
for train_features, epoch in zip(scaled_train_features, epochs): hypermodel.fit(train_features, btc_trt_log, epochs=epoch)
tp1 = hypermodel.predict(btc_tefs1).flatten()
tp2 = hypermodel.predict(btc_tefs2).flatten()
test_predictions = np.concatenate((tp1, tp2[86:]), axis=None)
The hyperparameters of the model are configured using keras_tuner; as there were ResourceExhaustError issues output by the notebook when training is done with the full features dataset, sequentially segmented datasets are used instead (and apparently, referring to the study done utilizing the similar model architecture, training is able to be efficiently done through this training approach).
The input dimension of each segmented dataset is (111,32,1).
No issues are reported until the last code block. The models work fine. Yet, when the .predict() function is executed, the notebook prints out an error stating that the dimension of the input features for making predictions is incompatible with the dimension of the input features used while training. I did not understand the reason behind its occurrence, since as far as I know, the input dimensions of a train dataset for a DNN model need not be identical to the input dimensions of a test dataset.
Even though all the price data from 2018 to early 2021 are used as training datasets, predictions are only needed for the mid 2021 timeframe.
The dataset used for prediction has a dimension of (136,32,1).
I tried matching the dimension of this dataset to (111,32,1), through index slicing.
Now this showed issues in the output dimension. While predictions should be made for 136 data points, the result only returned 10.
Are there any issues relevant to the model configuration? Cannot interpret the current situation.

Loss & accuracy don't improve in Xception (image classification)

As a trial, I'm implementing Xception to classify images without using pretrained weight in Tensorflow.
However, the accuracy is too low compared to the original paper.
Could somebody share any advice to address this problem?
I prepared 500 of the 1000 ImageNet classes and trained the ready-made Xception model on this data from scratch.
I tried the same learning rate and optimizer as used in the original paper.
– Optimizer: SGD
– Momentum: 0.9
– Initial learning rate: 0.045
– Learning rate decay: decay of rate 0.94 every 2 epochs
However, this did not work so well.
I know it is better to use all of 1000 classes rather than only 500, however, I couldn't prepare storage for it.
Did it affect the performance of my code?
Here is my code.
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers, losses, models, optimizers, callbacks, applications, preprocessing
# scheduler
def scheduler(epoch, lr):
    """Return the learning rate for ``epoch`` (0-based).

    Implements the stated schedule: start at 0.045 and multiply by 0.94 once
    every 2 epochs. Floor division gives a staircase, so both epochs of a pair
    share the same rate instead of decaying a little on every epoch.

    Args:
        epoch: Index of the epoch about to start.
        lr: Current learning rate; unused, but part of the signature that
            ``LearningRateScheduler`` calls.

    Returns:
        The learning rate to use for this epoch.
    """
    return 0.045 * 0.94 ** (epoch // 2)
lr_decay = callbacks.LearningRateScheduler(scheduler)
# early stopping
EarlyStopping = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=500, verbose=0, mode='auto', restore_best_weights=True)
# build xception
inputs = tf.keras.Input(shape=(224, 224, 3))
x = tf.cast(inputs, tf.float32)
x = tf.keras.applications.xception.preprocess_input(x) #preprocess image
x = applications.xception.Xception(weights=None, include_top=False,)(x, training=True)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(nb_class)(x)
outputs = layers.Softmax()(x)
model = tf.keras.Model(inputs, outputs)
model.compile(optimizer=optimizers.SGD(momentum=0.9, nesterov=True),
loss = 'categorical_crossentropy',
metrics= ['accuracy'])
# fitting data
history = model.fit(image_gen(df_train_chunk, 224, 224, ), #feed images with a generator
batch_size = 32,
steps_per_epoch = 64,
validation_data = image_gen(df_valid_chunk, 224, 224, ), #feed images with a generator
validation_steps = 64,
callbacks = [lr_decay, EarlyStopping],
My results are below. In the original paper, its accuracy reached around 0.8.
In contrast, the performance of my code is too poor.
Some might wonder if my generator got wrong, so I put my generator code and result below.
from PIL import Image, ImageEnhance, ImageOps
def image_gen(df_data, h, w, shuffle=True):
    """Endlessly yield single-image batches built from a frame of paths/labels.

    Args:
        df_data: DataFrame with a ``'Path'`` column (image file to open) and a
            ``'Class'`` column (used directly as the index of the hot entry).
        h: Target image height.
        w: Target image width.
        shuffle: When true, the rows are re-sampled in random order at the
            start of every pass over the data.

    Yields:
        ``(X, y)`` where ``X`` is the resized RGB image as an array with a
        leading axis of length 1, and ``y`` is a one-hot row of shape
        ``(1, nb_class)``.

    Note:
        The one-hot width is the number of distinct values in ``'Class'``, so
        labels are assumed to be the integers ``0 .. nb_class - 1``.
    """
    nb_class = len(np.unique(df_data['Class']))
    while True:
        if shuffle:
            df_data = df_data.sample(frac=1)
        for img_path, klass in zip(df_data['Path'], df_data['Class']):
            picture = Image.open(img_path).convert('RGB').resize((w, h))
            X = np.expand_dims(preprocessing.image.img_to_array(picture), axis=0)
            one_hot = np.zeros(nb_class)
            one_hot[klass] = 1
            yield X, np.expand_dims(one_hot, axis=0)
train_gen = image_gen(df_train_chunk, 224, 224, )
for i in range(5):
X, y = next(train_gen)
print('\n\n class: ', y.argmax(-1))
the result is below.
When you chose only 500 labels, did you choose the first 500?
The softmax output indices start at 0, so make sure your labels also run from 0 to 499.

How to implement a CNN-LSTM using Keras

I am attempting to implement a CNN-LSTM that classifies mel-spectrogram images representing the speech of people with Parkinson's Disease/Healthy Controls. I am trying to implement a pre-existing model (DenseNet-169) with an LSTM model, however I am running into the following error: ValueError: Input 0 of layer zero_padding2d is incompatible with the layer: expected ndim=4, found ndim=3. Full shape received: [None, 216, 1]. Can anyone advise where I'm going wrong?
import librosa
import os
import glob
import IPython.display as ipd
from pathlib import Path
import timeit
import time, sys
%matplotlib inline
import matplotlib.pyplot as plt
import librosa.display
import pandas as pd
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import numpy as np
import cv2
import seaborn as sns
%tensorflow_version 1.x #version 1 works without problems
import tensorflow
from tensorflow.keras import models
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import TimeDistributed
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix, plot_confusion_matrix
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense, BatchNormalization, Activation, GaussianNoise, LSTM
from sklearn.metrics import accuracy_score
DATA_DIR = Path('/content/drive/MyDrive/PhD_Project_Experiments/Spontaneous_Dialogue_PD_Dataset')
diagnosis = [x.name for x in DATA_DIR.glob('*') if x.is_dir()]
def create_paths_ds(paths: Path, label: str) -> list:
    """Pair every file in ``paths`` that matches ``'*' + EXTENSION_TYPE`` with ``label``.

    Args:
        paths: Directory to search (non-recursive glob).
        label: Label attached to each file found.

    Returns:
        A list of ``(file_path, label)`` tuples in glob order.
    """
    pattern = '*' + EXTENSION_TYPE
    labelled = []
    for file_path in paths.glob(pattern):
        labelled.append((file_path, label))
    return labelled
from collections import Counter
categories_to_use = [
NUM_CLASSES = len(categories_to_use)
print(f'Number of classes: {NUM_CLASSES}')
paths_all_labels = []
for cat in categories_to_use:
paths_all_labels += create_paths_ds(DATA_DIR / cat, cat)
X_train, X_test = train_test_split(paths_all_labels,test_size=0.1, stratify = [paths_all_labels[y][1] for y in range(len(paths_all_labels))] ) #fix stratified sampling for test data
X_train, X_val = train_test_split(X_train, test_size=0.2, stratify = [X_train[y][1] for y in range(len(X_train))] )
for i in categories_to_use:
print('Number of train samples for '+i+': '+ str([X_train[y][1] for y in range(len(X_train))].count(i))) #checks whether train samples are equally divided
print('Number of test samples for '+i+': '+ str([X_test[y][1] for y in range(len(X_test))].count(i))) #checks whether test samples are equally divided
print('Number of validation samples for '+i+': '+ str([X_val[y][1] for y in range(len(X_val))].count(i))) #checks whether val samples are equally divided
print(f'Train length: {len(X_train)}')
print(f'Validation length: {len(X_val)}')
print(f'Test length: {len(X_test)}')
def load_and_preprocess_lstm(dataset, SAMPLE_SIZE = 30):
    """Cut every recording into fixed-length fragments and return their mel-spectrograms.

    Each file is split into ``int(duration / SAMPLE_SIZE)`` back-to-back
    fragments of ``SAMPLE_SIZE`` seconds. The leftover audio is divided evenly
    between the start and the end of the file, so the fragments are centred.

    Args:
        dataset: Iterable of ``(path, label)`` pairs. It must support ``len()``
            because the progress percentage is computed from it.
        SAMPLE_SIZE: Fragment length in seconds.

    Returns:
        ``(data, labels)``: ``data`` is a list of ``(spectrogram, label)``
        tuples, where each spectrogram is resized with ``dsize=(216, 128)``
        and given a trailing channel axis. ``labels`` holds the matching
        labels in the same order.
    """
    IMG_SIZE = (216,128)
    data = []
    labels = []
    progress = 0  # files processed so far; was never initialised before use
    for (path, label) in dataset:
        audio, sr = librosa.load(path)
        dur = librosa.get_duration(y=audio, sr=sr)
        sampleNum = int(dur / SAMPLE_SIZE)
        offset = (dur % SAMPLE_SIZE) / 2
        for i in range(sampleNum):
            # Advance by a whole fragment each time; stepping by `i` seconds
            # produced heavily overlapping windows that were 1 s apart.
            fragment_start = offset + i * SAMPLE_SIZE
            audio, sr = librosa.load(path, offset=fragment_start, duration=SAMPLE_SIZE)
            sample = librosa.feature.melspectrogram(y=audio, sr=sr)
            # print(sample.shape)
            sample = cv2.resize(sample, dsize=IMG_SIZE)
            sample = np.expand_dims(sample,-1)
            data += [(sample, label)]
            labels += [label]
        progress +=1
        print('\r Progress: '+str(round(100*progress/len(dataset))) + '%', end='')
    return data, labels
def retrieve_samples(sample_size, model_type):
    """Load the train, test and validation fragments for the chosen model type.

    Reads the module-level ``X_train``, ``X_test`` and ``X_val`` path lists
    and runs each through the loader that matches ``model_type``.

    Args:
        sample_size: Fragment length in seconds, forwarded to the loader.
        model_type: ``'cnn'`` uses ``load_and_preprocess_cnn``; ``'lstm'`` and
            ``'cnnlstm'`` both use ``load_and_preprocess_lstm``.

    Returns:
        ``(X_train_samples, X_test_samples, X_val_samples)``.

    Raises:
        ValueError: If ``model_type`` is not one of the supported values.
    """
    # Resolve the loader lazily so only the one actually needed must exist.
    if model_type == 'cnn':
        loader = load_and_preprocess_cnn
    elif model_type in ('lstm', 'cnnlstm'):
        loader = load_and_preprocess_lstm
    else:
        raise ValueError(
            "Unknown model_type " + repr(model_type)
            + "; expected 'cnn', 'lstm' or 'cnnlstm'"
        )

    # The label lists returned by the loaders are not used here.
    print("\nLoading train samples")
    X_train_samples, _ = loader(X_train, sample_size)
    print("\nLoading test samples")
    X_test_samples, _ = loader(X_test, sample_size)
    print("\nLoading val samples")
    X_val_samples, _ = loader(X_val, sample_size)

    print("shape: " + str(X_train_samples[0][0].shape))
    print("number of training samples: "+ str(len(X_train_samples)))
    print("number of validation samples: "+ str(len(X_val_samples)))
    print("number of test samples: "+ str(len(X_test_samples)))
    return X_train_samples, X_test_samples, X_val_samples
def create_cnn_lstm_model(input_shape):
    """Build a DenseNet169 -> LSTM classifier over a sequence of images.

    Args:
        input_shape: Shape of ONE sample without the batch axis, as
            ``(frames, height, width, channels)``. ``frames`` is the time axis
            the CNN is distributed over and may be ``None``.

    Returns:
        A compiled ``Sequential`` model.

    Raises:
        ValueError: If ``input_shape`` is not 4-D. Passing a single image
            shape makes ``TimeDistributed`` treat the height as the time axis
            and hand 2-D slices to the CNN, which cannot work.
    """
    input_shape = tuple(input_shape)
    if len(input_shape) != 4:
        raise ValueError(
            "create_cnn_lstm_model expects input_shape=(frames, height, width, "
            "channels); got " + str(input_shape)
        )
    # The CNN sees a single frame; only TimeDistributed sees the time axis.
    frame_shape = input_shape[1:]
    cnn = tensorflow.keras.applications.DenseNet169(include_top=True, weights=None, input_tensor=None, input_shape=frame_shape, pooling=None, classes=2)

    model = Sequential()
    # define LSTM model
    model.add(tensorflow.keras.layers.TimeDistributed(cnn, input_shape=input_shape))
    # input_shape is only meaningful on the first layer, so it is not repeated here.
    model.add(LSTM(units = 512, dropout=0.5, recurrent_dropout=0.3, return_sequences = True))
    model.add(LSTM(units = 512, dropout=0.5, recurrent_dropout=0.3, return_sequences = False))
    model.add(Dense(units=NUM_CLASSES, activation='sigmoid'))
    # Compile
    model.compile(loss=tensorflow.keras.losses.binary_crossentropy, optimizer='adam', metrics=['accuracy'])
    return model
def create_model_data_and_labels(X_train_samples, X_val_samples, X_test_samples):
    """Turn lists of ``(sample, label)`` pairs into arrays ready for ``model.fit``.

    The label encoder is fitted on the training labels only and then reused
    for the validation and test labels.

    Args:
        X_train_samples: Training ``(sample, label)`` pairs.
        X_val_samples: Validation ``(sample, label)`` pairs.
        X_test_samples: Test ``(sample, label)`` pairs.

    Returns:
        ``(x_train, y_train, x_val, y_val, x_test, y_test, labelizer)``, where
        the ``y_*`` arrays are one-hot encoded and ``labelizer`` is the fitted
        ``LabelEncoder``.
    """
    # Prepare samples to work for training the model
    labelizer = LabelEncoder()

    # prepare training data and labels
    x_train = np.array([x[0] for x in X_train_samples])
    y_train = np.array([x[1] for x in X_train_samples])
    y_train = labelizer.fit_transform(y_train)
    # Fix the one-hot width from the fitted classes so that every split gets
    # the same number of columns, even if a split lacks the highest class.
    num_classes = len(labelizer.classes_)
    y_train = to_categorical(y_train, num_classes=num_classes)

    # prepare validation data and labels
    x_val = np.array([x[0] for x in X_val_samples])
    y_val = np.array([x[1] for x in X_val_samples])
    y_val = labelizer.transform(y_val)
    y_val = to_categorical(y_val, num_classes=num_classes)

    # prepare test data and labels
    x_test = np.array([x[0] for x in X_test_samples])
    y_test = np.array([x[1] for x in X_test_samples])
    y_test = labelizer.transform(y_test)
    y_test = to_categorical(y_test, num_classes=num_classes)

    return x_train, y_train, x_val, y_val, x_test, y_test, labelizer
#Main loop for testing multiple sample sizes
#choose model type: 'cnn', 'lstm' or 'cnnlstm'
model_type = 'cnnlstm'
n_epochs = 20
patience= 20
es = EarlyStopping(patience=20)
fragment_sizes = [5,10]
start = timeit.default_timer()
ModelData = pd.DataFrame(columns = ['Model Type','Fragment size (s)', 'Time to Compute (s)', 'Early Stopping epoch', 'Training accuracy', 'Validation accuracy', 'Test Accuracy']) #create a DataFrame for storing the results
conf_matrix_data = []
for i in fragment_sizes:
start_per_size = timeit.default_timer()
print(f'\n---------- Model trained on fragments of size: {i} seconds ----------------')
X_train_samples, X_test_samples, X_val_samples = retrieve_samples(i,model_type)
x_train, y_train, x_val, y_val, x_test, y_test, labelizer = create_model_data_and_labels(X_train_samples, X_val_samples, X_test_samples)
if model_type == 'cnn':
model = create_cnn_model(X_train_samples[0][0].shape)
elif model_type == 'lstm':
model = create_lstm_model(X_train_samples[0][0].shape)
elif model_type == 'cnnlstm':
model = create_cnn_lstm_model(X_train_samples[0][0].shape)
history = model.fit(x_train, y_train,
batch_size = 8,
validation_data=(x_val, y_val))
print('Finished training')
early_stopping_epoch = len(history.history['accuracy'])
training_accuracy = history.history['accuracy'][early_stopping_epoch-1-patience]
validation_accuracy = history.history['val_accuracy'][early_stopping_epoch-1-patience]
plot_data(history, i)
predictions = model.predict(x_test)
score = accuracy_score(labelizer.inverse_transform(y_test.argmax(axis=1)), labelizer.inverse_transform(predictions.argmax(axis=1)))
print('Fragment size = ' + str(i) + ' seconds')
print('Accuracy on test samples: ' + str(score))
conf_matrix_data += [(predictions, y_test, i)]
stop_per_size = timeit.default_timer()
time_to_compute = round(stop_per_size - start_per_size)
print ('Time to compute: '+str(time_to_compute))
ModelData.loc[len(ModelData)] = [model_type, i, time_to_compute, early_stopping_epoch, training_accuracy, validation_accuracy, score] #store particular settings configuration, early stoppping epoch and accuracies in dataframe
stop = timeit.default_timer()
print ('\ntime to compute: '+str(stop-start))
I believe the input_shape is (128, 216, 1)
The issue here is that you don't have a time-axis to time distribute your CNN (DenseNet169) layer over.
In this step -
tensorflow.keras.layers.TimeDistributed(cnn, input_shape=(128,216,1)))
You are passing the 128 dimension axis as a time-axis. That means each of the CNN (DenseNet169) is left with a input shape of (216,1), which is not an image and therefore throws an error because it's expecting 3D tensors (images) and not 2D tensors.
Your input shape needs to be a 4D tensor something like - (10, 128, 216, 1), so that the 10 becomes the time axis (for time distributing), and (128, 216, 1) becomes an image input for the CNN (DenseNet169).
A solution with ragged tensors and time-distributed layer
IIUC, your data contains n audio files, each file containing a variable number of mel-spectrogram images.
You need to use ragged tensors (tf.RaggedTensor) to be able to work with variable tensor shapes as inputs to the model
This requires an explicit definition of an Input layer where you set ragged=True
This allows you to pass each audio file as a single sample, with variable images, each of which will be time distributed.
You will have to use None as the time distributed axis shape while defining the model
1. Creating a dummy dataset
Let's start with a sample dataset -
import tensorflow as tf
from tensorflow.keras import layers, Model, utils, applications
#Assuming there are 5 audio files
num_audio = 5
data = []
#Create a random number of mel-spectrograms for each audio file
for i in range(num_audio):
n_images = np.random.randint(4,10)
print([i.shape for i in data])
[(5, 128, 216, 1),
(5, 128, 216, 1),
(9, 128, 216, 1),
(6, 128, 216, 1),
(4, 128, 216, 1)]
So, your data should be looking something like this. Here, I have a dummy dataset with 5 audio files, first one has 5 images of shape (128,216,1), while the last one has 4 images of the same shape.
2. Converting them to ragged-tensors
Next, let's convert and store these as ragged tensors. Ragged tensors allow variable-length objects to be stored, in this case, a variable number of images. Read more about them here.
#Convert each set of images (for each audio) to tensors and then a ragged tensor
tensors = [tensorflow.convert_to_tensor(i) for i in data]
X_train = tensorflow.ragged.stack(tensors).to_tensor()
#Creating dummy y_train, one for each audio files
y_train = tensorflow.convert_to_tensor(np.random.randint(0,2,(5,2)))
3. Create a model
I am using a functional API since I find it more readable and works better with an explicit input layer, but you can use input layers in Sequential API as well. Feel free to convert it to your preference.
Notice that I am using (None,128,216,1) as input shape. This creates a 5-dimensional input (the first, implicit dimension is the batch) as - (batch of audio files, images per file, h, w, channels)
I have a dummy LSTM layer to showcase how the architecture works, feel free to stack more layers. Also, do note, that your DenseNet169 is only returning 2 features. And therefore your TimeDistributed layers is returning (None, None, 2) shaped tensor, where first None is the number of audio files, and the second None is the number of images (time axis). Therefore, do choose your next layers accordingly as 512 LSTM cells may be too much :)
#Create model
inp = layers.Input((None,128,216,1), ragged=True)
cnn = tensorflow.keras.applications.DenseNet169(include_top=True,
input_shape=(128,216,1), #<----- input shape for cnn is just the image
pooling=None, classes=2)
#Feel free to modify these layers!
x = layers.TimeDistributed(cnn)(inp)
x = layers.LSTM(8)(x)
out = layers.Dense(2)(x)
model = Model(inp, out)
utils.plot_model(model, show_shapes=True, show_layer_names=False)
4. Train!
The next step is simply to train. Feel free to add your own parameters.
model.fit(X_train, y_train, epochs=2)
Epoch 1/2
WARNING:tensorflow:5 out of the last 5 calls to <function Model.make_train_function.<locals>.train_function at 0x7f8e55b4fe50> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating #tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your #tf.function outside of the loop. For (2), #tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
1/1 [==============================] - 37s 37s/step - loss: 3.4057 - accuracy: 0.4000
Epoch 2/2
1/1 [==============================] - 16s 16s/step - loss: 3.3544 - accuracy: 0.4000
Hope that helps.

How do you write Keras model summary to a dataframe?

First, I'll say this is not the way to run a Keras model correctly. There should be a train and test set. The assignment was strictly to develop intuition so no test set.
I am running a model through several permutations of neurons, activation functions, batches and layers. Here is the code I am using.
from sklearn.datasets import make_classification
X1, y1 = make_classification(n_samples=90000, n_features=17, n_informative=6, n_redundant=0, n_repeated=0, n_classes=8, n_clusters_per_class=3, weights=None, flip_y=.3, class_sep=.4, hypercube=False, shift=3, scale=2, shuffle=True, random_state=840780)
class_num = 8
# ----------------------------------------------------------------
import itertools
final_param_list = []
# param_list_gen order is units, activation function, batch size, layers
param_list_gen = [[10, 20, 50], ["sigmoid", "relu", "LeakyReLU"], [8, 16, 32], [1, 2]]
for element in itertools.product(*param_list_gen):
# --------------------------------------------------------------------------------------
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, LeakyReLU
from keras.callbacks import History
import tensorflow as tf
import numpy as np
import pandas as pd
# --------------------------------------------------------------------------------------
# -------- Model 1 - permutations of neurons, activation funtions batch size and layers -------- #
for param in final_param_list:
q2model1 = Sequential()
# hidden layer 1
if param[1] != 'LeakyReLU':
if param[3] == 2:
# hidden layer 2
if param[1] != 'LeakyReLU':
# output layer
q2model1.add(Dense(class_num, activation='softmax'))
q2model1.compile(loss='sparse_categorical_crossentropy', optimizer='RMSProp', metrics=['accuracy'])
# Step 3: Fit the model
history = q2model1.fit(X1, y1, epochs=20)
Seems to work fine. Now, I've been tasked to output the accuracy of each epoch and include the neurons, activation function, batches, layers
Now, this gives me all of the accuracies for each epoch
This gives me the params
This gives me a summary although I'm not sure if this is the best approach
Is there a way to print out each epoch to a pandas dataframe so it looks like this?
Phase(list index + 1) | # Neurons | Activation function | Batch size | Layers | Acc epoch1 | Acc epoch2 | ......... | Acc epoch20
That's about it. If you see anything in the model itself that is blatantly wrong or if I am missing some key code please let me know
You can try out:
import pandas as pd
# assuming you stored your model.fit results in a 'history' variable:
history = model.fit(x_train, y_train, epochs=20)
# convert the history.history dictionary to a pandas dataframe:
hist_df = pd.DataFrame(history.history)
# checkout result with print e.g.:
# or the describe() method:
Keras also has a CSVLogger: https://keras.io/callbacks/#csvlogger which may be of interest.

Get Cell, Input Gate, Output Gate and Forget Gate activation values for LSTM network using Keras

I want to get the activation values for a given input of a trained LSTM network, specifically the values for the cell, the input gate, the output gate and the forget gate. According to this Keras issue and this Stackoverflow question I'm able to get some activation values with the following code:
(basically I'm trying to classify 1-dimensional timeseries using one label per timeseries, but that doesn't really matter for this general question)
import random
from pprint import pprint
import keras.backend as K
import numpy as np
from keras.layers import Dense
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.utils import to_categorical
def getOutputLayer(layerNumber, model, X):
return K.function([model.layers[0].input],
model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), stateful=True))
model.add(Dense(2, activation='softmax'))
loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
# generate some test data
for i in range(10):
# generate a random timeseries of 10 numbers
X = np.random.rand(10)
X = X.reshape(10, 1, 1)
# generate a random label for the whole timeseries between 0 and 1
y = to_categorical([random.randint(0, 1)] * 10, num_classes=2)
# train the lstm for this one timeseries
model.fit(X, y, epochs=1, batch_size=1, verbose=0)
# to keep the output simple use only 5 steps for the input of the timeseries
X_test = np.random.rand(5)
X_test = X_test.reshape(5, 1, 1)
# get the activations for the output lstm layer
pprint(getOutputLayer(0, model, X_test))
Using that I get the following activation values for the LSTM layer:
[array([[-0.04106992, -0.00327154, -0.01524276, 0.0055838 , 0.00969929,
-0.01438944, 0.00211149, -0.04286387, -0.01102304, 0.0113989 ],
[-0.05771339, -0.00425535, -0.02032563, 0.00751972, 0.01377549,
-0.02027745, 0.00268653, -0.06011265, -0.01602218, 0.01571197],
[-0.03069103, -0.00267129, -0.01183739, 0.00434298, 0.00710012,
-0.01082268, 0.00175544, -0.0318702 , -0.00820942, 0.00871707],
[-0.02062054, -0.00209525, -0.00834482, 0.00310852, 0.0045242 ,
-0.00741894, 0.00141046, -0.02104726, -0.0056723 , 0.00611038],
[-0.05246543, -0.0039417 , -0.01877101, 0.00691551, 0.01250046,
-0.01839472, 0.00250443, -0.05472757, -0.01437504, 0.01434854]],
So for each input value I get 10 values, because I specified in the Keras model to use an LSTM with 10 neurons. But which one is the cell, which one is the input gate, which one the output gate, and which one the forget gate?
Well, these are the output values. To get and inspect the value of each gate, have a look at this issue
I paste the essential part here
for i in range(epochs):
print('Epoch', i, '/', epochs)
for layer in model.layers:
if 'LSTM' in str(layer):
print('states[0] = {}'.format(K.get_value(layer.states[0])))
print('states[1] = {}'.format(K.get_value(layer.states[1])))
print('b_i = {}'.format(K.get_value(layer.b_i)))
print('W_i = {}'.format(K.get_value(layer.W_i)))
print('U_i = {}'.format(K.get_value(layer.U_i)))
print('b_f = {}'.format(K.get_value(layer.b_f)))
print('W_f = {}'.format(K.get_value(layer.W_f)))
print('U_f = {}'.format(K.get_value(layer.U_f)))
print('b_c = {}'.format(K.get_value(layer.b_c)))
print('W_c = {}'.format(K.get_value(layer.W_c)))
print('U_c = {}'.format(K.get_value(layer.U_c)))
print('b_o = {}'.format(K.get_value(layer.b_o)))
print('W_o = {}'.format(K.get_value(layer.W_o)))
print('U_o = {}'.format(K.get_value(layer.U_o)))
# output of the first batch value of the batch after the first fit().
first_batch_element = np.expand_dims(cos[0], axis=1) # (1, 1) to (1, 1, 1)
print('output = {}'.format(get_LSTM_output([first_batch_element])[0].flatten()))
predicted_output = model.predict(cos, batch_size=batch_size)
print('Ploting Results')
plt.subplot(2, 1, 1)
plt.subplot(2, 1, 2)
