Training autoencoder for variant length time series - Tensorflow - python

I am trying to train a LSTM model to reconstruct time series data. I have a data set of ~1800 univariant time-series.
Basically I'm trying to solve a problem similar to this one Anomaly detection in ECG plots, but my time series have different lengths.
I used this approach to deal with variant length:
How to apply LSTM-autoencoder to variant-length time-series data?
and this approach to split the input data based on shape:
Keras misinterprets training data shape
When looping over the data and fitting a model for every shape. is the model eventually only based on the last shape it trained on or is it using all the data to train the final model?
How would I train the model on all input data regardless shape of data?
I know I can add padding but I am trying to use the data as is at this point.
Any suggestions or other approaches to deal with different length on timeseries?
(It is not an issue of time sampling it is more of one timeseries started recording on day X and some only on day X+100)
Here is the code I am using for my autoencoder:
import keras.backend as K
from keras.layers import (Input, Dense, TimeDistributed, LSTM, GRU, Dropout, merge,
Flatten, RepeatVector, Bidirectional, SimpleRNN, Lambda)
def encoder(model_input, layer, size, num_layers, drop_frac=0.0, output_size=None,
bidirectional=False):
"""Encoder module of autoencoder architecture"""
if output_size is None:
output_size = size
encode = model_input
for i in range(num_layers):
wrapper = Bidirectional if bidirectional else lambda x: x
encode = wrapper(layer(size, name='encode_{}'.format(i),
return_sequences=(i < num_layers - 1)))(encode)
if drop_frac > 0.0:
encode = Dropout(drop_frac, name='drop_encode_{}'.format(i))(encode)
encode = Dense(output_size, activation='linear', name='encoding')(encode)
return encode
def repeat(x):
stepMatrix = K.ones_like(x[0][:,:,:1]) #matrix with ones, shaped as (batch, steps, 1)
latentMatrix = K.expand_dims(x[1],axis=1) #latent vars, shaped as (batch, 1, latent_dim)
return K.batch_dot(stepMatrix,latentMatrix)
def decoder(encode, layer, size, num_layers, drop_frac=0.0, aux_input=None,
bidirectional=False):
"""Decoder module of autoencoder architecture"""
decode = Lambda(repeat)([inputs,encode])
if aux_input is not None:
decode = merge([aux_input, decode], mode='concat')
for i in range(num_layers):
if drop_frac > 0.0 and i > 0: # skip these for first layer for symmetry
decode = Dropout(drop_frac, name='drop_decode_{}'.format(i))(decode)
wrapper = Bidirectional if bidirectional else lambda x: x
decode = wrapper(layer(size, name='decode_{}'.format(i),
return_sequences=True))(decode)
decode = TimeDistributed(Dense(1, activation='linear'), name='time_dist')(decode)
return decode
inputs = Input(shape=(None, 1))
encoded = encoder(inputs,LSTM,128, 2, drop_frac=0.0, output_size=None, bidirectional=False)
decoded = decoder(encoded, LSTM, 128, 2, drop_frac=0.0, aux_input=None,
bidirectional=False,)
sequence_autoencoder = Model(inputs, decoded)
sequence_autoencoder.compile(optimizer='adam', loss='mae')
trainByShape = {}
for item in train_data:
if item.shape in trainByShape:
trainByShape[item.shape].append(item)
else:
trainByShape[item.shape] = [item]
for shape in trainByShape:
modelHistory =sequence_autoencoder.fit(
np.asarray(trainByShape[shape]),
np.asarray(trainByShape[shape]),
epochs=100, batch_size=1, validation_split=0.15)

use a bidirectional lstm and increase the number of parameters to gain accuracy. I increased the latent_dim to 1000 and it fit the data closely. More hardware and more memory.
def create_dataset(dataset, look_back=3):
dataX, dataY = [], []
for i in range(len(dataset)-look_back-1):
a = dataset[i:(i+look_back)]
dataX.append(a)
dataY.append(dataset[i + look_back])
return np.array(dataX), np.array(dataY)
COLUMNS=['Open']
dataset=eqix_df[COLUMNS]
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(np.array(dataset).reshape(-1,1))
train_size = int(len(dataset) * 0.70)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size], dataset[train_size:len(dataset)]
look_back=10
trainX=[]
testX=[]
y_train=[]
trainX, y_train = create_dataset(train, look_back)
testX, y_test = create_dataset(test, look_back)
X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
X_test = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
latent_dim=700
n_future=1
model = Sequential()
model.add(Bidirectional(LSTM(units=latent_dim, return_sequences=True,
input_shape=(X_train.shape[1], 1))))
#LSTM 1
model.add(Bidirectional(LSTM(latent_dim,return_sequences=True,dropout=0.4,recurrent_dropout=0.4,name='lstm1')))
#LSTM 2
model.add(Bidirectional(LSTM(latent_dim,return_sequences=True,dropout=0.2,recurrent_dropout=0.4,name='lstm2')))
#LSTM 3
model.add(Bidirectional(LSTM(latent_dim, return_sequences=False,dropout=0.2,recurrent_dropout=0.4,name='lstm3')))
model.add(Dense(units = n_future))
model.compile(optimizer="adam", loss="mean_squared_error", metrics=["acc"])
history=model.fit(X_train, y_train,epochs=50,verbose=0)
plt.plot(history.history['loss'])
plt.title('loss accuracy')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
#print(X_test)
prediction = model.predict(X_test)
# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(prediction)+look_back, :] = prediction
# shift test predictions for plotting
#plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot, color='red')
#plt.plot(testPredictPlot)
#plt.legend(['Actual','Train','Test'])
x=np.linspace(look_back,len(prediction)+look_back,len(y_test))
plt.plot(x,y_test)
plt.show()

Keras LSTM implementation expect a input of type: (Batch, Timesteps, Features).
One solution would be to set Timesteps = 1 and pass the sequence lengths as the Batch dimensions.
If the sampling procedure is the same (no need for resampling), and the difference in length only comes from when the recording time start (X+100 instead of X), I would try to get rid off the lag in the pre-processing stages to get the section of interest only.

Part 1: Plotting the irregular heartbeat. Part 2 is a DENSE network to classify incoming heartbeat voltage to predict irregular beat patterns. 94% accuracy!
from scipy.io import arff
import pandas as pd
from scipy.misc import electrocardiogram
import matplotlib.pyplot as plt
import numpy as np
data = arff.loadarff('ECG5000_TRAIN.arff')
df = pd.DataFrame(data[0])
#for column in df.columns:
# print(column)
columns=[x for x in df.columns if x!="target"]
print(columns)
#print(df[df.target == "b'1'"].drop(labels='target', axis=1).mean(axis=0).to_numpy())
normal=df.query("target==b'1'").drop(labels='target', axis=1).mean(axis=0).to_numpy()
rOnT=df.query("target==b'2'").drop(labels='target', axis=1).mean(axis=0).to_numpy()
pcv=df.query("target==b'3'").drop(labels='target', axis=1).mean(axis=0).to_numpy()
sp=df.query("target==b'4'").drop(labels='target', axis=1).mean(axis=0).to_numpy()
ub=df.query("target==b'5'").drop(labels='target', axis=1).mean(axis=0).to_numpy()
plt.plot(normal,label="Normal")
plt.plot(rOnT,label="R on T",alpha=.3)
plt.plot(pcv, label="PCV",alpha=.3)
plt.plot(sp, label="SP",alpha=.3)
plt.plot(ub, label="UB",alpha=.3)
plt.legend()
plt.title("ECG")
plt.show()
Frame by frame comparision for normal. There are bands of operation which a normal heart stays with:
def PlotTheFrames(df,title,color):
fig,ax = plt.subplots(figsize=(140,50))
for key,item in df.iterrows():
array=[]
for value in np.array(item).flatten():
array.append(value);
x=np.linspace(0,100,len(array))
ax.plot(x,array,c=color)
plt.title(title)
plt.show()
normal=df.query("target==b'1'").drop(labels='target', axis=1)
PlotTheFrames(normal,"Normal Heart beat",'r')
R on T the valves don't seem to be operating correctly
rOnT=df.query("target==b'2'").drop(labels='target', axis=1)
PlotTheFrames(rOnT,"R on T Heart beat","b")
Use a deep learning dense network instead of LSTM! I used leakyReLU for the smaller gradient descent
X=df[columns]
y=pd.get_dummies(df['target'])
model=Sequential()
model.add(Dense(440, input_shape=(len(columns),),activation='LeakyReLU'))
model.add(Dropout(0.4))
model.add(Dense(280, activation='LeakyReLU'))
model.add(Dropout(0.2))
model.add(Dense(240, activation='LeakyReLU'))
model.add(Dense(32, activation='LeakyReLU'))
model.add(Dense(16, activation='LeakyReLU'))
model.add(Dense(5))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.3, random_state=42)
scaler = StandardScaler()
scaler.fit(X_train)
X_train=scaler.transform(X_train)
X_test=scaler.transform(X_test)
history=model.fit(X_train, y_train,epochs = 1000,verbose=0)
model.evaluate(X_test, y_test)
plt.plot(history.history['loss'])
plt.title('loss accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

Related

How to implement a CNN-LSTM using Keras

I am attempting to implement a CNN-LSTM that classifies mel-spectrogram images representing the speech of people with Parkinson's Disease/Healthy Controls. I am trying to implement a pre-existing model (DenseNet-169) with an LSTM model, however I am running into the following error: ValueError: Input 0 of layer zero_padding2d is incompatible with the layer: expected ndim=4, found ndim=3. Full shape received: [None, 216, 1]. Can anyone advise where I'm going wrong?
import librosa
import os
import glob
import IPython.display as ipd
from pathlib import Path
import timeit
import time, sys
%matplotlib inline
import matplotlib.pyplot as plt
import librosa.display
import pandas as pd
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import numpy as np
import cv2
import seaborn as sns
%tensorflow_version 1.x #version 1 works without problems
import tensorflow
from tensorflow.keras import models
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import TimeDistributed
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix, plot_confusion_matrix
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense, BatchNormalization, Activation, GaussianNoise, LSTM
from sklearn.metrics import accuracy_score
DATA_DIR = Path('/content/drive/MyDrive/PhD_Project_Experiments/Spontaneous_Dialogue_PD_Dataset')
diagnosis = [x.name for x in DATA_DIR.glob('*') if x.is_dir()]
diagnosis
def create_paths_ds(paths: Path, label: str) -> list:
EXTENSION_TYPE = '.wav'
return [(x, label) for x in paths.glob('*' + EXTENSION_TYPE)]
from collections import Counter
categories_to_use = [
'Parkinsons_Disease',
'Healthy_Control',
]
NUM_CLASSES = len(categories_to_use)
print(f'Number of classes: {NUM_CLASSES}')
paths_all_labels = []
for cat in categories_to_use:
paths_all_labels += create_paths_ds(DATA_DIR / cat, cat)
X_train, X_test = train_test_split(paths_all_labels,test_size=0.1, stratify = [paths_all_labels[y][1] for y in range(len(paths_all_labels))] ) #fix stratified sampling for test data
X_train, X_val = train_test_split(X_train, test_size=0.2, stratify = [X_train[y][1] for y in range(len(X_train))] )
for i in categories_to_use:
print('Number of train samples for '+i+': '+ str([X_train[y][1] for y in range(len(X_train))].count(i))) #checks whether train samples are equally divided
print('Number of test samples for '+i+': '+ str([X_test[y][1] for y in range(len(X_test))].count(i))) #checks whether test samples are equally divided
print('Number of validation samples for '+i+': '+ str([X_val[y][1] for y in range(len(X_val))].count(i))) #checks whether val samples are equally divided
print(f'Train length: {len(X_train)}')
print(f'Validation length: {len(X_val)}')
print(f'Test length: {len(X_test)}')
def load_and_preprocess_lstm(dataset, SAMPLE_SIZE = 30):
IMG_SIZE = (216,128)
progress=0
data = []
labels = []
for (path, label) in dataset:
audio, sr = librosa.load(path)
dur = librosa.get_duration(audio, sr = sr)
sampleNum = int(dur / SAMPLE_SIZE)
offset = (dur % SAMPLE_SIZE) / 2
for i in range(sampleNum):
audio, sr = librosa.load(path, offset= offset+i, duration=SAMPLE_SIZE)
sample = librosa.feature.melspectrogram(audio, sr=sr)
# print(sample.shape)
sample = cv2.resize(sample, dsize=IMG_SIZE)
sample = np.expand_dims(sample,-1)
print(sample.shape)
data += [(sample, label)]
labels += [label]
progress +=1
print('\r Progress: '+str(round(100*progress/len(dataset))) + '%', end='')
return data, labels
def retrieve_samples(sample_size, model_type):
if model_type == 'cnn':
print("\nLoading train samples")
X_train_samples, train_labels = load_and_preprocess_cnn(X_train,sample_size)
print("\nLoading test samples")
X_test_samples, test_labels = load_and_preprocess_cnn(X_test,sample_size)
print("\nLoading val samples")
X_val_samples, val_labels = load_and_preprocess_cnn(X_val,sample_size)
print('\n')
elif model_type == 'lstm':
print("\nLoading train samples")
X_train_samples, train_labels = load_and_preprocess_lstm(X_train,sample_size)
print("\nLoading test samples")
X_test_samples, test_labels = load_and_preprocess_lstm(X_test,sample_size)
print("\nLoading val samples")
X_val_samples, val_labels = load_and_preprocess_lstm(X_val,sample_size)
print('\n')
elif model_type == "cnnlstm":
print("\nLoading train samples")
X_train_samples, train_labels = load_and_preprocess_lstm(X_train,sample_size)
print("\nLoading test samples")
X_test_samples, test_labels = load_and_preprocess_lstm(X_test,sample_size)
print("\nLoading val samples")
X_val_samples, val_labels = load_and_preprocess_lstm(X_val,sample_size)
print('\n')
print("shape: " + str(X_train_samples[0][0].shape))
print("number of training samples: "+ str(len(X_train_samples)))
print("number of validation samples: "+ str(len(X_val_samples)))
print("number of test samples: "+ str(len(X_test_samples)))
return X_train_samples, X_test_samples, X_val_samples
def create_cnn_lstm_model(input_shape):
model = Sequential()
cnn = tensorflow.keras.applications.DenseNet169(include_top=True, weights=None, input_tensor=None, input_shape=input_shape, pooling=None, classes=2)
# define LSTM model
model.add(tensorflow.keras.layers.TimeDistributed(cnn, input_shape=input_shape))
model.add(LSTM(units = 512, dropout=0.5, recurrent_dropout=0.3, return_sequences = True, input_shape = input_shape))
model.add(LSTM(units = 512, dropout=0.5, recurrent_dropout=0.3, return_sequences = False))
model.add(Dense(units=NUM_CLASSES, activation='sigmoid'))#Compile
model.compile(loss=tensorflow.keras.losses.binary_crossentropy, optimizer='adam', metrics=['accuracy'])
print(model.summary())
return model
def create_model_data_and_labels(X_train_samples, X_val_samples, X_test_samples):
#Prepare samples to work for training the model
labelizer = LabelEncoder()
#prepare training data and labels
x_train = np.array([x[0] for x in X_train_samples])
y_train = np.array([x[1] for x in X_train_samples])
y_train = labelizer.fit_transform(y_train)
y_train = to_categorical(y_train)
#prepare validation data and labels
x_val = np.array([x[0] for x in X_val_samples])
y_val = np.array([x[1] for x in X_val_samples])
y_val = labelizer.transform(y_val)
y_val = to_categorical(y_val)
#prepare test data and labels
x_test = np.array([x[0] for x in X_test_samples])
y_test = np.array([x[1] for x in X_test_samples])
y_test = labelizer.transform(y_test)
y_test = to_categorical(y_test)
return x_train, y_train, x_val, y_val, x_test, y_test, labelizer
#Main loop for testing multiple sample sizes
#choose model type: 'cnn' or 'lstm'
model_type = 'cnnlstm'
n_epochs = 20
patience= 20
es = EarlyStopping(patience=20)
fragment_sizes = [5,10]
start = timeit.default_timer()
ModelData = pd.DataFrame(columns = ['Model Type','Fragment size (s)', 'Time to Compute (s)', 'Early Stopping epoch', 'Training accuracy', 'Validation accuracy', 'Test Accuracy']) #create a DataFrame for storing the results
conf_matrix_data = []
for i in fragment_sizes:
start_per_size = timeit.default_timer()
print(f'\n---------- Model trained on fragments of size: {i} seconds ----------------')
X_train_samples, X_test_samples, X_val_samples = retrieve_samples(i,model_type)
x_train, y_train, x_val, y_val, x_test, y_test, labelizer = create_model_data_and_labels(X_train_samples, X_val_samples, X_test_samples)
if model_type == 'cnn':
model = create_cnn_model(X_train_samples[0][0].shape)
elif model_type == 'lstm':
model = create_lstm_model(X_train_samples[0][0].shape)
elif model_type == 'cnnlstm':
model = create_cnn_lstm_model(X_train_samples[0][0].shape)
history = model.fit(x_train, y_train,
batch_size = 8,
epochs=n_epochs,
verbose=1,
callbacks=[es],
validation_data=(x_val, y_val))
print('Finished training')
early_stopping_epoch = len(history.history['accuracy'])
training_accuracy = history.history['accuracy'][early_stopping_epoch-1-patience]
validation_accuracy = history.history['val_accuracy'][early_stopping_epoch-1-patience]
plot_data(history, i)
predictions = model.predict(x_test)
score = accuracy_score(labelizer.inverse_transform(y_test.argmax(axis=1)), labelizer.inverse_transform(predictions.argmax(axis=1)))
print('Fragment size = ' + str(i) + ' seconds')
print('Accuracy on test samples: ' + str(score))
conf_matrix_data += [(predictions, y_test, i)]
stop_per_size = timeit.default_timer()
time_to_compute = round(stop_per_size - start_per_size)
print ('Time to compute: '+str(time_to_compute))
ModelData.loc[len(ModelData)] = [model_type, i, time_to_compute, early_stopping_epoch, training_accuracy, validation_accuracy, score] #store particular settings configuration, early stoppping epoch and accuracies in dataframe
stop = timeit.default_timer()
print ('\ntime to compute: '+str(stop-start))
I believe the input_shape is (128, 216, 1)
The issue here is that you don't have a time-axis to time distribute your CNN (DenseNet169) layer over.
In this step -
tensorflow.keras.layers.TimeDistributed(cnn, input_shape=(128,216,1)))
You are passing the 128 dimension axis as a time-axis. That means each of the CNN (DenseNet169) is left with a input shape of (216,1), which is not an image and therefore throws an error because it's expecting 3D tensors (images) and not 2D tensors.
Your input shape needs to be a 4D tensor something like - (10, 128, 216, 1), so that the 10 becomes the time axis (for time distributing), and (128, 216, 1) becomes an image input for the CNN (DenseNet169).
A solution with ragged tensors and time-distributed layer
IIUC, your data contains n audio files, each file containing a variable number of mel-spectrogram images.
You need to use tf.raggedtensors to be able to work with variable tensor shapes as inputs to the model
This requires an explicit definition of an Input layer where you set ragged=True
This allows you to pass each audio file as a single sample, with variable images, each of which will be time distributed.
You will have to use None as the time distributed axis shape while defining the model
1. Creating a dummy dataset
Let's start with a sample dataset -
import tensorflow as tf
from tensorflow.keras import layers, Model, utils, applications
#Assuming there are 5 audio files
num_audio = 5
data = []
#Create a random number of mel-spectrograms for each audio file
for i in range(num_audio):
n_images = np.random.randint(4,10)
data.append(np.random.random((n_images,128,216,1)))
print([i.shape for i in data])
[(5, 128, 216, 1),
(5, 128, 216, 1),
(9, 128, 216, 1),
(6, 128, 216, 1),
(4, 128, 216, 1)]
So, your data should be looking something like this. Here, I have a dummy dataset with 5 audio files, first one has 5 images of shape (128,216,1), while the last one has 4 images of the same shape.
2. Converting them to ragged-tensors
Next, let's convert and store these are ragged tensors. Ragged tensors allow variable-length objects to be stored, in this case, a variable number of images. Read more about them here.
#Convert each set of images (for each audio) to tensors and then a ragged tensor
tensors = [tensorflow.convert_to_tensor(i) for i in data]
X_train = tensorflow.ragged.stack(tensors).to_tensor()
#Creating dummy y_train, one for each audio files
y_train = tensorflow.convert_to_tensor(np.random.randint(0,2,(5,2)))
3. Create a model
I am using a functional API since I find it more readable and works better with an explicit input layer, but you can use input layers in Sequential API as well. Feel free to convert it to your preference.
Notice that I am using (None,128,216,1) as input shape. This creates 5 channels (first implicit one for batches) as - (Batch, audio_files, h, w, channels)
I have a dummy LSTM layer to showcase how the architecture works, feel free to stack more layers. Also, do note, that your DenseNet169 is only returning 2 features. And therefore your TimeDistributed layers is returning (None, None, 2) shaped tensor, where first None is the number of audio files, and the second None is the number of images (time axis). Therefore, do choose your next layers accordingly as 512 LSTM cells may be too much :)
#Create model
inp = layers.Input((None,128,216,1), ragged=True)
cnn = tensorflow.keras.applications.DenseNet169(include_top=True,
weights=None,
input_tensor=None,
input_shape=(128,216,1), #<----- input shape for cnn is just the image
pooling=None, classes=2)
#Feel free to modify these layers!
x = layers.TimeDistributed(cnn)(inp)
x = layers.LSTM(8)(x)
out = layers.Dense(2)(x)
model = Model(inp, out)
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics='accuracy')
utils.plot_model(model, show_shapes=True, show_layer_names=False)
4. Train!
The next step is simply to train. Feel free to add your own parameters.
model.fit(X_train, y_train, epochs=2)
Epoch 1/2
WARNING:tensorflow:5 out of the last 5 calls to <function Model.make_train_function.<locals>.train_function at 0x7f8e55b4fe50> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating #tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your #tf.function outside of the loop. For (2), #tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
1/1 [==============================] - 37s 37s/step - loss: 3.4057 - accuracy: 0.4000
Epoch 2/2
1/1 [==============================] - 16s 16s/step - loss: 3.3544 - accuracy: 0.4000
Hope that helps.

rescale and inverse_transform not returning predicted and actual values in original scale for LSTM multivariate time-series forecast

I'm currently building an LSTM multivariate time-series model to predict one output at current time (t) using 22 features from the previous timestamp (t-1) as inputs. I've been following the instructions from this example, and everything seems to be working correctly, but the one issue I'm having involves the final inverse_transform() functions.
Specifically, I'm expecting that once I invert the scaling for both the forecasted values and the actual values, the output will be in the same units as the original data before I normalized and transformed my dataset. The variable I'm trying to predict has a mean of around 29.186, but when I run inverse_transform() and plot the predictions against the actuals, the output of my results ranges from 3.30 - 3.55.
It could be that I've made a simple error slicing the array, or it could be something completely different, but I can't seem to identify the root cause of why my predicted and actual values aren't in the same units as the data before I transformed everything.
For additional context, here's the printed output of train_X.shape, train_y.shape, test_X.shape, test_y.shape:
(1804950, 1, 22) (1804950,) (849389, 1, 22) (849389,)
Here's the relevant parts of my code:
# ensure all data is float
values = ips_data.values.astype('float32')
# normalize features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
# frame as supervised learning
ips_reframed = series_to_supervised(scaled, 1, 1)
# drop a bunch of columns I don't want to predict
ips_reframed.drop(ips_reframed.columns[[22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,42,43]], axis=1, inplace=True)
# print(ips_reframed.head())
# split into train and test sets
values = ips_reframed.values
# n_train_hours = (365 * 24 * 60) * 3
n_train_hours = int(len(values) * 0.68)
train = values[:n_train_hours, :]
test = values[n_train_hours:, :]
# split into input and outputs
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]
# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
# print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)
# design network
model = Sequential()
model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2]), return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(50))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation("linear"))
model.compile(loss='mae', optimizer='adam')
# fit network
history = model.fit(train_X, train_y, epochs=100, batch_size=1000,
validation_data=(test_X, test_y), verbose=2, shuffle=False)
# plot history
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()
# make a prediction
yhat = model.predict(test_X)
# invert scaling for forecast
test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))
inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:,0]
# calculate RMSE
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
# print('Test RMSE: %.3f' % rmse)
# plotting the predictions
plt.plot(inv_yhat[-100:], label='predictions')
plt.plot(inv_y[-100:], label='actual')
plt.title("Prediction vs. Actual")
plt.legend()
plt.show()
Please let me know if you need any additional information or context, and thank you so much for your help!

GAN doesn't proceed training very well

I have programmed a GAN model using keras but the training didn't go well. The generator model always returns a bare noise image (28x28 size) instead of something similar to mnist dataset. This doesn't give me any error though, when it comes to training discriminator model will become trainable=False, which is not what I want to do.
If this implementation is bad, please let me know. Can anyone help?
import os
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, BatchNormalization
from keras.optimizers import SGD, Adam, RMSprop
from keras.datasets import mnist
from keras.regularizers import l1_l2
def plot_generated(noise, Generator):
image_fake = Generator.predict(noise)
plt.figure(figsize=(10,8))
plt.show()
plt.close()
def plot_metircs(metrics, epoch=None):
plt.figure(figsize=(10,8))
plt.plot(metrics['d'], label='discriminative loss', color='b')
plt.legend()
plt.show()
plt.close()
plt.figure(figsize=(10,8))
plt.plot(metrics['g'], label='generative loss', color='r')
plt.legend()
plt.show()
plt.close()
def Generator():
model = Sequential()
LeakyReLU = keras.layers.advanced_activations.LeakyReLU(alpha=0.2)
model.add(Dense(input_dim=100, units=128, activation=LeakyReLU, name='g_input'))
model.add(Dense(input_dim=128, units=784, activation='tanh', name='g_output'))
return model
def Discriminator():
model = Sequential()
LeakyReLU = keras.layers.advanced_activations.LeakyReLU(alpha=0.2)
model.add(Dense(input_dim=784, units=128, activation=LeakyReLU, name='d_input'))
model.add(Dense(input_dim=128, units=1, activation='sigmoid', name='d_output'))
model.compile(loss='binary_crossentropy', optimizer='Adam')
return model
def Generative_Adversarial_Network(Generator, Discriminator):
model = Sequential()
model.add(Generator)
model.add(Discriminator)
# train only generator in the entire GAN architecture
Discriminator.trainable = False
model.compile(loss='binary_crossentropy', optimizer='Adam')
return model
def Training(z_input_size, Generator, Discriminator, GAN, loss_dict, X_train, epoch, batch, smooth):
for e in range(epoch):
# z: noise, used for input of G to generate fake image based on this noise! it's like a seed
noise = np.random.uniform(-1, 1, size=[batch, z_input_size])
image_fake = Generator.predict_on_batch(noise)
# sampled real_image from dataset
rand_train_index = np.random.randint(0, X_train.shape[0], size=batch)
image_real = X_train[rand_train_index, :]
# concatenate real and fake images
"""
X = [
image_real => label : 1 (we can multiply a smoothing factor)
image_fake => label : 0
]
"""
X = np.vstack((image_real, image_fake))
y = np.zeros(len(X))
# putting label "1" to image_real
y[len(image_real):] = 1*(1 - smooth)
y = y.astype(int)
# train only discriminator
d_loss = Discriminator.train_on_batch(x=X, y=y)
# NOTE: remember?? we set discriminator OFF during the training of GAN!
# So, we can safely train only generator, weight of discriminator set fixed!
g_loss = GAN.train_on_batch(x=noise, y=y[len(noise):])
loss_dict['d'].append(d_loss)
loss_dict['g'].append(g_loss)
if e%1000 == 0:
plt.imshow(image_fake)
plt.show()
plot_generated(noise, Generator)
plot_metircs(loss_dict)
return "done!"
Gen = Generator()
Dis = Discriminator()
GAN = Generative_Adversarial_Network(Gen, Dis)
GAN.summary()
Gen.summary()
Dis.summary()
gan_losses = {"d":[], "g":[], "f":[]}
epoch = 30000
batch = 1000
smooth = 0.9
z_input_size = 100
row, col = 28, 28
z_group_matrix = np.random.uniform(0, 1, examples*z_input_size)
z_group_matrix = z_group_matrix.reshape([9, z_input_size])
print(z_group_matrix.shape)
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train, X_test = X_train.reshape(X_train.shape[0], row*col), X_test.reshape(X_test.shape[0], row*col)
X_train.astype('float32')
X_test.astype('float32')
X_train, X_test = X_train/255, X_test/255
print('X_train shape: ', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
Training(z_input_size, Gen, Dis, GAN, loss_dict=gan_losses, X_train=X_train, epoch=epoch, batch=batch, smooth=smooth)
The model itself is correct.
I would suggest a few minor changes:
smooth 0.9 is too much. Make it close to 0.1.
Leak Factor you have is 0.2, usually its a very small decimal close to 0; take around
0.01/0.02.
Batchsize around 400
Epochs around 2000
And finally early stopping with a bit large threshold.

LSTM timeseries prediction with multiple outputs

I have a dataset with 3 features in a timeseries. The dimension of the dataset is 1000 x 3 (1000 timesteps and 3 features). Basically, 1000 rows and 3 columns
The data looks like this:
A B C
131 111 100
131 110 120
131 100 100
...
131 100 100
The problem is how to train the first 25 steps and predict the next 25 steps in order to get the output of 3 features predictions which is (A, B and C). I successful train and predict 1-D (1 features(A)) array. But I have no idea how to predict the 3 features using same the dataset.
And I got this error:
Error when checking target: expected dense_1 to have shape (None, 3) but got array with shape (21, 1)
The code as below:
# -*- coding: utf-8 -*-
import numpy as np
import numpy
import matplotlib.pyplot as plt
import pandas
import math
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
dataX, dataY = [], []
for i in range(len(dataset) - look_back - 1):
a = dataset[i:(i + look_back):]
dataX.append(a)
dataY.append(dataset[i + look_back, :])
return numpy.array(dataX), numpy.array(dataY)
# fix random seed for reproducibility
numpy.random.seed(7)
# load the dataset
dataframe = pandas.read_csv('v77.csv', engine='python',skiprows=0)
dataset = dataframe.values
print dataset
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
# split into train and test sets
train_size = 10
test_size = 10
train, test = dataset[0:train_size, :], dataset[train_size:train_size+test_size, :]
print (train_size,test_size)
# reshape into X=t and Y=t+1
look_back = 3
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
print trainX
# reshape input to be [samples, time steps, features]
#trainX = numpy.reshape(trainX, (trainX.shape[0], look_back, 3))
#testX = numpy.reshape(testX, (testX.shape[0],look_back, 3))
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(32, input_shape=(3,3)))
model.add(Dense(3))
model.compile(loss='mean_squared_error', optimizer='adam')
history= model.fit(trainX, trainY,validation_split=0.33, nb_epoch=10, batch_size=16)
# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
# print testPredict
# print np.shape(testPredict)
# Get something which has as many features as dataset
trainPredict_extended = numpy.zeros((len(trainPredict),3))
print trainPredict_extended
print np.shape(trainPredict_extended[:,2])
print np.shape(trainPredict[:,0])
# Put the predictions there
trainPredict_extended[:,2] = trainPredict[:,0]
# Inverse transform it and select the 3rd coumn.
trainPredict = scaler.inverse_transform(trainPredict_extended) [:,2]
# print(trainPredict)
# Get something which has as many features as dataset
testPredict_extended = numpy.zeros((len(testPredict),3))
# Put the predictions there
testPredict_extended[:,2] = testPredict[:,0]
# Inverse transform it and select the 3rd column.
testPredict = scaler.inverse_transform(testPredict_extended)[:,2]
# print testPredict_extended
trainY_extended = numpy.zeros((len(trainY),3))
trainY_extended[:,2]=trainY
trainY=scaler.inverse_transform(trainY_extended)[:,2]
testY_extended = numpy.zeros((len(testY),3))
testY_extended[:,2]=testY
testY=scaler.inverse_transform(testY_extended)[:,2]
# print
# print testY
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY, trainPredict))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY, testPredict))
print('Test Score: %.2f RMSE' % (testScore))
Sample data:
v77.txt
Help Needed. Thanks
Your Y shape does not match up with the last layer in your model. Your Y is in the form of (num_samples, 1), which means that for every sample it outputs a vector of length 1.
Your last layer, however, is a Dense(3) layer, which outputs (num_samples, 3), which means that for every sample it outputs a vector of length 3.
Since the output of your neural network and your y-data aren't in the same format, the neural network cannot train.
You can fix this in two ways:
1.Convert the output of your neural network to the shape of your y data by replacing Dense(3) with Dense(1):
model = Sequential()
model.add(LSTM(32, input_shape=(3,3)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')history= model.fit(trainX, trainY,validation_split=0.33, nb_epoch=10, batch_size=16)
2.Convert the shape of your y data to the output of your neural network by modifying your create_dataset() function such that all of the features are added to the y instead of just one:
def create_dataset(dataset, look_back=1):
dataX, dataY = [], []
for i in range(len(dataset) - look_back - 1):
a = dataset[i:(i + look_back):]
dataX.append(a)
dataY.append(dataset[i + look_back, :])
return numpy.array(dataX), numpy.array(dataY)
Since you stated that you wanted to predict 3 feature most likely you will be using the second option. Note that the second option does break the last part of your code to extend the y, but your model trains fine.

Keras - How to actually use an autoencoder

I have developed the following autoencoder in Keras for the purpose of dimension reduction.
from keras.layers import Input, Dense, BatchNormalization
from keras.models import Model
from keras import regularizers
from keras.callbacks import TensorBoard, EarlyStopping
import keras.backend as K
from sklearn.metrics import r2_score
input_size = len(spot_dat.columns)
coder_size = 32
inner_size = 64
betwe_size = 96
outer_size = 128
batch_size = 25
def r2(y_true, y_pred):
SS_res = K.sum(K.square(y_true - y_pred))
SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
return (1 - SS_res / (SS_tot + K.epsilon()))
def rmse(y_true, y_pred):
return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))
input_ = Input(shape=(input_size,))
encoded = Dense(outer_size, activation='selu')(input_) #hidden 1
#encoded = Dense(betwe_size, activation='elu')(input_) #hidden 2
encoded = BatchNormalization()(encoded)
encoded = Dense(inner_size, activation='selu')(encoded) #hidden 3
code = Dense(coder_size, activation='selu')(encoded) #code
decoded = Dense(inner_size, activation='selu')(code) #hidden 2
decoded = BatchNormalization()(decoded)
#decoded = Dense(betwe_size, activation='elu')(decoded) #hidden 2
decoded = Dense(outer_size, activation='selu')(decoded) #hidden 1
output = Dense(input_size, activation='sigmoid')(decoded) #output
autoencoder = Model(input_, output)
autoencoder.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = [r2, rmse])
val = autoencoder.fit(x_train, x_train,
epochs=1000,
batch_size = 75,
shuffle=True,
validation_data=(x_test, x_test),
callbacks=[TensorBoard(log_dir='/tmp/test'), EarlyStopping(monitor = 'val_loss', min_delta = 0, patience = 30, verbose = True, mode = 'auto')])
plt.plot(val.history['loss'])
plt.plot(val.history['val_loss'])
plt.title('Model loss (MSR)')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc = 'best')
plt.show()
plt.plot(val.history['r2'])
plt.plot(val.history['val_r2'])
plt.title('Model R2')
plt.ylabel('R2')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc = 'best')
plt.show()
plt.plot(val.history['rmse'])
plt.plot(val.history['val_rmse'])
plt.title('Model RMSE')
plt.ylabel('RMSE')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc = 'best')
plt.show()
The model works fine, the issue is I cannot find anyway to extract part of the model (from input to code) after it has been trained on the full autoencoder. It seems to me, if you cant pull half the model the autoencoder cannot be used for any practical dimension reduction. Thus, there should be a way to freeze and pull the trained models first several layers. The end goal is to apply the layer with 32 dimensions to a t-SNE and compare results against other dimension reduction techniques.
After training just do the following:
encoder = Model(input_, code)
Then use encoder.predict to get the code from an input sample.

Categories