How to loop through various train and test splits in Python

I have various train and test splits that I create using TimeSeriesSplit(). My dataframe has 377 observations with 6 input variables and 1 target variable.
I split my dataframe into train and test using the following code:
# train set
i = 0
for X_train, X_test in tscv.split(data):
    i = i + 1
    print("No of observations under train%s=%s" % (i, len(X_train)))
    print("No of observations under test%s=%s" % (i, len(X_test)))
X_train1, X_test1 = data[:67, :-1], data[67:129, :-1]
X_train2, X_test2 = data[:129, :-1], data[129:191, :-1]
X_train3, X_test3 = data[:191, :-1], data[191:253, :-1]
X_train4, X_test4 = data[:253, :-1], data[253:315, :-1]
X_train5, X_test5 = data[:315, :-1], data[315:377, :-1]
# test set
i = 0
for y_train, y_test in tscv.split(data):
    i = i + 1
    print("No of observations under train%s=%s" % (i, len(y_train)))
    print("No of observations under test%s=%s" % (i, len(y_test)))
y_train1, y_test1 = data[:67, -1], data[67:129, -1]
y_train2, y_test2 = data[:129, -1], data[129:191, -1]
y_train3, y_test3 = data[:191, -1], data[191:253, -1]
y_train4, y_test4 = data[:253, -1], data[253:315, -1]
y_train5, y_test5 = data[:315, -1], data[315:377, -1]
So I have 5 splits in total. I want to train my LSTM model by looping through these splits, but I am not sure how best to do that. Here's the code for my LSTM:
# split into input and outputs
train_X, train_y = X_train, y_train
test_X, test_y = X_test, y_test
# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Flatten
import matplotlib.pyplot as pyplot
# design network
model = Sequential()
model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam')
history = model.fit(train_X, train_y, epochs=700, batch_size=72,
                    validation_data=(test_X, test_y), verbose=2, shuffle=False)
# plot history
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='test')
pyplot.legend()
pyplot.show()
# predictions
y_lstm = model.predict(test_X)
# metrics for test set
mse_lstm = mean_squared_error(y_test, y_lstm)
rmse_lstm = np.sqrt(mse_lstm)
r2_lstm = r2_score(y_test, y_lstm)
mae_lstm = mean_absolute_error(y_test, y_lstm)
# train metrics (X_t_reshaped refers to the reshaped training input, i.e. train_X above)
train = model.predict(X_t_reshaped)
msetrain = mean_squared_error(y_train, train)
rmsetrain = np.sqrt(msetrain)
r2train = r2_score(y_train, train)
What can I do to use the above code to loop through all my different splits and store the results in a list or dataframe? I also want to plot the predicted results as shown below.
This is the graph I am getting based on @Ashraful's answer.

Replace your last code block with this:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import pandas as pd
import csv

Round = 3  # define the number of digits after the decimal point you want
fields = ['Fold_No', 'mse_lstm', 'rmse_lstm', 'r2_lstm', 'mae_lstm']
csvfile = open('Summary.csv', 'w')
csvwriter = csv.writer(csvfile)
csvwriter.writerow(fields)

for fold in range(1, 6):
    print(f'Running fold {fold}')
    # split into input and outputs
    train_X, train_y = eval(f'X_train{fold}'), eval(f'y_train{fold}')
    test_X, test_y = eval(f'X_test{fold}'), eval(f'y_test{fold}')
    print(train_X.shape)
    # reshape input to be 3D [samples, timesteps, features]
    train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
    test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, LSTM, Flatten
    import matplotlib.pyplot as pyplot
    # design network
    model = Sequential()
    model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='adam')
    history = model.fit(train_X, train_y, epochs=2, batch_size=72,
                        validation_data=(test_X, test_y), verbose=2, shuffle=False)
    # plot history
    pyplot.plot(history.history['loss'], label='train')
    pyplot.plot(history.history['val_loss'], label='test')
    pyplot.legend()
    pyplot.show()
    # predictions
    train_output = model.predict(train_X)
    y_lstm = model.predict(test_X)
    pyplot.plot(train_output, label='Training output')
    pyplot.plot(train_y, label='Observed Training Target')
    pyplot.plot(test_y, label='Observed Predic. Target')
    pyplot.plot(y_lstm, label='Predicted Output')
    pyplot.legend(loc='upper right')
    pyplot.show()
    # metrics for the current fold's test set
    mse_lstm = mean_squared_error(test_y, y_lstm)
    rmse_lstm = np.sqrt(mse_lstm)
    r2_lstm = r2_score(test_y, y_lstm)
    mae_lstm = mean_absolute_error(test_y, y_lstm)
    csvwriter.writerow([f'Fold_{fold}', round(mse_lstm, Round), round(rmse_lstm, Round),
                        round(r2_lstm, Round), round(mae_lstm, Round)])

csvfile.close()

# read the stored CSV file
summary = pd.read_csv('Summary.csv')
print(summary)
You can also find my implementation in a Colab notebook here.
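As a side note, the eval(f'X_train{fold}') lookups can be avoided entirely by collecting the splits in a list up front. A minimal sketch, assuming data is the same 377-row array and tscv = TimeSeriesSplit(n_splits=5) from the question:

from sklearn.model_selection import TimeSeriesSplit

tscv = TimeSeriesSplit(n_splits=5)
splits = []  # one (train_X, train_y, test_X, test_y) tuple per fold
for train_idx, test_idx in tscv.split(data):
    splits.append((data[train_idx, :-1], data[train_idx, -1],
                   data[test_idx, :-1], data[test_idx, -1]))

for fold, (train_X, train_y, test_X, test_y) in enumerate(splits, start=1):
    print(f'Running fold {fold}: train={len(train_X)}, test={len(test_X)}')
    # ... reshape, fit and evaluate exactly as in the loop above ...

Plain indexing keeps the code robust if the number of splits changes, and the per-fold metrics can be appended to a list of dicts and turned into a DataFrame with pd.DataFrame(records) instead of going through a CSV file.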


Keras hyperparameter tuning

Using this dataset: https://www.kaggle.com/datasets/harlfoxem/housesalesprediction?datasetId=128&searchQuery=hyperparameter+tuning&language=Python
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
# Random seed for reproducibility
np.random.seed(5)
#### DATA PRE-PROCESSING ####
# read the data
data = pd.read_csv('kc_house_data.csv')
# check for missing/null values
print(data.isnull().any())
# create a 'house age' column that encompasses both date (which doesn't help in predicting price) and the year built
data['date'] = pd.to_datetime(data['date'])  # first convert the date format
data["house_age"] = data["date"].dt.year - data['yr_built']
# if 'yr_renovated' is 0, it means not renovated; to avoid confusion, set a new 'renovated' column to 1 whenever yr_renovated is not 0
data['renovated'] = data['yr_renovated'].apply(lambda yr: 0 if yr == 0 else 1)
# Now drop the columns we do not need
data = data.drop('date', axis=1)
data = data.drop('yr_renovated', axis=1)
data = data.drop('yr_built', axis=1)
data.head(5)
print(data)
# Split into test and train
X = data
Y = X['price'].values
X = X.drop('price', axis=1)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_test, Y_test, test_size=0.5)  # splitting into a validation set as well - not sure if needed though
# Scale
standard_scaler = StandardScaler()  # Unsure if a min-max scaler would be more appropriate here?
X_train = pd.DataFrame(standard_scaler.fit_transform(X_train), columns=X_train.columns)
X_test = pd.DataFrame(standard_scaler.transform(X_test), columns=X_test.columns)
X_val = pd.DataFrame(standard_scaler.transform(X_val), columns=X_val.columns)  # was transforming X_test by mistake
# print(X_train.shape, X_val.shape, X_test.shape, Y_train.shape, Y_val.shape, Y_test.shape)
# Use scikit-learn to grid search the batch size and epochs
def create_model():
    # create model
    model = Sequential()
    model.add(Dense(32, input_dim=19, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])
    return model

# create model
model = KerasRegressor(build_fn=create_model, verbose=0)
# define the grid search parameters
batch_size = [5, 10, 20, 40, 60, 80]
epochs = [10, 50, 100]
param_grid = dict(batch_size=batch_size)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3, scoring='neg_mean_squared_error')
grid_result = grid.fit(X_train, Y_train)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
param_grid = dict(epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3, scoring='neg_mean_squared_error')
grid_result = grid.fit(X_train, Y_train)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
It's just not working for me. What I want to do is create a simple neural network and then fine-tune the epochs and batch size, but this just gives me 'accuracy' as NaN. I'm so confused. Any help appreciated.
Current output:
Best: -419841571707.859070 using {'batch_size': 5}
Best: -419841571132.739441 using {'epochs': 100}
However, if I add batch sizes of 1, 2, 3, etc., it always says the best result is the smallest batch size. Is this right?
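Two things may be worth noting here. First, both parameters can be searched jointly in a single grid instead of one after the other. A minimal sketch, reusing create_model and the training data from the question (the parameter values are illustrative, not recommendations):

# one grid over both parameters; GridSearchCV cross-validates every combination
param_grid = dict(batch_size=[5, 10, 20, 40, 60, 80], epochs=[10, 50, 100])
grid = GridSearchCV(estimator=model, param_grid=param_grid,
                    n_jobs=-1, cv=3, scoring='neg_mean_squared_error')
grid_result = grid.fit(X_train, Y_train)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Second, a sigmoid output layer can only produce values in (0, 1), so with unscaled house prices the squared error will be enormous regardless of batch size; a linear activation on the final Dense layer is the usual choice for regression and may matter more here than the grid itself.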

Cannot transform list into tensor: ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list)

I really need your help; I have been trying to figure it out but still cannot get out of this problem. I can read the dataset with pandas (as in the picture), but I can't turn it into a tensor. I have tried lots of different ways to convert the list to a tensor, array, or matrix, but none worked.
Do you have any idea how I can change my CSV reading to make the data fit into TensorFlow?
import sys
import numpy as np
from numpy import array
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.sparse import csr_matrix
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from pandas import Series
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.models import load_model
from sklearn import datasets

df = pd.read_csv('set1.csv')

# convert a list into a tensor
def my_func(arg):
    arg = tf.convert_to_tensor(arg, dtype=tf.float32)
    return arg

# the string-encoded lists are converted back into Python lists
s1 = df["sensor1"].apply(eval)
s2 = df["sensor2"].apply(eval)
s3 = df["sensor3"].apply(eval)
X = (s1, s2, s3)
X = np.matrix(X)
X = X.transpose()
Y = pd.get_dummies(data=df.categoria).values
# print("shape Y:", (Y.shape), " type Y:", (type(Y)), "len Y:", len(Y))
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=42)
print("Train Shape:", X_train.shape, Y_train.shape)
print("Test Shape:", X_test.shape, Y_test.shape)
def model():
    if pre_trained_wv is True:
        print("USE PRE TRAINED")
        # num_words = min(max_features, len(word_index) + 1)
        weights_embedding_matrix = load_pre_trained_wv(word_index, num_words, word_embedding_dim)
        input_shape = (max_sequence_length,)
        model_input = Input(shape=input_shape, name="input", dtype='float64')
        embedding = Embedding(
            num_words,
            word_embedding_dim,
            input_length=max_sequence_length,
            name="embedding",
            weights=[weights_embedding_matrix],
            trainable=False)(model_input)
        lstm = LSTM(word_embedding_dim, dropout=0.2, recurrent_dropout=0.2, name="lstm")(embedding)
    else:
        input_shape = (max_sequence_length,)
        model_input = Input(shape=input_shape, name="input")
        embedding = Embedding(max_features, embed_dim, input_length=max_sequence_length, name="embedding")(model_input)
        lstm = LSTM(embed_dim, dropout=0.2, recurrent_dropout=0.2, name="lstm")(embedding)
    model_output = Dense(2, activation='softmax', name="softmax")(lstm)
    model = Model(inputs=model_input, outputs=model_output)
    return model

model = model()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

if not os.path.exists('./{}'.format(filename)):
    hist = model.fit(
        X_train,
        Y_train,
        validation_data=(X_test, Y_test),
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        verbose=1)
    model.save_weights(filename)
    # Plot
    plt.figure()
    plt.plot(hist.history['loss'], lw=2.0, color='b', label='train')
    plt.plot(hist.history['val_loss'], lw=2.0, color='r', label='val')
    plt.title('Gesture classifier')
    plt.xlabel('Epochs')
    plt.ylabel('Cross-Entropy')
    plt.legend(loc='upper right')
    plt.show()
    plt.figure()
    plt.plot(hist.history['acc'], lw=2.0, color='b', label='train')
    plt.plot(hist.history['val_acc'], lw=2.0, color='r', label='val')
    plt.title('Gesture classifier')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend(loc='upper left')
    plt.show()
else:
    model.load_weights('./{}'.format(filename))

scores = model.evaluate(X_test, Y_test, verbose=0, batch_size=batch_size)
print("Acc: %.2f%%" % (scores[1] * 100))

while True:
    sentence = input("input> ")
    if sentence == "exit":
        break
    # note: new_text is never built from `sentence` before this call
    new_text = pad_sequences(new_text, maxlen=max_sequence_length, dtype='float64', value=0)
    sentiment = model.predict(new_text, batch_size=1, verbose=2)[0]
    if np.argmax(sentiment) == 0:
        pred_proba = "%.2f%%" % (sentiment[0] * 100)
        print("movement to the right => ", pred_proba)
    elif np.argmax(sentiment) == 1:
        pred_proba = "%.2f%%" % (sentiment[1] * 100)
        print("movement to the left => ", pred_proba)
This is the CSV file (linked as "CSV" in the original post).
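No answer was posted here, but the root cause of "Unsupported object type list" is usually that each DataFrame cell still holds a Python list, so the array TensorFlow receives has dtype=object. A minimal sketch of one way to fix the reading step, assuming each sensor column of set1.csv stores string-encoded lists of equal length (the column names and shapes are taken from the question):

import ast
import numpy as np
import pandas as pd
import tensorflow as tf

df = pd.read_csv('set1.csv')
# ast.literal_eval parses the string-encoded lists without running arbitrary code
s1 = df['sensor1'].apply(ast.literal_eval)
s2 = df['sensor2'].apply(ast.literal_eval)
s3 = df['sensor3'].apply(ast.literal_eval)
# stack into a dense (samples, timesteps, 3) float32 array; np.stack fails loudly
# if the per-row lists have different lengths, which is often the real problem
X = np.stack([np.column_stack(row) for row in zip(s1, s2, s3)]).astype('float32')
X_tensor = tf.convert_to_tensor(X)  # now succeeds: no object dtype left
print(X_tensor.shape, X_tensor.dtype)

If the lists do have varying lengths, padding them first with tf.keras.preprocessing.sequence.pad_sequences(s1, dtype='float32') (and likewise for the other sensors) produces the fixed-length rows that np.stack and the model need.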

Music classifier model giving same predictions

I'm trying to build a music classifier using a CNN model, but I always get the same prediction, which is wrong.
I used the GTZAN dataset of songs.
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import csv
# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
#Keras
import keras
from keras import models
from keras import layers
# generating a dataset
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

file = open('data.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)

genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    for filename in os.listdir(f'C:/Users/USER/Desktop/sem8/AI/project/gtzan.keras/data/genres/{g}'):
        songname = f'C:/Users/USER/Desktop/sem8/AI/project/gtzan.keras/data/genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        rmse = librosa.feature.rms(y=y)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        # note: the header above was written to data.csv, while the rows go to datatest3.csv
        file = open('datatest3.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())
# reading dataset from csv
data = pd.read_csv('datatest3.csv')
data.head()
# Dropping unnecessary columns
data = data.drop(['filename'],axis=1)
data.head()
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)
print(y)
# normalizing
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))
# splitting the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# creating a model
model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
history = model.fit(X_train,
                    y_train,
                    epochs=20,
                    batch_size=128)
# calculate accuracy
test_loss, test_acc = model.evaluate(X_test,y_test)
print('test_acc: ',test_acc)
# predictions
predictions = model.predict(X_test)
np.argmax(predictions[0])
This is how I made the CSV file.
data = pd.read_csv('C:\\Users\\USER\\Desktop\\sem8\\AI\\project\\try\\datatest.csv')
data.head()
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)
print(y)
# normalizing
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))
# splitting the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# creating a model
model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.summary()
history = model.fit(X_train,
                    y_train,
                    epochs=20,
                    batch_size=128)
# calculate accuracy
test_loss, test_acc = model.evaluate(X_test,y_test)
print('test_acc: ',test_acc)
# predictions
print(X_test[0])
predictions = model.predict(X_test)
print(np.argmax(predictions[0]))
# model.summary()
model.save("C:\\Users\\USER\\Desktop\\sem8\\AI\\project\\try\\mymodel.h5")
print("Saved model to disk")
Then I created and saved the model:
genres = {
    'metal': 0, 'disco': 1, 'classical': 2, 'hiphop': 3, 'jazz': 4,
    'country': 5, 'pop': 6, 'blues': 7, 'reggae': 8, 'rock': 9
}

def majority_voting(scores, dict_genres):
    preds = np.argmax(scores, axis=1)
    values, counts = np.unique(preds, return_counts=True)
    counts = np.round(counts / np.sum(counts), 2)
    votes = {k: v for k, v in zip(values, counts)}
    votes = {k: v for k, v in sorted(votes.items(), key=lambda item: item[1], reverse=True)}
    return [(get_genres(x, dict_genres), prob) for x, prob in votes.items()]

def get_genres(key, dict_genres):
    # invert the mapping so numeric labels can be turned back into genre names
    tmp_genre = {v: k for k, v in dict_genres.items()}
    return tmp_genre[key]

def prepare(filename):
    y, sr = librosa.load(filename, mono=True, duration=30)
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    rmse = librosa.feature.rms(y=y)
    spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    to_append = f'{np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'
    for e in mfcc:
        to_append += f' {np.mean(e)}'
    song = pd.DataFrame([to_append.split()])
    return song

model = models.load_model("C:\\Users\\USER\\Desktop\\sem8\\AI\\project\\try\\mymodel.h5")
print(prepare("C:\\Users\\USER\\Desktop\\sem8\\AI\\project\\try\\song1.mp3"))
newsong = prepare("C:\\Users\\USER\\Desktop\\sem8\\AI\\project\\try\\song1.mp3")
song = (np.array(newsong)).reshape(26)
print(song.shape)
print(song)
prediction = model.predict(song)
votes = majority_voting(prediction, genres)
print("This song is a {} song".format(votes[0][0]))
print("most likely genres are: {}".format(votes[:3]))
print(prediction)
This is how I'm trying to predict, and I always get the same result, even though I think everything I did is correct:
This song is a hiphop song
most likely genres are: [('hiphop', 1.0)]
[[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]
The problem is in normalizing the data for prediction: the new features must be scaled and shaped exactly like the training data, using the same fitted StandardScaler.
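A minimal sketch of that fix, assuming the scaler (and encoder) fitted during training are still available; in practice you would persist them alongside the model, e.g. with joblib:

# predict() expects a 2-D batch of shape (1, 26), not a flat (26,) vector,
# and the features must pass through the SAME scaler fitted on the training set
features = np.array(newsong, dtype=float).reshape(1, -1)
features = scaler.transform(features)
prediction = model.predict(features)
print(encoder.inverse_transform([np.argmax(prediction)]))

Without this step, raw feature magnitudes (e.g. a spectral centroid in the thousands) dominate the standardized inputs the network was trained on, so the softmax saturates and every song lands in the same class.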

Why can the model not even predict a sine wave?

I am trying to generate a learned time series with an LSTM RNN using Keras: I want to predict a data point, feed it back in as input to predict the next one, and so on, so that I can actually generate the time series (for example, given 2000 data points, predict the next 2000).
I am trying it like this, but the test RMSE is 1.28 and the prediction is basically a straight line.
# LSTM for international airline passengers problem with regression framing
import numpy
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - 1):
        a = dataset[i:(i + look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)
# fix random seed for reproducibility
numpy.random.seed(7)
# load the dataset
dataset = numpy.sin(numpy.linspace(0, 35, 10000)).reshape(-1, 1)
print(type(dataset))
print(dataset.shape)
dataset = dataset.astype('float32')
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
# split into train and test sets
train_size = int(len(dataset) * 0.5)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
# reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(16, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=10, batch_size=1, verbose=2)
# make predictions
trainPredict = model.predict(trainX)
testPredict = list()
prediction = model.predict(testX[0].reshape(1, 1, 1))
for i in range(trainX.shape[0]):
    prediction = model.predict(prediction.reshape(1, 1, 1))
    testPredict.append(prediction)
testPredict = numpy.array(testPredict).reshape(-1, 1)
# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
# shift train predictions for plotting
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
# shift test predictions for plotting
testPredictPlot = numpy.empty_like(dataset)
testPredictPlot[:, :] = numpy.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
# plot baseline and predictions
plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()
What am I doing wrong?
I see multiple issues with your code. Your value for look_back is 1, which means the LSTM sees only one sample at a time, which is obviously not sufficient to learn anything about the sequence.
You probably did this so that you can make the final prediction at the end by feeding the prediction from the previous step back in as the new input. The correct way to make this work is to train with more timesteps and then change the network to a stateful LSTM with a single timestep.
Also, when you do the final prediction you have to show the network more than one ground-truth sample; otherwise its position on the sine is ambiguous (is it going up or down in the next step?).
I threw together a quick example. Here is how I generated the data:
import numpy as np

numSamples = 1000
numTimesteps = 50
width = np.pi / 2.0

def getRandomSine(numSamples=100, width=np.pi):
    return np.sin(np.linspace(0, width, numSamples) + (np.random.rand() * np.pi * 2))

trainX = np.stack([getRandomSine(numSamples=numTimesteps + 1) for _ in range(numSamples)])
valX = np.stack([getRandomSine(numSamples=numTimesteps + 1) for _ in range(numSamples)])
trainX = trainX.reshape((numSamples, numTimesteps + 1, 1))
valX = valX.reshape((numSamples, numTimesteps + 1, 1))
trainY = trainX[:, 1:, :]
trainX = trainX[:, :-1, :]
valY = valX[:, 1:, :]
valX = valX[:, :-1, :]
Here I trained the model:
import keras
from keras.models import Sequential
from keras import layers

model = Sequential()
model.add(layers.LSTM(32, return_sequences=True, input_shape=(numTimesteps, 1)))
model.add(layers.LSTM(32, return_sequences=True))
model.add(layers.TimeDistributed(layers.Dense(1)))
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()
model.fit(trainX, trainY, epochs=50, validation_data=(valX, valY), batch_size=32)
And here I changed the trained model to allow continuous prediction:
# serialize the model and get its weights, for quick re-building
config = model.get_config()
weights = model.get_weights()
config[0]['config']['batch_input_shape'] = (1, 1, 1)
config[0]['config']['stateful'] = True
config[1]['config']['stateful'] = True

new_model = Sequential.from_config(config)
new_model.set_weights(weights)

# create test sine
testX = getRandomSine(numSamples=numTimesteps * 10, width=width * 10)

new_model.reset_states()
testPredictions = []
# burn in
for i in range(numTimesteps):
    prediction = new_model.predict(np.array([[[testX[i]]]]))
    testPredictions.append(prediction[0, 0, 0])
# prediction
for i in range(numTimesteps, len(testX)):
    prediction = new_model.predict(prediction)
    testPredictions.append(prediction[0, 0, 0])

# plot result
import matplotlib.pyplot as plt
plt.plot(np.stack([testPredictions, testX]).T)
plt.show()
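(As an aside, this config surgery reflects an older Keras API; with more recent versions the same stateful rebuild can be written directly. A minimal sketch under that assumption, reusing the trained model and numTimesteps from above; batch_input_shape is the legacy-style kwarg for fixing the batch size to 1:

from keras.models import Sequential
from keras import layers

# rebuild an architecturally identical network, stateful and fed one step at a time,
# then copy the trained weights across layer-for-layer
new_model = Sequential()
new_model.add(layers.LSTM(32, return_sequences=True, stateful=True,
                          batch_input_shape=(1, 1, 1)))
new_model.add(layers.LSTM(32, return_sequences=True, stateful=True))
new_model.add(layers.TimeDistributed(layers.Dense(1)))
new_model.set_weights(model.get_weights())
new_model.reset_states())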
Here is what the result looks like: the prediction errors add up and it quickly diverges from the input sine, but it clearly learned the general shape of sines. You can now try to improve on this by trying different layers, activation functions, and so on.
I was working a bit on a different architecture and uploaded it on GitHub.
So for everyone who is looking into predicting a time series point by point, I hope this helps.
The results look like this:

Keras doesn't make good predictions

Two months ago I started working with Keras in order to obtain a pump pattern for use in other software.
I don't know why the patterns I obtain have nothing to do with the real ones. I have tried establishing few features (inputs) in the dataset, and also more inputs, but there is no way it works.
The results look like this:
Where:
Blue: dataset (the real data I'm trying to approximate)
Orange: prediction
The dataset is a time series.
Here is the CSV file with the dataset.
Here is the code:
import numpy
import matplotlib.pyplot as plt
import pandas
import math
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.regularizers import l2

def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - 1):
        a = dataset[i:(i + look_back), 0:4]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 4])
    return numpy.array(dataX), numpy.array(dataY)
# fix random seed for reproducibility
seed=7
numpy.random.seed(seed)
# load dataset
dataframe = pandas.read_csv('datos_horarios.csv', engine='python')
dataset = dataframe.values
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
#split data into train data and test data
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]
# reshape to X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
# reshape inputs to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], look_back, 4))
testX = numpy.reshape(testX, (testX.shape[0], look_back, 4))
# create and adjust LSTM network
model = Sequential()
model.add(Dropout(0.3, input_shape=(look_back,4)))
model.add(LSTM(6, input_shape=(look_back,4), W_regularizer=l2(0.001)))
model.add(Dense(10))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam', momentum=0.99)  # note: compile() has no momentum argument; momentum belongs to an optimizer such as SGD
history= model.fit(trainX, trainY,validation_split=0.33, nb_epoch=250, batch_size=32)
# Plot
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epochs')
plt.legend(['training', 'validation'], loc='upper right')
plt.show()
# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
print(trainPredict)
numero_inputs=4
inp=numero_inputs-1
# Get something which has as many features as dataset
trainPredict_extended = numpy.zeros((len(trainPredict),numero_inputs+1))
# Put the predictions there
trainPredict_extended[:,inp+1] = trainPredict[:,0]
# Inverse transform it and select the 3rd column.
trainPredict = scaler.inverse_transform(trainPredict_extended)[:,inp+1]
# Get something which has as many features as dataset
testPredict_extended = numpy.zeros((len(testPredict),numero_inputs+1))
# Put the predictions there
testPredict_extended[:,inp+1] = testPredict[:,0]
# Inverse transform it and select the 3rd column.
testPredict = scaler.inverse_transform(testPredict_extended)[:,inp+1]
trainY_extended = numpy.zeros((len(trainY),numero_inputs+1))
trainY_extended[:,inp+1]=trainY
trainY=scaler.inverse_transform(trainY_extended)[:,inp+1]
testY_extended = numpy.zeros((len(testY),numero_inputs+1))
testY_extended[:,inp+1]=testY
testY=scaler.inverse_transform(testY_extended)[:,inp+1]
# compute the root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY, trainPredict))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY, testPredict))
print('Test Score: %.2f RMSE' % (testScore))
# add train predictions to the plot
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, 0] = trainPredict
# add test predictions to the plot
testPredictPlot = numpy.empty_like(dataset)
testPredictPlot[:, :] = numpy.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, 0] = testPredict
# Plot real data and training and test predictions
serie, = plt.plot(scaler.inverse_transform(dataset)[:, numero_inputs])  # invert the (0,1)-scaled samples back to real values and plot them
entrenamiento, = plt.plot(trainPredictPlot[:, 0], linestyle='--')  # plot the training predictions
prediccion_test, = plt.plot(testPredictPlot[:, 0], linestyle='--')
plt.ylabel(' (m3)')
plt.xlabel('h')
plt.legend([serie,entrenamiento,prediccion_test],['Time series','Training','Prediction'], loc='upper right')
plt.show()
Any ideas about how I can fix this problem? Or, at least, about what the problem is?
INPUTS BY COLUMN:
Time of day (each half hour), converted to decimal.
Day of the week (1 = Monday ... 7 = Sunday)
Month of the year (1-12)
Day of the month (1-31)
OUTPUT:
Pumped water (m3)
EDIT:
Using @a_guest's code and changing some parameters, such as the number of epochs or the history value, the results are really nice:
Not an answer as such, but I share here the code with which I obtained the following results:
Note that the network parameters were chosen arbitrarily, i.e. not optimized; you can most likely get better results by varying them. Also, varying the value of history (or look_back in your case) probably has a significant effect on the quality of the predictions.
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import numpy

numpy.random.seed(12)

history = 96

def generate_data():
    data = numpy.loadtxt('datos_horarios.csv', delimiter=',', dtype=float)
    # Normalize data.
    data[:, -1] /= numpy.max(data[:, -1])
    train_test_data = []
    for i in range(data.shape[0] - history - 1):
        # Include the reference value here; it will be extracted later.
        train_test_data.append(data[i:i+history+1, -1].flatten())
    return numpy.array(train_test_data)

train_test_data = generate_data()
# Shuffle the data set in order to randomly select training and test data.
numpy.random.shuffle(train_test_data)
n_samples = train_test_data.shape[0]
n_train_samples = int(0.8 * n_samples)
train_data = train_test_data[:n_train_samples, :-1]
train_data_reference = train_test_data[:n_train_samples, -1][:, None]
test_data = train_test_data[n_train_samples:, :-1]
test_data_reference = train_test_data[n_train_samples:, -1]
print('Training data: ', train_data)
print('Reference values: ', train_data_reference)

model = Sequential()
model.add(Dense(history, input_dim=history, activation='sigmoid'))
model.add(Dense(history // 2, activation='sigmoid'))
model.add(Dense(history // 4, activation='sigmoid'))
model.add(Dense(history // 8, activation='sigmoid'))
model.add(Dense(history // 16, activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop', loss='mean_squared_error', metrics=['accuracy'])
model.summary()
model.fit(train_data, train_data_reference, shuffle=True, epochs=200, batch_size=10)

# Use the complete data set to see the network performance.
# Regenerate the data set because it was shuffled before.
train_test_data = generate_data()
test_data_predicted = model.predict(train_test_data[:, :-1]).flatten()
test_data_reference = train_test_data[:, -1]
relative_deviation = test_data_predicted / test_data_reference - 1.0
print('Relative deviation: ', relative_deviation)

plt.figure()
plt.plot(range(len(test_data_reference)), test_data_reference, 'b-', label='reference')
plt.plot(range(len(test_data_predicted)), test_data_predicted, 'r--', label='predicted')
plt.xlabel('test case #')
plt.ylabel('predictions')
plt.title('Reference values vs predicted values')
plt.legend()

plt.figure()
plt.plot(range(len(test_data_predicted)), relative_deviation, 'bx', label='relative deviation')
plt.xlabel('test case #')
plt.ylabel('relative deviation')
plt.title('Relative deviation of predicted values (predicted / reference - 1)')
plt.legend()
plt.show()
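If you want to quantify the effect of history, a small sweep is easy to bolt on. A sketch under the assumption that the data generation and model construction above are wrapped into functions parameterized by the window length (generate_data(h) and build_model(h) are hypothetical refactorings of the code above, not existing helpers):

def evaluate_history(h):
    # train on windows of length h and return the RMSE over the full regenerated set
    data = generate_data(h)    # hypothetical: parameterized generate_data
    model = build_model(h)     # hypothetical: parameterized model builder
    model.fit(data[:, :-1], data[:, -1], epochs=50, batch_size=10, verbose=0)
    predicted = model.predict(data[:, :-1]).flatten()
    return float(numpy.sqrt(numpy.mean((predicted - data[:, -1]) ** 2)))

for h in (24, 48, 96, 192):
    print('history =', h, 'RMSE =', evaluate_history(h))

Note that generate_data reads history from the enclosing scope in the original code, so this refactoring would thread h through explicitly.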
