I'm working on code that predicts wind speed.
First, I used print(history.history.keys()) to print the loss, val_loss, mape and val_mean_absolute_percentage_error values, but it only displays dict_keys(['loss', 'mape']).
Then, since val_loss and val_mean_absolute_percentage_error are missing, accessing them raises KeyError: 'val_mean_absolute_percentage_error'.
Can you help me?
Here is my code:
from __future__ import print_function
from sklearn.metrics import mean_absolute_error
import math
import numpy as np
import matplotlib.pyplot as plt
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)
# fix random seed for reproducibility
np.random.seed(7)
# load the dataset
dataframe = read_csv('OND_Q4.csv', usecols=[7], engine='python', header=3)
dataset = dataframe.values
print(dataframe.head())
dataset = dataset.astype('float32')
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
# split into train and test sets
train_size = int(len(dataset) * 0.7) # Use 70% of data to train
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
# reshape input to be [samples, time steps, features]
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
#compile model
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mape'])
history=model.fit(trainX, trainY, epochs=5, batch_size=1, verbose=2)
# list all data in history
print(history.history.keys())
train_MAPE = history.history['mape']
valid_MAPE = history.history['val_mean_absolute_percentage_error']
train_MSE = history.history['loss']
valid_MSE = history.history['val_loss']
Thank you
You need to define a validation set in model.fit()
You can do it with validation_split=0.2 (a float between 0 and 1: the fraction of the training data to be used as validation data), e.g.
history = model.fit(trainX, trainY, epochs=5, validation_split=0.2, batch_size=1, verbose=2)
Or you can use validation_data=: data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data, and validation_data overrides validation_split. validation_data can be:
- a tuple (x_val, y_val) of NumPy arrays or tensors
- a tuple (x_val, y_val, val_sample_weights) of NumPy arrays
- a dataset or a dataset iterator
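For example, a minimal sketch using the question's own variables (assuming testX/testY are meant to serve as the validation set; any held-out split works):

# evaluate loss and metrics on held-out data at the end of each epoch
history = model.fit(trainX, trainY, epochs=5, batch_size=1,
                    validation_data=(testX, testY), verbose=2)
print(history.history.keys())
# should now include 'val_loss' plus the validation metric key, e.g.
# 'val_mape' or 'val_mean_absolute_percentage_error' depending on the Keras version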
Related
I have designed Python code (using Keras + TF) that predicts a self-generated time series (a sinusoid plus a linear trend) using a simple LSTM model.
When predicting the sinusoid alone, it works OK.
However, when I add the linear trend, the prediction gets out of whack and I can't get it to fit well enough; the last predicted time steps are always off.
Why is it behaving this way?
# LSTM for international airline passengers problem with memory
import numpy as np
import matplotlib.pyplot as plt
from pandas import read_csv
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)
# fix random seed for reproducibility
tf.random.set_seed(7)
# load the dataset
dataframe = read_csv(r'C:\Users\LENOVO\Documents\Python Scripts\airline-passengers.txt', usecols=[1], engine='python')
dataset = dataframe.values
dataset = dataset.astype('float32')
dataset = (np.sin(np.arange(0,500))*10 + np.arange(0,500)).reshape(-1,1)  # overwrites the CSV data with the synthetic sinusoid + trend
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
# split into train and test sets
train_size = int(len(dataset)*0.5)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
# reshape into X=t and Y=t+1
look_back = 5
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
# reshape input to be [samples, time steps, features]
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))
# create and fit the LSTM network
batch_size = 1
model = Sequential()
model.add(LSTM(32, batch_input_shape=(batch_size, look_back, 1), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
for i in range(100):
    model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
    model.reset_states()
# make predictions
trainPredict = model.predict(trainX, batch_size=batch_size)
model.reset_states()
testPredict = model.predict(testX, batch_size=batch_size)
# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# calculate root mean squared error
trainScore = np.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = np.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
# shift train predictions for plotting
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
# shift test predictions for plotting
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
# plot baseline and predictions
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot, label='train prediction')
plt.plot(testPredictPlot, label='test prediction')
ax.legend(loc="lower left")
plt.show()
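One plausible explanation (a hypothesis, not a verified diagnosis): with the linear trend added, every value in the test half lies above everything the network saw during training, and neural networks extrapolate poorly outside their training range. A common remedy is to first-difference the series so the model learns the roughly stationary step-to-step change, then cumulatively sum the predictions back; a sketch:

# sketch: difference the trending series before scaling/windowing
raw = np.sin(np.arange(0, 500)) * 10 + np.arange(0, 500)
diffed = np.diff(raw).reshape(-1, 1)  # the linear trend becomes a constant offset
# ...scale, window with create_dataset, and train exactly as above, but on diffed...
# then invert the differencing when reconstructing the forecast, e.g.
# reconstructed = raw[train_size] + np.cumsum(predicted_diffs)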
I am using WTI data from a US website, with a CNN-LSTM model for prediction and forecasting. The whole program runs without errors (I am using a Jupyter notebook in Python), and the predictions and graphs come out fine. My problem is that I can't work out how to forecast 10 days ahead.
My program:
import numpy
import pandas
import math
import matplotlib.pyplot as plt
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
# load the dataset
dataframe = pandas.read_csv('C:/Users/HP/Desktop/python/WTI.csv', usecols=[1], engine='python')
dataset = dataframe.values
dataset = dataset.astype('float32')
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
# split into train and test sets
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
print(len(train), len(test))
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)
# reshape into X=t and Y=t+1
look_back = 1
# choose a number of time steps
n_steps = 2
# split into samples
# reshape from [samples, timesteps] into [samples, subsequences, timesteps, features]
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
n_features = 1
n_seq = 1
n_steps = 1  # note: this overwrites the n_steps = 2 set above
# reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], n_seq, n_steps, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], n_seq, n_steps, testX.shape[1]))
# define model
model = Sequential()
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=1, activation='relu'), input_shape=(None,1, look_back)))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.fit(trainX, trainY, epochs=20, batch_size=1, verbose=2)
# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
MAE = mean_absolute_error(trainY[0], trainPredict[:,0])
print(MAE)
trainMAE = mean_absolute_error(trainY[0], trainPredict[:,0])
print(trainMAE)
testMAE = mean_absolute_error(testY[0], testPredict[:,0])
print(testMAE)
trainmape = mean_absolute_percentage_error(trainY[0], trainPredict[:,0])
print(trainmape)
testmape= mean_absolute_percentage_error(testY[0],testPredict[:,0])
print(testmape)
# shift train predictions for plotting
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
# shift test predictions for plotting
testPredictPlot = numpy.empty_like(dataset)
testPredictPlot[:, :] = numpy.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
# plot baseline and predictions
plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()
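The program above only predicts one step ahead for inputs it already has; to forecast 10 days beyond the end of the data, one common approach (a sketch, not part of the original program) is recursive prediction: predict one step, feed that prediction back in as the next input, and repeat. Assuming the trained model, scaler, dataset, n_seq, n_steps and look_back from above:

# recursive multi-step forecast: feed each prediction back as the next input
n_ahead = 10
last_window = dataset[-look_back:, 0]  # most recent scaled value(s)
forecast = []
for _ in range(n_ahead):
    x = last_window.reshape(1, n_seq, n_steps, look_back)
    yhat = model.predict(x)[0, 0]
    forecast.append(yhat)
    last_window = numpy.append(last_window[1:], yhat)  # slide the window forward
forecast = scaler.inverse_transform(numpy.array(forecast).reshape(-1, 1))
print(forecast)  # 10 days ahead, back in the original units

Note that errors compound: each forecast is built on previous forecasts, so accuracy degrades with the horizon.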
I have one column of sequential demand for one item.
I have an LSTM neural network to test the prediction ability on this column, and it works (see the code below). But now I want to use several columns for different items and calculate the 'ABSE' for every column.
How can I do this?
This is the code:
import numpy
import matplotlib.pyplot as plt
import pandas
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
# fix random seed for reproducibility
numpy.random.seed(7)
# load the dataset
dataframe = pandas.read_excel('dataset.xlsx')
dataset = dataframe.values
dataset = dataset.astype('float32')
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
# split into train and test sets
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=0)
def ABSE(a, b):
    errors = abs((b-a)/a)
    return numpy.mean(errors)
# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])
# calculate ABSE
trainScore = ABSE(trainY[0], trainPredict[:,0])
print('Train Score: %.2f ABSE' % (trainScore))
testScore = ABSE(testY[0], testPredict[:,0])
print('Test Score: %.2f ABSE' % (testScore))
If your code works for the first column, just extract each column separately and give it to the model, like this:
# train your model on the first column
...
for name_col in dataframe.columns:
    item = dataframe[name_col]
    # your preprocessing
    ...
    model.predict(item)
    # your evaluation
    ...
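A more concrete sketch (assuming every column of the DataFrame is one item's demand series, and reusing create_dataset, ABSE and look_back from the question):

# train and score one model per column
for name_col in dataframe.columns:
    series = dataframe[[name_col]].values.astype('float32')
    scaler = MinMaxScaler(feature_range=(0, 1))
    series = scaler.fit_transform(series)
    train_size = int(len(series) * 0.67)
    train, test = series[:train_size, :], series[train_size:, :]
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)
    trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    model = Sequential()
    model.add(LSTM(4, input_shape=(1, look_back)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=0)
    testPredict = scaler.inverse_transform(model.predict(testX))
    testY_inv = scaler.inverse_transform([testY])
    print('%s Test ABSE: %.2f' % (name_col, ABSE(testY_inv[0], testPredict[:, 0])))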
# -*- coding: utf-8 -*-
import numpy as np
# seed-setting code, currently commented out:
"""
from numpy.random import seed
seed(10)
from tensorflow import set_random_seed
import tensorflow as tf
set_random_seed(1)
import os
os.environ['PYTHONHASHSEED'] = '0'
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
from keras import backend as K
K.set_session(sess)
"""
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout,CuDNNGRU,CuDNNLSTM
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back):
        dataX.append(dataset[i:(i+look_back), 0])
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)
dataset = np.cos(np.arange(1000)*(20*np.pi/1000))[:,None]
plt.plot(dataset)
plt.show()
look_back = 30
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
# split into train and test sets
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))
batch_size = 1
model = Sequential()
model.add(CuDNNLSTM(16, batch_input_shape=(batch_size, look_back, 1), stateful=True, return_sequences=True))
model.add(Dropout(0.3))
model.add(CuDNNLSTM(16, batch_input_shape=(batch_size, look_back, 1), stateful=True))
model.add(Dropout(0.3))
model.add(Dense(16,activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
for i in range(5):
    print(i)
    model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=1, shuffle=False)
    model.reset_states()
trainScore = model.evaluate(trainX, trainY, batch_size=batch_size, verbose=0)
print('Train Score: ', trainScore)
testScore = model.evaluate(testX[:252], testY[:252], batch_size=batch_size, verbose=0)
print('Test Score: ', testScore)
look_ahead = 250
trainPredict = [np.vstack([trainX[-1][1:], trainY[-1]])]
predictions = np.zeros((look_ahead,1))
for i in range(look_ahead):
    prediction = model.predict(np.array([trainPredict[-1]]), batch_size=batch_size)
    predictions[i] = prediction
    trainPredict.append(np.vstack([trainPredict[-1][1:], prediction]))
plt.figure(figsize=(12,5))
# plt.plot(np.arange(len(trainX)),np.squeeze(trainX))
# plt.plot(np.arange(200),scaler.inverse_transform(np.squeeze(trainPredict)[:,None][1:]))
# plt.plot(np.arange(200),scaler.inverse_transform(np.squeeze(testY)[:,None][:200]),'r')
plt.plot(np.arange(look_ahead),predictions,'r',label="prediction")
plt.plot(np.arange(look_ahead),dataset[train_size:(train_size+look_ahead)],label="test function")
plt.legend()
plt.show()
This code is based on this guy's example here:
https://github.com/sachinruk/PyData_Keras_Talk/blob/master/cosine_LSTM.ipynb
Instead of setting the number of epochs, he uses a for loop. Can you just set epochs in the fit command?
Secondly, without setting a seed my results vary wildly between runs. I understand you need to set a seed to get reproducible results, but should results vary this much? In one case I get something that looks like a sine wave; sometimes I end up with a straight line; sometimes a sine wave with the wrong frequency. Is this much variability normal?
Since in this case we are using the LSTM to predict multiple points into the future, I understand that errors can compound. I am wondering whether the way fit is being called is causing that. I have tried both methods and they seem to yield similar results, so I am kind of lost on why this is happening.
Here is an example of the various outputs I got:
https://imgur.com/a/esEaVf9
There are some differences between running single epochs in a for loop and specifying multiple epochs in fit.
For example, learning rate decay is usually applied/updated after each epoch, so restarting fit each time changes its behaviour.
The following post has more specifics:
https://datascience.stackexchange.com/questions/26112/decay-parameter-in-keras-optimizers?rq=1
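If the for loop exists only to call reset_states() between epochs (the usual reason with stateful LSTMs), you can get much the same effect from a single fit call with a callback; a sketch:

# reset LSTM state after every epoch without a manual Python loop
from keras.callbacks import LambdaCallback
reset_states = LambdaCallback(on_epoch_end=lambda epoch, logs: model.reset_states())
model.fit(trainX, trainY, epochs=5, batch_size=batch_size,
          verbose=1, shuffle=False, callbacks=[reset_states])

The remaining difference is anything tied to fit being restarted, such as the optimizer's learning rate decay noted above.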
So, I understand that normalization is important for training a neural network.
I also understand that I have to normalize the validation and test sets with the parameters from the training set (see e.g. this discussion: https://stats.stackexchange.com/questions/77350/perform-feature-normalization-before-or-within-model-validation).
My question is: How do I do this in Keras?
What I'm currently doing is:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
def Normalize(data):
    mean_data = np.mean(data)
    std_data = np.std(data)
    norm_data = (data-mean_data)/std_data
    return norm_data
input_data, targets = np.loadtxt(fname='data', delimiter=';')
norm_input = Normalize(input_data)
model = Sequential()
model.add(Dense(25, input_dim=20, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='val_acc', patience=50)
model.fit(norm_input, targets, validation_split=0.2, batch_size=15, callbacks=[early_stopping], verbose=1)
But here, I first normalize the data with respect to the whole data set and then split off the validation set, which is wrong according to the discussion mentioned above.
It wouldn't be a big deal to save the mean and standard deviation from the training set (training_mean and training_std), but how can I apply the normalization with training_mean and training_std to the validation set separately?
The following code does exactly what you want:
import numpy as np
def normalize(x_train, x_test):
    mu = np.mean(x_train, axis=0)
    std = np.std(x_train, axis=0)
    x_train_normalized = (x_train - mu) / std
    x_test_normalized = (x_test - mu) / std
    return x_train_normalized, x_test_normalized
Then you can use it with keras like this:
from keras.datasets import boston_housing
(x_train, y_train), (x_test, y_test) = boston_housing.load_data()
x_train, x_test = normalize(x_train, x_test)
Wilmar's answer is not correct.
You can split your data into a training and a testing set manually with sklearn.model_selection.train_test_split before fitting the model. Afterwards, normalize the training and testing data based on the mean and standard deviation of the training data. Finally, call model.fit with the validation_data argument.
Code example
import numpy as np
from sklearn.model_selection import train_test_split
data = np.random.randint(0, 100, 200).reshape(20, 10)
target = np.random.randint(0, 2, 20)  # binary targets; randint's upper bound is exclusive
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2)
def Normalize(data, mean_data=None, std_data=None):
    if mean_data is None:  # 'if not mean_data' would misfire when the mean is 0
        mean_data = np.mean(data)
    if std_data is None:
        std_data = np.std(data)
    norm_data = (data-mean_data)/std_data
    return norm_data, mean_data, std_data
X_train, mean_data, std_data = Normalize(X_train)
X_test, _, _ = Normalize(X_test, mean_data, std_data)
# model and early_stopping as defined in the question
model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=15, callbacks=[early_stopping], verbose=1)
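Equivalently, scikit-learn's StandardScaler encapsulates the same fit-on-train, transform-test pattern:

# statistics are learned from the training set only,
# then applied unchanged to the test set
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # fit mean/std on training data
X_test = scaler.transform(X_test)        # reuse the training statistics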