I currently have an RNN model for time series predictions. It uses the three input features "value", "temperature" and "hour of the day" of the last 96 time steps to predict the next 96 time steps of the feature "value".
Here you can see a schema of it:
and here is the current code:
#Import modules
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from tensorflow import keras
# Define the parameters of the RNN and the training
epochs = 1
batch_size = 50
steps_backwards = 96
steps_forward = 96
split_fraction_trainingData = 0.70
split_fraction_validationData = 0.90
randomSeedNumber = 50
#Read dataset
df = pd.read_csv('C:/Users/Desktop/TestData.csv', sep=';', header=0, low_memory=False, infer_datetime_format=True, parse_dates={'datetime':[0]}, index_col=['datetime'])
# standardize data
data = df.values
indexWithYLabelsInData = 0
data_X = data[:, 0:3]
data_Y = data[:, indexWithYLabelsInData].reshape(-1, 1)
scaler_standardized_X = StandardScaler()
data_X = scaler_standardized_X.fit_transform(data_X)
data_X = pd.DataFrame(data_X)
scaler_standardized_Y = StandardScaler()
data_Y = scaler_standardized_Y.fit_transform(data_Y)
data_Y = pd.DataFrame(data_Y)
# Prepare the input data for the RNN
series_reshaped_X = np.array([data_X[i:i + (steps_backwards+steps_forward)].copy() for i in range(len(data) - (steps_backwards+steps_forward))])
series_reshaped_Y = np.array([data_Y[i:i + (steps_backwards+steps_forward)].copy() for i in range(len(data) - (steps_backwards+steps_forward))])
timeslot_x_train_end = int(len(series_reshaped_X) * split_fraction_trainingData)
timeslot_x_valid_end = int(len(series_reshaped_X) * split_fraction_validationData)
X_train = series_reshaped_X[:timeslot_x_train_end, :steps_backwards]
X_valid = series_reshaped_X[timeslot_x_train_end:timeslot_x_valid_end, :steps_backwards]
X_test = series_reshaped_X[timeslot_x_valid_end:, :steps_backwards]
Y_train = series_reshaped_Y[:timeslot_x_train_end, steps_backwards:]
Y_valid = series_reshaped_Y[timeslot_x_train_end:timeslot_x_valid_end, steps_backwards:]
Y_test = series_reshaped_Y[timeslot_x_valid_end:, steps_backwards:]
# Build the model and train it
np.random.seed(randomSeedNumber)
tf.random.set_seed(randomSeedNumber)
model = keras.models.Sequential([
    keras.layers.SimpleRNN(10, return_sequences=True, input_shape=[None, 3]),
    keras.layers.SimpleRNN(10, return_sequences=True),
    keras.layers.TimeDistributed(keras.layers.Dense(1))
])
model.compile(loss="mean_squared_error", optimizer="adam", metrics=['mean_absolute_percentage_error'])
history = model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_valid, Y_valid))
#Predict the test data
Y_pred = model.predict(X_test)
# Inverse the scaling (traInv: transformation inversed)
data_X_traInv = scaler_standardized_X.inverse_transform(data_X)
data_Y_traInv = scaler_standardized_Y.inverse_transform(data_Y)
series_reshaped_X_notTransformed = np.array([data_X_traInv[i:i + (steps_backwards+steps_forward)].copy() for i in range(len(data) - (steps_backwards+steps_forward))])
X_test_notTranformed = series_reshaped_X_notTransformed[timeslot_x_valid_end:, :steps_backwards]
Y_pred_traInv = scaler_standardized_Y.inverse_transform(Y_pred)
Y_test_traInv = scaler_standardized_Y.inverse_transform(Y_test)
# Calculate errors for every time slot of the multiple predictions
abs_diff = np.abs(Y_pred_traInv - Y_test_traInv)
abs_diff_perPredictedSequence = np.zeros((len(Y_test_traInv)))
average_LoadValue_testData_perPredictedSequence = np.zeros((len(Y_test_traInv)))
abs_diff_perPredictedTimeslot_ForEachSequence = np.zeros((len(Y_test_traInv)))
absoluteError_Load_Ratio_allPredictedSequence = np.zeros((len(Y_test_traInv)))
absoluteError_Load_Ratio_allPredictedTimeslots = np.zeros((len(Y_test_traInv)))
mse_perPredictedSequence = np.zeros((len(Y_test_traInv)))
rmse_perPredictedSequence = np.zeros((len(Y_test_traInv)))

for i in range(0, len(Y_test_traInv)):
    for j in range(0, len(Y_test_traInv[0])):
        abs_diff_perPredictedSequence[i] = abs_diff_perPredictedSequence[i] + abs_diff[i][j]
    mse_perPredictedSequence[i] = mean_squared_error(Y_pred_traInv[i], Y_test_traInv[i])
    rmse_perPredictedSequence[i] = np.sqrt(mse_perPredictedSequence[i])
    abs_diff_perPredictedTimeslot_ForEachSequence[i] = abs_diff_perPredictedSequence[i] / len(Y_test_traInv[0])
    average_LoadValue_testData_perPredictedSequence[i] = np.mean(Y_test_traInv[i])
    absoluteError_Load_Ratio_allPredictedSequence[i] = abs_diff_perPredictedSequence[i] / average_LoadValue_testData_perPredictedSequence[i]
    absoluteError_Load_Ratio_allPredictedTimeslots[i] = abs_diff_perPredictedTimeslot_ForEachSequence[i] / average_LoadValue_testData_perPredictedSequence[i]

rmse_average_allPredictedSequences = np.mean(rmse_perPredictedSequence)
absoluteAverageError_Load_Ratio_allPredictedSequence = np.mean(absoluteError_Load_Ratio_allPredictedSequence)
absoluteAverageError_Load_Ratio_allPredictedTimeslots = np.mean(absoluteError_Load_Ratio_allPredictedTimeslots)
absoluteAverageError_allPredictedSequences = np.mean(abs_diff_perPredictedSequence)
absoluteAverageError_allPredictedTimeslots = np.mean(abs_diff_perPredictedTimeslot_ForEachSequence)
Here you have some test data: Download Test Data
So now I would like to include not only past values of the features in the prediction but also future values of the features "temperature" and "hour of the day". The future values of "temperature" can, for example, be taken from an external weather forecasting service, and for "hour of the day" the future values are known in advance (in the test data I have included a "forecast" of the temperature that is not a real forecast; I just randomly changed the values).
This way, I would assume that for several applications and datasets the forecast could be improved.
In a schema it would look like this:
Can anyone tell me how I can do that in Keras with an RNN (or LSTM)? One way could be to include the future values as independent input features, but I would like the model to know that the future values of a feature are connected to its past values.
Reminder: does anybody have an idea how to do this? I'd highly appreciate every comment.
The standard approach is to use an encoder-decoder architecture (see 1 and 2 for instance):
The encoder takes as input the past values of the features and of the target and returns an output representation.
The decoder takes as input the encoder output and the future values of the features and returns the predicted values of the target.
You can use any architecture for the encoder and the decoder, and you can also consider different approaches for passing the encoder output to the decoder (e.g. adding or concatenating it to the decoder input features, adding or concatenating it to the output of some intermediate decoder layer, or adding it to the final decoder output); the code below is just an example.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input, Dense, LSTM, TimeDistributed, Concatenate, Add
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
# define the inputs
target = ['value']
features = ['temperatures', 'hour of the day']
sequence_length = 96
# import the data
df = pd.read_csv('TestData.csv', sep=';', header=0, low_memory=False, infer_datetime_format=True, parse_dates={'datetime': [0]}, index_col=['datetime'])
# scale the data
target_scaler = StandardScaler().fit(df[target])
features_scaler = StandardScaler().fit(df[features])
df[target] = target_scaler.transform(df[target])
df[features] = features_scaler.transform(df[features])
# extract the input and output sequences
X_encoder = [] # past features and target values
X_decoder = [] # future features values
y = [] # future target values
for i in range(sequence_length, df.shape[0] - sequence_length):
    X_encoder.append(df[features + target].iloc[i - sequence_length: i])
    X_decoder.append(df[features].iloc[i: i + sequence_length])
    y.append(df[target].iloc[i: i + sequence_length])
X_encoder = np.array(X_encoder)
X_decoder = np.array(X_decoder)
y = np.array(y)
# define the encoder and decoder
def encoder(encoder_features):
    y = LSTM(units=100, return_sequences=True)(encoder_features)
    y = TimeDistributed(Dense(units=1))(y)
    return y

def decoder(decoder_features, encoder_outputs):
    x = Concatenate(axis=-1)([decoder_features, encoder_outputs])
    # x = Add()([decoder_features, encoder_outputs])
    y = TimeDistributed(Dense(units=100, activation='relu'))(x)
    y = TimeDistributed(Dense(units=1))(y)
    return y
# build the model
encoder_features = Input(shape=X_encoder.shape[1:])
decoder_features = Input(shape=X_decoder.shape[1:])
encoder_outputs = encoder(encoder_features)
decoder_outputs = decoder(decoder_features, encoder_outputs)
model = Model([encoder_features, decoder_features], decoder_outputs)
# train the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model.fit([X_encoder, X_decoder], y, epochs=100, batch_size=128)
# extract the last predicted sequence
y_true = target_scaler.inverse_transform(y[-1, :])
y_pred = target_scaler.inverse_transform(model.predict([X_encoder, X_decoder])[-1, :])
# plot the last predicted sequence
plt.plot(y_true.flatten(), label='actual')
plt.plot(y_pred.flatten(), label='predicted')
plt.legend()
plt.show()
In the example above the model takes two inputs, X_encoder and X_decoder, so in your case when generating the forecasts you can use the past observed temperatures in X_encoder and the future temperature forecasts in X_decoder.
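For example, here is a minimal sketch of generating a single forecast at prediction time with the model above; the placeholder arrays past_window and future_features are assumptions (here just zeros), and in practice they would hold the scaled past observations and the scaled future temperature forecast and hour of the day.
import numpy as np

# Placeholder inputs, assumed to be already scaled with the fitted scalers above:
# past_window     -> shape (96, 3): past 'temperatures', 'hour of the day' and 'value'
# future_features -> shape (96, 2): forecast temperatures and future hours of the day
past_window = np.zeros((sequence_length, len(features) + len(target)))
future_features = np.zeros((sequence_length, len(features)))

# Add the batch dimension expected by the model and predict the next 96 target values
y_next = model.predict([past_window[np.newaxis, ...], future_features[np.newaxis, ...]])

# Undo the target scaling to get the forecast on the original scale
y_next = target_scaler.inverse_transform(y_next[0])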
This is PyTorch code for time series prediction with a known external/exogenous regressor over the forecasted period. Hope it helps! Have a marvellous day!
The input format is a 3D tensor and the output a 1D array (MISO: Multiple Inputs, Single Output).
def CNN_Attention_Bidirectional_LSTM_Encoder_Decoder_predictions(model, data, regressors, extrapolations_length):
    n_input = extrapolations_length
    pred_list = []
    batch = data[-n_input:]
    model = model.train()
    # First step: predict from the last observed window and append the known exogenous regressor value
    pred_list.append(torch.cat((model(batch)[-1], torch.FloatTensor(regressors.iloc[1, [1]]).to(device).unsqueeze(0)), 1))
    batch = torch.cat((batch[n_input - 1].unsqueeze(0), pred_list[-1].unsqueeze(0)), 1)
    batch = batch[:, 1:, :]
    for i in range(n_input - 1):
        model = model.eval()
        # Autoregressive step: feed the previous prediction plus the known future regressor back in
        pred_list.append(torch.cat((model(batch).squeeze(0), torch.FloatTensor(regressors.iloc[i + 1, [1]]).to(device).unsqueeze(0)), 1))
        batch = torch.cat((batch, pred_list[-1].unsqueeze(0)), 1)
        batch = batch[:, 1:, :]
    model = model.train()
    return np.array([pred_list[j].cpu().detach().numpy() for j in range(n_input)])[:, :, 0]
Is it possible to look at this code with an LSTM? I want to train the data with the shape I put here, but I receive an error that I think is related to the batch size. I do not know which batch size to use; currently, the batch size I chose is 64. Should I use another batch size, or is the error not related to the batch size?
Should I choose for this code the shape of X (7311, 17, 124) and the shape of Y (7311, 1)?
InvalidArgumentError: Incompatible shapes: [16] vs. [64]
[[node gradient_tape/binary_crossentropy/weighted_loss/Mul (defined at <ipython-input-74-f95f7e276c58>:1) ]] [Op:__inference_train_function_138498]
df = pd.read_csv("train_data.csv")
timestep = 17 #from 1 to 23 (17 with the current NaN strategy)
threshold_for_classification = -8
X_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()
fill_X = -0.01
seed = 11
#RNN hiperparameter
epochs = 75
batch = 64
val_split = 0.25
test_split = 0.25
lr = 0.0001
adam = optimizers.Nadam() #(lr)
class_weight = {True: 5.,
False: 1.}
verbose = 1
#Dropping first the empty column and then rows with NaNs
df = df.drop("c_rcs_estimate", axis=1)
df = df.dropna(how='any')
#Filtering events with len=1 or min_tca > 2 or max_tca < 2
def conditions(event):
    x = event["time_to_tca"].values
    return ((x.min() < 2.0) & (x.max() > 2.0) & (x.shape[0] > 1))
df = df.groupby('event_id').filter(conditions)
#OHE for c_object_type (5 categories) -> 5 new features
df["mission_id"] = df["mission_id"].astype('category')
df["c_object_type"] = df["c_object_type"].astype('category')
df = pd.get_dummies(df)
#Getting y as 1D-array
y = df.groupby(["event_id"])["risk"].apply(lambda x: x.iloc[-1]).values.reshape(-1, 1)
#Scaling y
_ = y_scaler.fit(df["risk"].values.reshape(-1, 1)) #using the whole risk feature to scale the target 'y'
y = y_scaler.transform(y)
#Getting X as df (dropping rows with tca < 2)
df = df.loc[df["time_to_tca"]>2]
#Adding feature 'event_length' for counting how many instances each event has
df["event_length"] = df.groupby('event_id')['event_id'].transform(lambda x: x.value_counts().idxmax())
#Scaling X
df = pd.DataFrame(X_scaler.fit_transform(df), columns=df.columns)
#Transforming X into a 3D-array
events = df["event_id"].nunique() #rows
features = len(df.columns) #columns
X = np.zeros((events,timestep,features))
X.fill(fill_X)
i = 0
def df_to_3darray(event):
    global X, i
    # Transforming an event to a time series of shape (1, timesteps, columns)
    row = event.values.reshape(1, event.shape[0], event.shape[1])
    # Condition is needed to slice arrays correctly
    # Condition -> is timestep greater than the event's time series length?
    if timestep >= row.shape[1]:
        X[i:i+1, -row.shape[1]:, :] = row
    else:
        X[i:i+1, :, :] = row[:, -timestep:, :]
    # index to iterate over X array
    i = i + 1
    # dataframe remains intact, while X array has been filled
    return event
df.groupby("event_id").apply(df_to_3darray)
#Dropping event_id to remove noise
X = X[:,:,1:]
#TODO: Padding with specific values column-wise instead of zeros.
#TODO: Separating time dependent and independent feature in 2 X arrays
print(X.shape, y.shape)
#computing scaled threshold
th = np.array([threshold_for_classification]).reshape(-1,1)
th = y_scaler.transform(th)
threshold_scaled = th[0,0]
#Splitting arrays
y_boolean = (y > threshold_scaled).reshape(-1,1)
X_train, X_test, y_train_numeric, y_test_numeric = train_test_split(X, y,
stratify=y_boolean,
shuffle=True,
random_state=seed,
test_size = test_split
)
y_train_boolean = (y_train_numeric > threshold_scaled).reshape(-1,1)
X_train, X_val, y_train_numeric, y_val_numeric = train_test_split(X_train, y_train_numeric,
stratify=y_train_boolean,
shuffle=True,
random_state=seed,
test_size = val_split
)
#transforming it into a classification task -> y_train, y_test boolean
y_train = (y_train_numeric > threshold_scaled).reshape(-1,1)
y_val = (y_val_numeric > threshold_scaled).reshape(-1,1)
y_test = (y_test_numeric > threshold_scaled).reshape(-1,1)
X_train = tf.convert_to_tensor(X_train,dtype=tf.int64)
X_test = tf.convert_to_tensor( X_test,dtype=tf.int64)
y_train_numeric = tf.convert_to_tensor(y_train_numeric,dtype=tf.int64)
y_test_numeric = tf.convert_to_tensor(y_test_numeric,dtype=tf.int64)
y_train_boolean = tf.convert_to_tensor(y_train_boolean,dtype=tf.int64)
X_val = tf.convert_to_tensor(X_val,dtype=tf.int64)
y_val_numeric = tf.convert_to_tensor(y_val_numeric,dtype=tf.int64)
y_train = tf.convert_to_tensor(y_train,dtype=tf.int64)
y_val = tf.convert_to_tensor(y_val,dtype=tf.int64)
y_test = tf.convert_to_tensor(y_test,dtype=tf.int64)
y_boolean = tf.convert_to_tensor(y_boolean,dtype=tf.int64)
#Percentage of high risks in train
print("TRAIN {:0.1f}, {:0.1f}, {:0.3f}".format(np.sum(y_train), y_train.shape[0], np.sum(y_train)/y_train.shape[0]))
#Percentage of high risks in val
print("VAL {:0.1f}, {:0.1f}, {:0.3f}".format(np.sum(y_val), y_val.shape[0], np.sum(y_val)/y_val.shape[0]))
#Percentage of high risks in test
print("TEST {:0.1f}, {:0.1f}, {:0.3f}".format(np.sum(y_test), y_test.shape[0], np.sum(y_test)/y_test.shape[0]))
# Model activation selu
input_tensor = Input(batch_shape=(batch, timestep, X_train.shape[2]))
rnn_1 = LSTM(32, stateful=False, dropout=0.15, recurrent_dropout=0.3, return_sequences=True, kernel_regularizer=L1L2(l1=0.0, l2=0.01))(input_tensor)
batch_1 = BatchNormalization()(rnn_1)
rnn_2 = LSTM(16, stateful=False, dropout=0.15, recurrent_dropout=0.3, return_sequences=True, kernel_regularizer=L1L2(l1=0.0, l2=0.01))(batch_1)
batch_2 = BatchNormalization()(rnn_2)
rnn_3 = LSTM(8, stateful=False, dropout=0.15, recurrent_dropout=0.3, return_sequences=False, kernel_regularizer=L1L2(l1=0.0, l2=0.01))(batch_2)
batch_3 = BatchNormalization()(rnn_3)
output_tensor = Dense(units = 1, activation='sigmoid')(batch_3)
model = Model(inputs=input_tensor,
outputs= output_tensor)
model.compile(loss='binary_crossentropy',
optimizer=adam,
metrics=['accuracy'])
model.summary()
model_history = model.fit(X_train, y_train,
epochs=epochs,
batch_size=batch,
#shuffle=True, #OJO
validation_data=(X_val, y_val),
verbose=verbose,
class_weight=class_weight
).history
I would suggest changing this line
input_tensor = Input(batch_shape=(batch, timestep, X_train.shape[2]))
to
input_tensor = tf.keras.layers.Input(shape=(timestep, X_train.shape[2]))
and then defining your batch_size in model.fit, making sure X_train and y_train have the same number of samples.
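For illustration, here is a minimal self-contained sketch of the suggested pattern; the toy data and the much smaller model are assumptions for demonstration only, not the original dataset or architecture.
import numpy as np
import tensorflow as tf

# Toy stand-ins for the real arrays (shapes are assumptions for illustration)
timestep, n_features, batch = 17, 123, 64
X_train = np.random.rand(100, timestep, n_features).astype("float32")
y_train = (np.random.rand(100, 1) > 0.5).astype("float32")

# Input defined by shape only, so the batch size is not baked into the graph
inputs = tf.keras.layers.Input(shape=(timestep, n_features))
x = tf.keras.layers.LSTM(8)(inputs)
outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x)
model = tf.keras.Model(inputs, outputs)
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# The batch size goes to fit instead; Keras then handles the final, smaller
# batch (the [16] vs [64] in the error message) without a shape mismatch
model.fit(X_train, y_train, epochs=1, batch_size=batch)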
Purely for learning, I'd like to get the following code to work without a DataLoader. I use Huggingface transformers regularly, yet I struggle with PyTorch dimensions all the time, so I have started with some simple projects from the book "Deep Learning with PyTorch." One of the problems from the book suggested using a wine quality dataset with a super simple linear model. I have toiled with the dimensions of the data, which I think is the source of my error:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (3919x1 and 11x100)
Data is available here
import csv
from collections import OrderedDict
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
wine_path = "winequality-white.csv"
wine_quality_numpy = np.loadtxt(wine_path, dtype=np.float32, delimiter=";",
skiprows=1)
col_list = next(csv.reader(open(wine_path), delimiter=';'))
wineq = torch.from_numpy(wine_quality_numpy)
# print(wineq.shape, wineq.dtype)
data = wineq[:, :-1]
target = wineq[:, -1]
target = target.unsqueeze(1)
n_samples = wine_quality_numpy.shape[0]
n_val = int(0.2 * n_samples)
shuffled_indices = torch.randperm(n_samples)
train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]
target_train = target[train_indices]
data_train = data[train_indices]
target_val = target[val_indices]
data_val = data[val_indices]
seq_model = nn.Sequential(OrderedDict([
('hidden_linear', nn.Linear(11, 100)),
('hidden_activation', nn.Tanh()),
('output_linear', nn.Linear(100, 7))
]))
def training_loop(n_epochs, optimizer, model, loss_fn, target_train, target_val,
                  data_train, data_val):
    for epoch in range(1, n_epochs + 1):
        t_p_train = model(target_train) # <1>
        loss_train = loss_fn(t_p_train, data_train)

        t_p_val = model(t_u_val) # <1>
        loss_val = loss_fn(t_p_val, data_val)

        optimizer.zero_grad()
        loss_train.backward() # <2>
        optimizer.step()

        if epoch == 1 or epoch % 1000 == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")
optimizer = optim.SGD(seq_model.parameters(), lr=1e-3) # <1>
training_loop(
n_epochs = 5000,
optimizer = optimizer,
model = seq_model,
loss_fn = nn.MSELoss(),
target_train = target_train,
target_val = target_val,
data_train = data_train,
data_val = data_val)
Thank you!
In my haste I had the training data and labels swapped. Here is the fixed section.
seq_model = nn.Sequential(OrderedDict([
('hidden_linear', nn.Linear(11, 100)),
('hidden_activation', nn.Tanh()),
('output_linear', nn.Linear(100, 7))
]))
def training_loop(n_epochs, optimizer, model, loss_fn, target_train, target_val,
                  data_train, data_val):
    for epoch in range(1, n_epochs + 1):
        t_p_train = model(data_train) # <1>
        loss_train = loss_fn(t_p_train, target_train)

        t_p_val = model(data_val) # <1>
        loss_val = loss_fn(t_p_val, target_val)

        optimizer.zero_grad()
        loss_train.backward() # <2>
        optimizer.step()

        if epoch == 1 or epoch % 1000 == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")
When I start to train my model, the loss values are decreasing but the accuracy values never change. I don't know why.
# -*- coding: utf-8 -*-
#Libraries
import torch
import torch.nn.functional as F
from torch import autograd, nn
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms, datasets
from torch.utils import data
"""
Olivetti face dataset
"""
from sklearn.datasets import fetch_olivetti_faces
# Olivetti dataset download
olivetti = fetch_olivetti_faces()
train = olivetti.images
label = olivetti.target
X = train
Y = label
print("Format for X:", X.shape)
print("Format for Y: ", Y.shape)
print("\nDownload Ok")
"""
Set for train
"""
train_rate = 0.8
X_train = np.zeros([int(train_rate * X.shape[0]),64,64], dtype=float)
Y_train = np.zeros([int(train_rate * X.shape[0])], dtype=int)
X_val = np.zeros([int((1-train_rate) * X.shape[0]+1),64,64], dtype=float)
Y_val = np.zeros([int((1-train_rate) * X.shape[0]+1)], dtype=int)
#Split data for train and validation
for i in range(X.shape[0]):
    ie = 0
    iv = 0
    if (i % 10) / 10 >= train_rate:
        X_train[ie] = X[i]
        Y_train[ie] = Y[i]
        ie += 1
    else:
        X_val[iv] = X[i]
        Y_val[iv] = Y[i]
        iv += 1
X_train = X_train.reshape(320,-1,64,64)
X_val = X_val.reshape(80,-1,64,64)
print(Y_train.shape)
X_train = torch.Tensor(X_train)
Y_train = torch.Tensor(Y_train)
X_val = torch.Tensor(X_val)
Y_val = torch.Tensor(Y_val)
batch_size = 20
train_loader = torch.utils.data.DataLoader(X_train,
batch_size=batch_size,
)
val_loader = torch.utils.data.DataLoader(X_val,
batch_size=batch_size,
)
class CNNModule(nn.Module):
    def __init__(self):
        super(CNNModule, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 13 * 13, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 40)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 13 * 13)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
def make_train(model, dataset, n_iters, gpu):
    # Organize data
    X_train, Y_train, X_val, Y_val = dataset

    kriter = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    # Arrays to save loss and accuracy
    tl = np.zeros(n_iters)  # For train loss
    ta = np.zeros(n_iters)  # For train accuracy
    vl = np.zeros(n_iters)  # For validation loss
    va = np.zeros(n_iters)  # For validation accuracy

    # Convert labels to long
    Y_train = Y_train.long()
    Y_val = Y_val.long()

    # GPU control
    if gpu:
        X_train, Y_train = X_train.cuda(), Y_train.cuda()
        X_val, Y_val = X_val.cuda(), Y_val.cuda()
        model = model.cuda()  # Parameters to GPU!
        print("Using GPU")
    else:
        print("Using CPU")

    # print(X_train.shape)
    # print(Y_train.shape)

    for i in range(n_iters):
        # train forward
        train_out = model.forward(X_train)
        train_loss = kriter(train_out, Y_train)

        # Backward and optimization
        train_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Compute train accuracy
        train_predict = train_out.cpu().detach().argmax(dim=1)
        train_accuracy = (train_predict.cpu().numpy() == Y_train.cpu().numpy()).mean()

        # For validation
        val_out = model.forward(X_val)
        val_loss = kriter(val_out, Y_val)

        # Compute validation accuracy
        val_predict = val_out.cpu().detach().argmax(dim=1)
        val_accuracy = (val_predict.cpu().numpy() == Y_val.cpu().numpy()).mean()

        tl[i] = train_loss.cpu().detach().numpy()
        ta[i] = train_accuracy
        vl[i] = val_loss.cpu().detach().numpy()
        va[i] = val_accuracy

        # Show result every 5 loops
        if i % 5 == 0:
            print("Loop --> ", i)
            print("Train Loss :", train_loss.cpu().detach().numpy())
            print("Train Accuracy :", train_accuracy)
            print("Validation Loss :", val_loss.cpu().detach().numpy())
            print("Validation Accuracy :", val_accuracy)

    model = model.cpu()

    # Print result
    plt.subplot(2, 2, 1)
    plt.plot(np.arange(n_iters), tl, 'r-')
    plt.subplot(2, 2, 2)
    plt.plot(np.arange(n_iters), ta, 'b--')
    plt.subplot(2, 2, 3)
    plt.plot(np.arange(n_iters), vl, 'r-')
    plt.subplot(2, 2, 4)
    plt.plot(np.arange(n_iters), va, 'b--')
dataset = X_train,Y_train,X_val,Y_val
gpu = True
gpu = gpu and torch.cuda.is_available()
model = CNNModule()
make_train(model,dataset,100,gpu)
OUTPUT:
Using CPU
Loop --> 0
Train Loss : 3.6302185
Train Accuracy : 0.0
Validation Loss : 3.6171098
Validation Accuracy : 0.0
Loop --> 5
Train Loss : 3.557933
Train Accuracy : 0.996875
Validation Loss : 3.545982
Validation Accuracy : 0.9875
.
.
.
Loop --> 95
Train Loss : 0.04211783
Train Accuracy : 0.996875
Validation Loss : 0.13397054
Validation Accuracy : 0.9875
From your code,
train_accuracy = (train_predict.cpu().numpy()==Y_train.cpu().numpy()).mean()
you are taking the mean of the correct values, which is why you are getting the same answer in every loop. Instead, you should divide the total number of correct predictions by the total number of examples to find the accuracy.
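As a small illustration of what is being suggested, here is a runnable sketch; the toy tensors stand in for train_predict and Y_train from the question and are assumptions for demonstration only.
import torch

# Toy predicted class indices and labels standing in for train_predict and Y_train
train_predict = torch.tensor([0, 1, 1, 2])
Y_train = torch.tensor([0, 1, 2, 2])

# Count correct predictions, then divide by the number of examples
correct = (train_predict.numpy() == Y_train.numpy()).sum()
train_accuracy = correct / Y_train.shape[0]
print(train_accuracy)  # 0.75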
I have a dataframe containing numerical daily data and a target variable ("Score") in the last column that I am trying to predict. The code below seems to work, but I would like to visualise the results of the model fit against the actual data in the training set while the model is calibrating.
All variables are time series, so they are ordered in time. The plot I managed to produce shows the actual time series of the target variable (in the training period), but for the predicted values I did not manage to get the expected result.
If I plot them, the fitted values don't respect the time ordering of the actual data, and this seems to be due to a shuffling that happens at an earlier stage.
How can I recover the time ordering of the fitted data at each iteration so that I can compare against the actual target variable while the model calibrates?
# Imports used below (TensorFlow 1.x graph-style code is assumed)
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from numpy import arange
from numpy.random import permutation
from sklearn.preprocessing import MinMaxScaler

# allData is a dataframe containing the target variable in the last column
y = 'Score'  # name of the target variable to predict
target = allData[y].shift(1).dropna()  # shift by 1 day as I want to predict the future score
X_ = allData.drop([y], axis=1) #all features
df = pd.concat([X_, target], join='outer', axis=1).dropna() #put them all back in a dataframe
df_train = df['2015':'2018']
df_test = df[prediction[0] : prediction[1]]
#scale variables
scaler = MinMaxScaler(feature_range=(-1, 1))
train= scaler.fit_transform(df_train.values)
test = scaler.transform(df_test.values)
x_train = train[:, :-1]
y_train = train[:, -1]
x_test = test[:, :-1]
num_features = x_train.shape[1]
x = tf.placeholder(dtype=tf.float32, shape=[None, num_features])
y_ = tf.placeholder(dtype=tf.float32, shape=[None])
nl_1, nl_2, nl_3, nl_4 = 512, 256, 128, 64
wi = tf.contrib.layers.variance_scaling_initializer(mode='FAN_AVG', uniform=True, factor=1)
zi = tf.zeros_initializer()
# 4 Hidden layers
wt_hidden_1 = tf.Variable(wi([num_features, nl_1]))
bias_hidden_1 = tf.Variable(zi([nl_1]))
wt_hidden_2 = tf.Variable(wi([nl_1, nl_2]))
bias_hidden_2 = tf.Variable(zi([nl_2]))
wt_hidden_3 = tf.Variable(wi([nl_2, nl_3]))
bias_hidden_3 = tf.Variable(zi([nl_3]))
wt_hidden_4 = tf.Variable(wi([nl_3, nl_4]))
bias_hidden_4 = tf.Variable(zi([nl_4]))
# Output layer
wt_out = tf.Variable(wi([nl_4, 1]))
bias_out = tf.Variable(zi([1]))
hidden_1 = tf.nn.relu(tf.add(tf.matmul(x, wt_hidden_1), bias_hidden_1))
hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, wt_hidden_2), bias_hidden_2))
hidden_3 = tf.nn.relu(tf.add(tf.matmul(hidden_2, wt_hidden_3), bias_hidden_3))
hidden_4 = tf.nn.relu(tf.add(tf.matmul(hidden_3, wt_hidden_4), bias_hidden_4))
out = tf.transpose(tf.add(tf.matmul(hidden_4, wt_out), bias_out))
mse = tf.reduce_mean(tf.squared_difference(out, y_))
optimizer = tf.train.AdamOptimizer().minimize(mse)
session = tf.InteractiveSession()
session.run(tf.global_variables_initializer())
BATCH_SIZE = 100
EPOCHS = 100
for epoch in range(EPOCHS):
    # Shuffle the training data
    shuffle_data = permutation(arange(len(y_train)))
    x_train = x_train[shuffle_data]
    y_train = y_train[shuffle_data]

    # Mini-batch training
    for i in range(len(y_train) // BATCH_SIZE):
        start = i * BATCH_SIZE
        batch_x = x_train[start:start + BATCH_SIZE]
        batch_y = y_train[start:start + BATCH_SIZE]
        session.run(optimizer, feed_dict={x: batch_x, y_: batch_y})

        # Show plot of fitted model against actual data
        if np.mod(i, 5) == 0:
            pred = session.run(out, feed_dict={x: x_train})  # x_train is scaled
            dd = train.copy()
            dd[:, -1] = pred[0]
            pred = scaler.inverse_transform(dd)  # need to rescale in order to compare with actual data
            fig = plt.figure()
            ax1 = fig.add_subplot(111)
            line1, = ax1.plot(df_train[y].values)  # the actual data in the training period
            line2, = ax1.plot(pred[:, -1][::-1])  # the fitted data don't seem to be ordered in time, like the original data
            plt.title('Epoch ' + str(epoch) + ', Batch ' + str(i))
            plt.show()
            plt.pause(0.01)
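Purely as a sketch of one way to recover the ordering for the plot (building on the variable names above, and assuming the goal is only visualisation): an unshuffled copy of the scaled training matrix could be kept before the epoch loop, and the prediction run on that copy, so the fitted values line up with df_train.
# Unshuffled copy kept before the epoch loop (hypothetical name)
x_train_ordered = train[:, :-1].copy()

# ... train on the shuffled x_train / y_train exactly as above ...

# Predict on the ordered copy, then rescale as before
pred_ordered = session.run(out, feed_dict={x: x_train_ordered})
dd = train.copy()
dd[:, -1] = pred_ordered[0]
pred_ordered = scaler.inverse_transform(dd)

# Now both series follow the same time ordering
plt.plot(df_train[y].values, label='actual')
plt.plot(pred_ordered[:, -1], label='fitted')
plt.legend()
plt.show()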