I'm attempting to use a Keras Model in a Genetic Algorithm for text classification, but I'm getting an error with pad_sequences, which claims:
TypeError: pad_sequences() got multiple values for argument 'maxlen'
The actual pad_sequences call is:
data = self.pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
Which is found within:
def get_data(self):
"""Retrieve the dataset and process the data."""
batch_size = 128
VALIDATION_SPLIT = 0.2
MAX_SEQUENCE_LENGTH = 1000
MAX_NUM_WORDS = 20000
csv = 'VocabCSV.csv'
my_df = self.pd.read_csv(csv,index_col=0,encoding = 'latin-1')
my_df.dropna(inplace=True)
my_df.reset_index(drop=True,inplace=True)
print(my_df.info())
texts = my_df.Text # list of text samples
labellist = my_df.Target # list of labels
label_vals = [] # label values list
labels_index = {} # dictionary mapping label name to numeric id
labels = [] # list of label ids
for label in labellist:
if label not in label_vals:
label_vals.append(label)
for idx, text in enumerate(texts):
for label in label_vals:
if label == labellist[idx]:
label_id = label_vals.index(label)
labels_index[text] = label_id
labels.append(label_id)
print("labels index {}".format(len(labels_index)))
print("labels size: %s " % len(labels))
print("found %s texts." % len(texts))
# finally, vectorize the text samples into a 2D integer tensor
tokenizer = self.Tokenizer(num_words=MAX_NUM_WORDS)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))
data = self.pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
print(self.np.asarray(labels).shape)
labels = self.to_categorical(labels)
print('Shape of data tensor:', data.shape)
print('Shape of label tensor:', labels.shape)
# split the data into a training set and a validation set
indices = self.np.arange(data.shape[0])
self.np.random.shuffle(indices)
data = data[indices]
labels = labels[indices]
num_validation_samples = int(VALIDATION_SPLIT * data.shape[0])
x_train = data[:-num_validation_samples]
y_train = labels[:-num_validation_samples]
x_test = data[-num_validation_samples:]
y_test = labels[-num_validation_samples:]
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
print(len(x_test))
print(len(y_test))
input_shape = MAX_SEQUENCE_LENGTH
print(input_shape)
nb_classes = len(label_vals)
return (nb_classes, batch_size, input_shape, x_train, x_test, y_train, y_test, word_index)
The error appears to be occurring whenever get_data is called by another function, however I can't identify what's actually causing it.
The issue is that you call self.pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH). The pad_sequences function doesn't belong to your class; it comes from keras.preprocessing.sequence. Because it is stored on the class, accessing it through self most likely binds self as the first positional argument, so sequences slides into the maxlen slot and then collides with the maxlen= keyword, which is exactly what "got multiple values for argument 'maxlen'" means.
So if you want it to work correctly, do the import like this:
from keras.preprocessing import sequence
And then call pad_sequences like this:
data = sequence.pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
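For context, a minimal sketch that reproduces and avoids the TypeError (the Demo class is hypothetical; on newer TensorFlow the same function is importable from tensorflow.keras.preprocessing.sequence):
from tensorflow.keras.preprocessing.sequence import pad_sequences

class Demo:
    # class attribute: accessed via `self`, Python binds `self` as the first
    # positional argument, shifting `sequences` into the `maxlen` slot
    pad_sequences_cls = pad_sequences

    def __init__(self):
        # instance attribute: a plain function reference, no implicit binding
        self.pad_sequences = pad_sequences

d = Demo()
seqs = [[1, 2, 3], [4, 5]]
print(d.pad_sequences(seqs, maxlen=4))  # works: returns a (2, 4) array
# d.pad_sequences_cls(seqs, maxlen=4)   # TypeError: got multiple values for 'maxlen'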
Related
I'm new to both Python and image classification. I tried to do K-fold cross-validation. Here's my code and where the error arises:
import os
import cv2
import numpy as np
from sklearn.preprocessing import LabelEncoder
def load_images_and_label(folder):
images = []
labels = []
for class_folder in os.listdir(folder):
class_path = os.path.join(folder, class_folder)
if os.path.isdir(class_path):
for filename in os.listdir(class_path):
img = cv2.imread(os.path.join(class_path, filename))
if img is not None:
images.append(img)
labels.append(class_folder)
label_encoder = LabelEncoder()
integer_encoded_labels = label_encoder.fit_transform(labels)
return (np.array(images), integer_encoded_labels)
Then I called the function above to prepare the inputs for the cross-validation:
root_dir = "/path-to-dataset"
images, labels = load_images_and_label(root_dir)
And below is how I did the K-fold cross-validation:
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import StratifiedKFold

x_data = images
y_data = labels
num_classes = len(np.unique(y_data))
y_data = keras.utils.to_categorical(y_data, num_classes)
kfold = StratifiedKFold(n_splits=5, shuffle=True)
cvscores = []
fold_num = 1
for train, test in enumerate(kfold.split(x_data, y_data.argmax(1))):
xception_model = tf.keras.Model(input, output)
xception_model.compile(tf.keras.optimizers.Adam(lr=0.0001), loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
print(x_data[test])
When I try to print x_data[test], it gives me the error shape mismatch: indexing arrays could not be broadcast together with shapes (17136,) (4284,). But when I try x_data[train], it works well. How do I fix this? I don't really understand.
And the structure of my dataset is:
-dataset
-class_1
-file_class_1.png
...
-class_2
-file_class_2.png
...
-class_3
-file_class_3.png
...
-class_4
-file_class_4.png
...
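For reference, the likely cause: wrapping kfold.split(...) in enumerate() makes train the fold index (an int) and test the whole (train_indices, test_indices) tuple. x_data[train] then indexes with a single integer and "works", while x_data[test] indexes with a tuple of two differently sized arrays, giving exactly this broadcast error. A minimal sketch of the corrected loop, keeping the rest of the code as posted:

for fold_num, (train, test) in enumerate(kfold.split(x_data, y_data.argmax(1)), start=1):
    # `train` and `test` are now the index arrays for this fold
    xception_model = tf.keras.Model(input, output)
    xception_model.compile(tf.keras.optimizers.Adam(learning_rate=0.0001),
                           loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                           metrics=['accuracy'])
    print(x_data[test].shape)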
I am doing a course on Natural Language Processing using TensorFlow on Coursera. In one of the labs, we have to explore overfitting in NLP. I keep getting the same error while running the model. The error is
Failed to convert a NumPy array to a Tensor (Unsupported object type list).
Here is the code. I think something is wrong with train_labels and val_labels. I tried using .astype(), but it gives the error setting an array element with a sequence.
import csv
import random
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt
from scipy.stats import linregress
EMBEDDING_DIM = 100
MAXLEN = 16
TRUNCATING = 'post'
PADDING = 'post'
OOV_TOKEN = "<OOV>"
MAX_EXAMPLES = 160000
TRAINING_SPLIT = 0.9
SENTIMENT_CSV = "./data/training_cleaned.csv"
with open(SENTIMENT_CSV, 'r') as csvfile:
print(f"First data point looks like this:\n\n{csvfile.readline()}")
print(f"Second data point looks like this:\n\n{csvfile.readline()}")
def parse_data_from_file(filename):
sentences = []
labels = []
with open(filename, 'r') as csvfile:
### START CODE HERE
reader = csv.reader(csvfile, delimiter=",")
for row in reader:
sentences.append(row[5])
if row[0] == '0':
labels.append(0)
else:
labels.append(1)
labels.append(labels)
### END CODE HERE
return sentences, labels
sentences, labels = parse_data_from_file(SENTIMENT_CSV)
print(f"dataset contains {len(sentences)} examples\n")
print(f"Text of second example should look like this:\n{sentences[1]}\n")
print(f"Text of fourth example should look like this:\n{sentences[3]}")
print(f"\nLabels of last 5 examples should look like this:\n{labels[-5:]}")
sentences_and_labels = list(zip(sentences, labels))
# Perform random sampling
random.seed(42)
sentences_and_labels = random.sample(sentences_and_labels, MAX_EXAMPLES)
# Unpack back into separate lists
sentences, labels = zip(*sentences_and_labels)
print(f"There are {len(sentences)} sentences and {len(labels)} labels after random sampling\n")
def train_val_split(sentences, labels, training_split):
train_size = int(len(sentences) * training_split)
# Split the sentences and labels into train/validation splits
train_sentences = sentences[:train_size]
train_labels = labels[:train_size]
validation_sentences = sentences[train_size:]
validation_labels = labels[train_size:]
### END CODE HERE
return train_sentences, validation_sentences, train_labels, validation_labels
train_sentences, val_sentences, train_labels, val_labels = train_val_split(sentences, labels, TRAINING_SPLIT)
print(f"There are {len(train_sentences)} sentences for training.\n")
print(f"There are {len(train_labels)} labels for training.\n")
print(f"There are {len(val_sentences)} sentences for validation.\n")
print(f"There are {len(val_labels)} labels for validation.")
def fit_tokenizer(train_sentences, oov_token):
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(train_sentences)
# Fit the tokenizer to the training sentences
### END CODE HERE
return tokenizer
tokenizer = fit_tokenizer(train_sentences, OOV_TOKEN)
word_index = tokenizer.word_index
VOCAB_SIZE = len(word_index)
print(f"Vocabulary contains {VOCAB_SIZE} words\n")
print("<OOV> token included in vocabulary" if "<OOV>" in word_index else "<OOV> token NOT included in vocabulary")
print(f"\nindex of word 'i' should be {word_index['i']}")
def seq_pad_and_trunc(sentences, tokenizer, padding, truncating, maxlen):
sequences = tokenizer.texts_to_sequences(sentences)
# Pad the sequences using the correct padding, truncating and maxlen
pad_trunc_sequences = pad_sequences(sequences, maxlen=maxlen, padding = padding, truncating=truncating)
### END CODE HERE
return pad_trunc_sequences
train_pad_trunc_seq = seq_pad_and_trunc(train_sentences, tokenizer, PADDING, TRUNCATING, MAXLEN)
val_pad_trunc_seq = seq_pad_and_trunc(val_sentences, tokenizer, PADDING, TRUNCATING, MAXLEN)
print(f"Padded and truncated training sequences have shape: {train_pad_trunc_seq.shape}\n")
print(f"Padded and truncated validation sequences have shape: {val_pad_trunc_seq.shape}")
train_labels = np.asarray(train_labels)
val_labels = np.asarray(val_labels)
GLOVE_FILE = './data/glove.6B.100d.txt'
# Initialize an empty embeddings index dictionary
GLOVE_EMBEDDINGS = {}
# Read file and fill GLOVE_EMBEDDINGS with its contents
with open(GLOVE_FILE) as f:
for line in f:
values = line.split()
word = values[0]
coefs = np.asarray(values[1:], dtype='float32')
GLOVE_EMBEDDINGS[word] = coefs
EMBEDDINGS_MATRIX = np.zeros((VOCAB_SIZE+1, EMBEDDING_DIM))
# Iterate all of the words in the vocabulary and if the vector representation for
# each word exists within GloVe's representations, save it in the EMBEDDINGS_MATRIX array
for word, i in word_index.items():
embedding_vector = GLOVE_EMBEDDINGS.get(word)
if embedding_vector is not None:
EMBEDDINGS_MATRIX[i] = embedding_vector
def create_model(vocab_size, embedding_dim, maxlen, embeddings_matrix):
model = tf.keras.Sequential([
tf.keras.layers.Embedding(vocab_size+1, embedding_dim,input_length=maxlen, weights=[embeddings_matrix],trainable=False),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Conv1D(64, 5, activation='relu'),
tf.keras.layers.MaxPooling1D(pool_size=4),
tf.keras.layers.LSTM(64, activation='relu', input_shape=(1000, 1), return_sequences=True),
tf.keras.layers.Dense(1, activation='sigmoid'),
tf.keras.layers.Dense(1, activation='relu')
])
model.compile(loss='binary_crossentropy', optimizer='adam',metrics='accuracy')
### END CODE HERE
return model
model = create_model(VOCAB_SIZE, EMBEDDING_DIM, MAXLEN, EMBEDDINGS_MATRIX)
history = model.fit(train_pad_trunc_seq, train_labels, epochs=1, validation_data = (val_pad_trunc_seq,val_labels))
The error is due to the parse_data_from_file() function, which creates incorrect labels: the stray labels.append(labels) appends the list to itself, so labels ends up as a ragged structure that cannot be converted to a tensor. Kindly change it like below (the raw labels are 0 and 4, so dividing by 4 maps them to 0 and 1):
def parse_data_from_file(filename):
sentences = []
labels = []
with open(filename, 'r') as csvfile:
### START CODE HERE
reader = csv.reader(csvfile, delimiter=',')
for row in reader:
label = int(row[0]) // 4  # raw labels are 0 or 4
labels.append(label)
sentence = row[5]
sentences.append(sentence)
### END CODE HERE
return sentences, labels
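For reference, a minimal reproduction (with hypothetical values) of why the original version failed: appending a list into labels makes it ragged, so NumPy falls back to an object array that TensorFlow cannot convert:

import numpy as np
import tensorflow as tf

labels = [0, 1, 1]
labels.append([0, 1, 1])  # mimics the effect of the stray labels.append(labels)
arr = np.asarray(labels, dtype=object)  # ragged, so the dtype ends up as object
tf.convert_to_tensor(arr)  # ValueError: Failed to convert a NumPy array to a
                           # Tensor (Unsupported object type list).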
I have trained a POS tagger using a Bi-LSTM. Here is the code that was used to train the model.
# Imports (added for completeness; the original post omitted them)
import codecs
import numpy as np
from gensim.models import KeyedVectors
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, TimeDistributed, Dense

# Load files containing word sequence and tag sequence
with codecs.open('news_verified_train_words_adj_adverb.txt','r',encoding='utf-8') as f1:
train_data = f1.readlines()
with codecs.open('news_verified_train_tags_adj_adverb.txt','r',encoding='utf-8') as f2:
train_tags = f2.readlines()
news_verified_train_words_adj_adverb contains the word sequences separated by newlines (each line is simply a sentence) and news_verified_train_tags_adj_adverb contains the corresponding part-of-speech tag sequence for each sentence (each line is simply a tag sequence).
X = [] #2d list contains sentences, inner list holds words of each sentence
Y = [] #2d list contains tag sequence for each sentence, inner list holds tags
for i in range (len(train_data)):
sentence = []
tag_sequence = []
sentence_splitted = train_data[i].strip().split()
tag_sequence_splitted = train_tags[i].strip().split()
for j in range (len(sentence_splitted)):
sentence.append(sentence_splitted[j])
tag_sequence.append(tag_sequence_splitted[j])
X.append(sentence)
Y.append(tag_sequence)
# encode X
word_tokenizer = Tokenizer() # instantiate tokeniser
word_tokenizer.fit_on_texts(X) # fit tokeniser on data
X_encoded = word_tokenizer.texts_to_sequences(X) # use the tokeniser to encode input sequence
# encode Y
tag_tokenizer = Tokenizer()
tag_tokenizer.fit_on_texts(Y)
Y_encoded = tag_tokenizer.texts_to_sequences(Y)
# Pad each sequence to MAX_SEQ_LENGTH using KERAS' pad_sequences() function.
# Sentences longer than MAX_SEQ_LENGTH are truncated.
# Sentences shorter than MAX_SEQ_LENGTH are padded with zeroes.
# Truncation and padding can either be 'pre' or 'post'.
# For padding we are using 'pre' padding type, that is, add zeroes on the left side.
# For truncation, we are using 'post', that is, truncate a sentence from right side.
MAX_SEQ_LENGTH = 300 # sequences longer than 300 will be truncated
X_padded = pad_sequences(X_encoded, maxlen=MAX_SEQ_LENGTH, padding="pre", truncating="post")
Y_padded = pad_sequences(Y_encoded, maxlen=MAX_SEQ_LENGTH, padding="pre", truncating="post")
# assign padded sequences to X and Y
X, Y = X_padded, Y_padded
# load fastText word vectors using the following function present in the gensim library
fastTextWordVectors = KeyedVectors.load_word2vec_format('cc.si.300.vec')
# assign word vectors from word2vec model
EMBEDDING_SIZE = 300 # each word in word2vec model is represented using a 300 dimensional vector
VOCABULARY_SIZE = len(word_tokenizer.word_index) + 1
# create an empty embedding matrix
embedding_weights = np.zeros((VOCABULARY_SIZE, EMBEDDING_SIZE))
# create a word to index dictionary mapping
word2id = word_tokenizer.word_index
# copy vectors from word2vec model to the words present in corpus
for word, index in word2id.items():
try:
embedding_weights[index, :] = fastTextWordVectors[word]
except KeyError:
pass
# use Keras' to_categorical function to one-hot encode Y
Y = to_categorical(Y,dtype="int32")
# split entire data into training and testing sets
TEST_SIZE = 0.15
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=TEST_SIZE, random_state=4)
# split training data into training and validation sets
VALID_SIZE = 0.15
X_train, X_validation, Y_train, Y_validation = train_test_split(X_train, Y_train, test_size=VALID_SIZE, random_state=4)
# total number of tags
NUM_CLASSES = Y.shape[2]
print(NUM_CLASSES)
# create architecture
bidirect_model_adj_adv = Sequential()
bidirect_model_adj_adv.add(Embedding(input_dim = VOCABULARY_SIZE,
output_dim = EMBEDDING_SIZE,
input_length = MAX_SEQ_LENGTH,
weights = [embedding_weights],
trainable = True
))
bidirect_model_adj_adv.add(Bidirectional(LSTM(64, return_sequences=True)))
bidirect_model_adj_adv.add(TimeDistributed(Dense(NUM_CLASSES, activation='softmax')))
#compile model
bidirect_model_adj_adv.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['acc'])
# check summary of model
bidirect_model_adj_adv.summary()
bidirect_adj_adv_training = bidirect_model_adj_adv.fit(X_train, Y_train, batch_size=128, epochs=40, validation_data=(X_validation, Y_validation))
When predicting with the trained model, for some input word sequences the predicted POS tag sequence does not match in length. Here is the code sample used to test that:
predicted_results = bidirect_model_adj_adv.predict(X_test)
category_meaning_pr = np.argmax(predicted_results, axis=-1)
#predicted tags
predicted_tags = tag_tokenizer.sequences_to_texts(category_meaning_pr)
category_meaning_at = np.argmax(Y_test, axis=-1)
#actual tags
actual_tags = tag_tokenizer.sequences_to_texts(category_meaning_at)
not_matching = []
for i in range(len(actual_tags)):
actual_tags_per_item = actual_tags[i].split()
predicted_tags_per_item = predicted_tags[i].split()
if len(actual_tags_per_item) != len(predicted_tags_per_item):
print(i)
not_matching.append(i)
If any index is appended to the not_matching list, then for that word sequence the number of predicted tags differs from the number of words in the sequence. In my scenario, a few elements were appended to the not_matching list. What is the reason for that?
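A likely explanation, based on how Keras' Tokenizer works (an assumption, not confirmed in the post): index 0 is reserved for padding and never appears in tag_tokenizer.index_word, and sequences_to_texts silently skips any index it does not know. So whenever the model predicts a non-padding tag at a padded position, or predicts 0 at a real-word position, the decoded tag string changes length. A small sketch:

from tensorflow.keras.preprocessing.text import Tokenizer

tok = Tokenizer()
tok.fit_on_texts([['nn', 'jj', 'vb', 'rb']])  # toy tag vocabulary: nn=1, jj=2, vb=3, rb=4

actual    = [[0, 0, 1, 2, 3]]  # two padded positions, three real tags
predicted = [[0, 4, 1, 2, 3]]  # the model predicted a tag at a padded position

print(tok.sequences_to_texts(actual))     # ['nn jj vb']     -> 3 tags
print(tok.sequences_to_texts(predicted))  # ['rb nn jj vb']  -> 4 tags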
I currently have a RNN model for time series predictions. It uses 3 input features "value", "temperature" and "hour of the day" of the last 96 time steps to predict the next 96 time steps of the feature "value".
Here you can see a schema of it: [model schema image omitted]
and here you have the current code:
#Import modules
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from tensorflow import keras
# Define the parameters of the RNN and the training
epochs = 1
batch_size = 50
steps_backwards = 96
steps_forward = 96
split_fraction_trainingData = 0.70
split_fraction_validationData = 0.90
randomSeedNumber = 50
#Read dataset
df = pd.read_csv('C:/Users/Desktop/TestData.csv', sep=';', header=0, low_memory=False, infer_datetime_format=True, parse_dates={'datetime':[0]}, index_col=['datetime'])
# standardize data
data = df.values
indexWithYLabelsInData = 0
data_X = data[:, 0:3]
data_Y = data[:, indexWithYLabelsInData].reshape(-1, 1)
scaler_standardized_X = StandardScaler()
data_X = scaler_standardized_X.fit_transform(data_X)
data_X = pd.DataFrame(data_X)
scaler_standardized_Y = StandardScaler()
data_Y = scaler_standardized_Y.fit_transform(data_Y)
data_Y = pd.DataFrame(data_Y)
# Prepare the input data for the RNN
series_reshaped_X = np.array([data_X[i:i + (steps_backwards+steps_forward)].copy() for i in range(len(data) - (steps_backwards+steps_forward))])
series_reshaped_Y = np.array([data_Y[i:i + (steps_backwards+steps_forward)].copy() for i in range(len(data) - (steps_backwards+steps_forward))])
timeslot_x_train_end = int(len(series_reshaped_X) * split_fraction_trainingData)
timeslot_x_valid_end = int(len(series_reshaped_X) * split_fraction_validationData)
X_train = series_reshaped_X[:timeslot_x_train_end, :steps_backwards]
X_valid = series_reshaped_X[timeslot_x_train_end:timeslot_x_valid_end, :steps_backwards]
X_test = series_reshaped_X[timeslot_x_valid_end:, :steps_backwards]
Y_train = series_reshaped_Y[:timeslot_x_train_end, steps_backwards:]
Y_valid = series_reshaped_Y[timeslot_x_train_end:timeslot_x_valid_end, steps_backwards:]
Y_test = series_reshaped_Y[timeslot_x_valid_end:, steps_backwards:]
# Build the model and train it
np.random.seed(randomSeedNumber)
tf.random.set_seed(randomSeedNumber)
model = keras.models.Sequential([
keras.layers.SimpleRNN(10, return_sequences=True, input_shape=[None, 3]),
keras.layers.SimpleRNN(10, return_sequences=True),
keras.layers.TimeDistributed(keras.layers.Dense(1))
])
model.compile(loss="mean_squared_error", optimizer="adam", metrics=['mean_absolute_percentage_error'])
history = model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_valid, Y_valid))
#Predict the test data
Y_pred = model.predict(X_test)
# Inverse the scaling (traInv: transformation inversed)
data_X_traInv = scaler_standardized_X.inverse_transform(data_X)
data_Y_traInv = scaler_standardized_Y.inverse_transform(data_Y)
series_reshaped_X_notTransformed = np.array([data_X_traInv[i:i + (steps_backwards+steps_forward)].copy() for i in range(len(data) - (steps_backwards+steps_forward))])
X_test_notTranformed = series_reshaped_X_notTransformed[timeslot_x_valid_end:, :steps_backwards]
Y_pred_traInv = scaler_standardized_Y.inverse_transform(Y_pred)
Y_test_traInv = scaler_standardized_Y.inverse_transform(Y_test)
# Calculate errors for every time slot of the multiple predictions
abs_diff = np.abs(Y_pred_traInv - Y_test_traInv)
abs_diff_perPredictedSequence = np.zeros(len(Y_test_traInv))
average_LoadValue_testData_perPredictedSequence = np.zeros(len(Y_test_traInv))
abs_diff_perPredictedTimeslot_ForEachSequence = np.zeros(len(Y_test_traInv))
absoluteError_Load_Ratio_allPredictedSequence = np.zeros(len(Y_test_traInv))
absoluteError_Load_Ratio_allPredictedTimeslots = np.zeros(len(Y_test_traInv))
mse_perPredictedSequence = np.zeros(len(Y_test_traInv))
rmse_perPredictedSequence = np.zeros(len(Y_test_traInv))
for i in range(0, len(Y_test_traInv)):
    for j in range(0, len(Y_test_traInv[0])):
        abs_diff_perPredictedSequence[i] = abs_diff_perPredictedSequence[i] + abs_diff[i][j]
    mse_perPredictedSequence[i] = mean_squared_error(Y_pred_traInv[i], Y_test_traInv[i])
    rmse_perPredictedSequence[i] = np.sqrt(mse_perPredictedSequence[i])
    abs_diff_perPredictedTimeslot_ForEachSequence[i] = abs_diff_perPredictedSequence[i] / len(Y_test_traInv[0])
    average_LoadValue_testData_perPredictedSequence[i] = np.mean(Y_test_traInv[i])
    absoluteError_Load_Ratio_allPredictedSequence[i] = abs_diff_perPredictedSequence[i] / average_LoadValue_testData_perPredictedSequence[i]
    absoluteError_Load_Ratio_allPredictedTimeslots[i] = abs_diff_perPredictedTimeslot_ForEachSequence[i] / average_LoadValue_testData_perPredictedSequence[i]
rmse_average_allPredictedSequences = np.mean(rmse_perPredictedSequence)
absoluteAverageError_Load_Ratio_allPredictedSequence = np.mean(absoluteError_Load_Ratio_allPredictedSequence)
absoluteAverageError_Load_Ratio_allPredictedTimeslots = np.mean(absoluteError_Load_Ratio_allPredictedTimeslots)
absoluteAverageError_allPredictedSequences = np.mean(abs_diff_perPredictedSequence)
absoluteAverageError_allPredictedTimeslots = np.mean(abs_diff_perPredictedTimeslot_ForEachSequence)
Here you have some test data: Download Test Data
So now I would actually like to include not only past values of the features in the prediction, but also future values of the features "temperature" and "hour of the day". The future values of "temperature" can, for example, be taken from an external weather forecasting service, and for "hour of the day" the future values are known in advance (in the test data I have included a "forecast" of the temperature that is not a real forecast; I just randomly changed the values).
This way, I assume, the forecast could be improved for several applications and datasets.
In a schema it would look like this: [schema image omitted]
Can anyone tell me how I can do that in Keras with an RNN (or LSTM)? One way could be to include the future values as independent input features. But I would like the model to know that the future values of a feature are connected to the past values of that feature.
Reminder: Does anybody have an idea how to do this? I'd highly appreciate every comment.
The standard approach is to use an encoder-decoder architecture (see 1 and 2 for instance):
The encoder takes as input the past values of the features and of the target and returns an output representation.
The decoder takes as input the encoder output and the future values of the features and returns the predicted values of the target.
You can use any architecture for the encoder and for the decoder and you can also consider different approaches for passing the encoder output to the decoder (e.g. adding or concatenating it to the decoder input features, adding or concatenating it to the output of some intermediate decoder layer, or adding it to the final decoder output), the code below is just an example.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input, Dense, LSTM, TimeDistributed, Concatenate, Add
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
# define the inputs
target = ['value']
features = ['temperatures', 'hour of the day']
sequence_length = 96
# import the data
df = pd.read_csv('TestData.csv', sep=';', header=0, low_memory=False, infer_datetime_format=True, parse_dates={'datetime': [0]}, index_col=['datetime'])
# scale the data
target_scaler = StandardScaler().fit(df[target])
features_scaler = StandardScaler().fit(df[features])
df[target] = target_scaler.transform(df[target])
df[features] = features_scaler.transform(df[features])
# extract the input and output sequences
X_encoder = [] # past features and target values
X_decoder = [] # future features values
y = [] # future target values
for i in range(sequence_length, df.shape[0] - sequence_length):
X_encoder.append(df[features + target].iloc[i - sequence_length: i])
X_decoder.append(df[features].iloc[i: i + sequence_length])
y.append(df[target].iloc[i: i + sequence_length])
X_encoder = np.array(X_encoder)
X_decoder = np.array(X_decoder)
y = np.array(y)
# define the encoder and decoder
def encoder(encoder_features):
y = LSTM(units=100, return_sequences=True)(encoder_features)
y = TimeDistributed(Dense(units=1))(y)
return y
def decoder(decoder_features, encoder_outputs):
x = Concatenate(axis=-1)([decoder_features, encoder_outputs])
# x = Add()([decoder_features, encoder_outputs])
y = TimeDistributed(Dense(units=100, activation='relu'))(x)
y = TimeDistributed(Dense(units=1))(y)
return y
# build the model
encoder_features = Input(shape=X_encoder.shape[1:])
decoder_features = Input(shape=X_decoder.shape[1:])
encoder_outputs = encoder(encoder_features)
decoder_outputs = decoder(decoder_features, encoder_outputs)
model = Model([encoder_features, decoder_features], decoder_outputs)
# train the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model.fit([X_encoder, X_decoder], y, epochs=100, batch_size=128)
# extract the last predicted sequence
y_true = target_scaler.inverse_transform(y[-1, :])
y_pred = target_scaler.inverse_transform(model.predict([X_encoder, X_decoder])[-1, :])
# plot the last predicted sequence
plt.plot(y_true.flatten(), label='actual')
plt.plot(y_pred.flatten(), label='predicted')
plt.legend()
plt.show()
In the example above the model takes two inputs, X_encoder and X_decoder, so in your case when generating the forecasts you can use the past observed temperatures in X_encoder and the future temperature forecasts in X_decoder.
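As a usage sketch (future_df is a hypothetical name): for a real out-of-sample forecast you would feed the last observed window to the encoder and the known future feature values, scaled with the same scalers, to the decoder:

# Hypothetical single-window forecast with the trained model.
# df is already scaled above; future_df holds the next `sequence_length` rows of
# raw temperature forecasts and hours of the day.
past = df[features + target].iloc[-sequence_length:].to_numpy()[np.newaxis, ...]  # (1, 96, 3)
future = features_scaler.transform(future_df[features])[np.newaxis, ...]          # (1, 96, 2)
y_hat = model.predict([past, future])                                             # (1, 96, 1)
y_hat = target_scaler.inverse_transform(y_hat[0])                                 # back to original units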
Here is PyTorch code for time series prediction with a known external/exogenous regressor over the forecast period. Hope it helps! Have a marvellous day!
The input format is a 3D tensor and the output a 1D array (MISO: Multiple Inputs, Single Output).
import numpy as np
import torch

def CNN_Attention_Bidirectional_LSTM_Encoder_Decoder_predictions(model, data, regressors, extrapolations_length):
    # `data`: 3D tensor of past inputs; `regressors`: DataFrame of known future
    # exogenous values; `device` is assumed to be defined elsewhere.
    n_input = extrapolations_length
    pred_list = []
    batch = data[-n_input:]
    model = model.train()
    # first step: predict from the last observed window and append the known regressor value
    pred_list.append(torch.cat((model(batch)[-1],
                                torch.FloatTensor(regressors.iloc[1, [1]]).to(device).unsqueeze(0)), 1))
    batch = torch.cat((batch[n_input-1].unsqueeze(0), pred_list[-1].unsqueeze(0)), 1)
    batch = batch[:, 1:, :]
    # remaining steps: slide the window forward, feeding each prediction back in
    for i in range(n_input-1):
        model = model.eval()
        pred_list.append(torch.cat((model(batch).squeeze(0),
                                    torch.FloatTensor(regressors.iloc[i+1, [1]]).to(device).unsqueeze(0)), 1))
        batch = torch.cat((batch, pred_list[-1].unsqueeze(0)), 1)
        batch = batch[:, 1:, :]
    model = model.train()
    # keep only the target column of each predicted step
    return np.array([pred_list[j].cpu().detach().numpy() for j in range(n_input)])[:, :, 0]
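A hedged usage example for the function above (all values are assumptions: data is the 3D tensor of past inputs, regressors a DataFrame whose second column holds the known future regressor):

# Hypothetical call: autoregressively forecast 96 steps, feeding each prediction
# back in together with the known future regressor values.
preds = CNN_Attention_Bidirectional_LSTM_Encoder_Decoder_predictions(
    model, data, regressors, extrapolations_length=96)
print(preds.shape)  # (96, 1): one predicted target value per forecast step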
So I am new to using TensorFlow, so please bear with me.
I want to create an LSTM RNN for sequence prediction, but since I am also going to use this for tuning hyperparameters, I would like the input to be of the format
[n1, n2, n3, ...]
and the output will be a single number.
e.g.: input = [1, 2, 3, 4] | output = [5]
I have created a dictionary like so:
features = { "n1": array([1, 2, 3, 4, 5]), "n2": array([2, 3, 4, 5, 6]), "n3": array([3, 4, 5, 6, 7]), "n4": array([4, 5, 6, 7, 8]) }
labels/output = [5, 6, 7, 8, 9]
I would like to use an estimator. I was going to use DynamicRnnEstimator(), but it is going to be removed in a future version, so I would like to create my own custom estimator instead.
I want to know how to create my own custom estimator for this particular scenario: training the model to predict the next number given a sequence of sequence_length numbers.
I have many sequences for training.
The code I have written so far is below:
# START
# IMPORTS
import numpy as np
import tensorflow as tf
import csv
from tensorflow.contrib.learn import DynamicRnnEstimator
from tensorflow.python import debug as tf_debug
# GLOBAL VARIABLES
sequence_length = 4 # predict the 5th number given the previous sequence_length numbers
batch_size = 10
n_epochs = 100
n_neurons = 5
# NORMALIZING THE DATA
def normalize_data(train, test):
    m11 = float(max(train))
    m12 = float(min(train))
    normalized_train = [float((x - m12) / (m11 - m12)) for x in train]
    m21 = float(max(test))
    m22 = float(min(test))  # was min(train), which mixed statistics from the two sets
    normalized_test = [float((x - m22) / (m21 - m22)) for x in test]
    return normalized_train, normalized_test
# LOADING THE DATA
def load_data(train, test):
with open(train, 'r') as csvfile1:
reader1 = csv.reader(csvfile1, delimiter = ',')
train = [ float(row[1]) for row in reader1]
with open(test, 'r') as csvfile2:
reader2 = csv.reader(csvfile2, delimiter = ',')
test = [ float(row[1]) for row in reader2]
normalized_train, normalized_test = normalize_data(train, test)
global sequence_length
trainx = [ normalized_train[i:i+sequence_length] for i in range(len(normalized_train) - sequence_length)]
testx = [ normalized_test[i:i+sequence_length] for i in range(len(normalized_test) - sequence_length)]
# aligned so that window i predicts element i + sequence_length
trainy = [ normalized_train[i+sequence_length] for i in range(len(normalized_train) - sequence_length)]
testy = [ normalized_test[i+sequence_length] for i in range(len(normalized_test) - sequence_length)]
return trainx, testx, trainy, testy
# Create Dict function
def create_dict(x,y):
feature_dict = dict()
global sequence_length
for i in range(sequence_length):
temp = "number"+str(i+1)
feature_dict[temp] = x[:,i]
labels = y
return feature_dict, labels
# Training input function
def train_input_fn(features, labels, batch_size):
# Convert the inputs to a Dataset.
dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
# Shuffle, repeat, and batch the examples.
dataset = dataset.shuffle(100).repeat().batch(batch_size)
# Return the read end of the pipeline.
return dataset.make_one_shot_iterator().get_next()
# Create feature columns
def create_feature_columns():
feature_column = []
for i in range(sequence_length):
feature_column.append(tf.feature_column.numeric_column(key="number"+str(i+1)))
return feature_column
# Model_ function
def my_model_fn(features, labels, mode, params):
net = tf.feature_column.input_layer(features, params['feature_columns'])
for units in params['hidden_units']:
net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
logits = tf.layers.dense(net, params['n_classes'], activation=None)
def main():
global sequence_length, batch_size, n_epochs, n_neurons
trainx, testx, trainy, testy = load_data("train.csv", "test.csv")
trainx, testx, trainy, testy = np.array(trainx), np.array(testx), np.array(trainy), np.array(testy )
n_train = len(trainx)
n_test = len(testx)
feature_dict_train, label_train = create_dict(trainx, trainy)
feature_dict_test, label_test = create_dict(testx, testy)
feature_column = create_feature_columns()
main()
THANKS IN ADVANCE!
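A minimal sketch of how my_model_fn could be completed and wrapped in tf.estimator.Estimator for this next-number regression setup, using TF 1.x APIs to match the code above (the learning rate and step count are assumptions, and the variables from main() are assumed to be in scope):

def my_model_fn(features, labels, mode, params):
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    for units in params['hidden_units']:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
    predictions = tf.layers.dense(net, 1, activation=None)  # the single predicted number

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions={'value': predictions})

    loss = tf.losses.mean_squared_error(labels=tf.reshape(labels, [-1, 1]),
                                        predictions=predictions)
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss)

    train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(
        loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

estimator = tf.estimator.Estimator(
    model_fn=my_model_fn,
    params={'feature_columns': create_feature_columns(),
            'hidden_units': [n_neurons]})
estimator.train(
    input_fn=lambda: train_input_fn(feature_dict_train, label_train, batch_size),
    steps=1000)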