I am attempting to reduce the dimensionality of a categorical feature by extracting an embedding layer from a neural net and using it as an input feature in a separate XGBoost model.
An embedding layer's weight matrix has the dimensions (nr. unique categories + 1, chosen output size). How can it be concatenated with the continuous variables in the original training data, which have the dimensions (nr. observations, nr. features)?
Below is a reproducible example of regression with a neural net, in which a categorical feature is encoded as a learned embedding layer. The example is closely adapted from:
http://machinelearningmechanic.com/keras/2018/03/09/keras-regression-with-categorical-variable-embeddings-md.html#Define-the-input-layers
At the end I have printed the embedding layer weights and their shape. How can this layer be merged with the continuous features in the original training data (X_train_continuous)? If the number of rows were equal to the number of categories, and if we knew the order in which the categories are represented in the embedding layer, the embedding array could perhaps be joined to the training observations on category. Instead, the number of rows equals the number of categories + 1 (in the code: len(values) + 1).
# Imports and helper functions
import numpy as np
import pandas as pd
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, BatchNormalization, Input, Embedding
from keras.callbacks import Callback
import matplotlib.pyplot as plt
# Bayesian Methods for Hackers style sheet
plt.style.use('bmh')
np.random.seed(1234567890)
class PeriodicLogger(Callback):
"""
A helper callback class that only prints the losses once in 'display' epochs
"""
def __init__(self, display=100):
self.display = display
def on_train_begin(self, logs={}):
self.epochs = 0
def on_epoch_end(self, batch, logs={}):
self.epochs += 1
if self.epochs % self.display == 0:
print("Epoch: %d - loss: %f - val_loss: %f" % (
self.epochs, logs['loss'], logs['val_loss']))
periodic_logger_250 = PeriodicLogger(250)
# Define the mapping and a function that computes the house price for each
# example
per_meter_mapping = {
'Mercaz': 500,
'Old North': 350,
'Florentine': 230
}
per_room_additional_price = {
'Mercaz': 15. * 10 ** 4,
'Old North': 8. * 10 ** 4,
'Florentine': 5. * 10 ** 4
}
def house_price_func(row):
"""
house_price_func is the function f(a,s,n).
:param row: dict (contains the keys: ['area', 'size', 'n_rooms'])
:return: float
"""
area, size, n_rooms = row['area'], row['size'], row['n_rooms']
return size * per_meter_mapping[area] + n_rooms * \
per_room_additional_price[area]
# Create toy data
AREAS = ['Mercaz', 'Old North', 'Florentine']
def create_samples(n_samples):
"""
Helper method that creates dataset DataFrames
Note that the np.random.choice call only determines the number of rooms and the size of the house
(the price, which we calculate later, is deterministic)
:param n_samples: int (number of samples for each area (suburb))
:return: pd.DataFrame
"""
samples = []
for n_rooms in np.random.choice(range(1, 6), n_samples):
samples += [(area, int(np.random.normal(25, 5)), n_rooms) for area in
AREAS]
return pd.DataFrame(samples, columns=['area', 'size', 'n_rooms'])
# Create the train and validation sets
train = create_samples(n_samples=1000)
val = create_samples(n_samples=100)
# Calculate the prices for each set
train['price'] = train.apply(house_price_func, axis=1)
val['price'] = val.apply(house_price_func, axis=1)
# Define the features and the y vectors
continuous_cols = ['size', 'n_rooms']
categorical_cols = ['area']
y_col = ['price']
X_train_continuous = train[continuous_cols]
X_train_categorical = train[categorical_cols]
y_train = train[y_col]
X_val_continuous = val[continuous_cols]
X_val_categorical = val[categorical_cols]
y_val = val[y_col]
# Normalization
# Normalize both the train and validation sets to have 0 mean and std. of 1,
# using the train set mean and std.
# This gives each feature an equal initial importance and speeds up
# training
train_mean = X_train_continuous.mean(axis=0)
train_std = X_train_continuous.std(axis=0)
X_train_continuous = X_train_continuous - train_mean
X_train_continuous /= train_std
X_val_continuous = X_val_continuous - train_mean
X_val_continuous /= train_std
# Build a model using a categorical variable
# First let's define a helper class for the categorical variable
class EmbeddingMapping():
"""
Helper class for handling categorical variables
An instance of this class should be defined for each categorical variable
we want to use.
"""
def __init__(self, series):
# get a list of unique values
values = series.unique().tolist()
# Set a dictionary mapping from values to integer value
# In our example this will be {'Mercaz': 1, 'Old North': 2,
# 'Florentine': 3}
self.embedding_dict = {value: int_value + 1 for int_value, value in
enumerate(values)}
# The num_values will be used as the input_dim when defining the
# embedding layer.
# It will also be returned for unseen values
self.num_values = len(values) + 1
def get_mapping(self, value):
# If the value was seen in the training set, return its integer mapping
if value in self.embedding_dict:
return self.embedding_dict[value]
# Else, return the same integer for unseen values
else:
return self.num_values
# Create an embedding column for the train/validation sets
area_mapping = EmbeddingMapping(X_train_categorical['area'])
X_train_categorical = \
X_train_categorical.assign(area_mapping=X_train_categorical['area']
.apply(area_mapping.get_mapping))
X_val_categorical = \
X_val_categorical.assign(area_mapping=X_val_categorical['area']
.apply(area_mapping.get_mapping))
# Define the input layers
# Define the embedding input
area_input = Input(shape=(1,), dtype='int32')
# Decide to what vector size we want to map our 'area' variable.
# I'll use 2 here because we only have three areas
embeddings_output = 2
# Let's define the embedding layer and flatten it
area_embeddings = Embedding(output_dim=embeddings_output,
                            input_dim=area_mapping.num_values,
                            input_length=1, name="embedding_layer")(area_input)
area_embeddings = keras.layers.Reshape((embeddings_output,))(area_embeddings)
# Define the continuous variables input (just like before)
continuous_input = Input(shape=(X_train_continuous.shape[1], ))
# Concatenate continuous and embeddings inputs
all_input = keras.layers.concatenate([continuous_input, area_embeddings])
# To merge them together we will use Keras Functional API
# Will define a simple model with 2 hidden layers, with 25 neurons each.
# Define the model
units=25
dense1 = Dense(units=units, activation='relu')(all_input)
dense2 = Dense(units, activation='relu')(dense1)
predictions = Dense(1)(dense2)
# Note using the input object 'area_input' not 'area_embeddings'
model = Model(inputs=[continuous_input, area_input], outputs=predictions)
# Let's train the model
epochs = 100 # to train properly, use 10000
model.compile(loss='mse',
optimizer=keras.optimizers.Adam(lr=.8, beta_1=0.9,
beta_2=0.999, decay=1e-03,
amsgrad=True))
# Note: the continuous and categorical inputs are passed in the same order
# as defined in the Model's inputs list
history = model.fit([X_train_continuous, X_train_categorical['area_mapping']],
y_train, epochs=epochs, batch_size=128, callbacks=[
periodic_logger_250], verbose=0,
validation_data=([X_val_continuous, X_val_categorical[
'area_mapping']], y_val))
# Observe the embedding layer weights
embedding_weights = model.get_layer('embedding_layer').get_weights()[0]
print(f'Embedding layer weights:\n{embedding_weights}')
print(f'Embedding layer weights shape: {embedding_weights.shape}')
First, this post has a terminology problem: an "embedding" is the representation of a particular input sample; it is the vector the layer outputs for that sample. The "weights" are the matrix stored and trained inside the layer.
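In other words, the trained weight matrix is a lookup table with one row per integer code, and the embedding of a sample is simply the row selected by that sample's code. As a small sketch using the question's variables (the name per_sample_embeddings is just illustrative):
weights = model.get_layer('embedding_layer').get_weights()[0]   # shape (4, 2): num_values rows x output size
codes = X_train_categorical['area_mapping'].values              # shape (3000,): one integer code per observation
per_sample_embeddings = weights[codes]                          # shape (3000, 2): one embedding per observation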
In Keras, the Model class is a subclass of Layer. You can use any Model as a Layer in a larger model.
You can create a Model with just the Embedding layer, then use it as a layer when building the rest of your model. After training, you can call .predict() on that "sub-model". Also, you can save that sub-model out to a json file and reload it later.
This is the standard technique for creating a model that emits internal embeddings.
To get the embedding layer outputs with shape (nr. samples, chosen output size):
intermediate_layer_model = Model(inputs=model.input,
outputs=model.get_layer("embedding_layer")
.output)
embedding_output = \
intermediate_layer_model.predict([X_train_continuous,
X_train_categorical['area_mapping']])
print(embedding_output.shape) # (3000, 1, 2)
intermediate_output = \
embedding_output.reshape(embedding_output.shape[0], -1)
print(intermediate_output.shape) # (3000, 2)
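From there, a hedged sketch of the final step the question asks about: stacking the per-observation embedding vectors next to the continuous features so the combined matrix can be fed to XGBoost (the XGBoost model and its default settings here are illustrative, not part of the original answer):
import numpy as np
import xgboost as xgb

# (3000, 2) continuous features next to (3000, 2) embedding vectors -> (3000, 4)
X_train_combined = np.hstack([X_train_continuous.values, intermediate_output])

xgb_model = xgb.XGBRegressor()                         # illustrative default settings
xgb_model.fit(X_train_combined, y_train.values.ravel())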
One thing you can do is to build your 'pretrained' model with each layer having a unique name, and save its weights.
Then create your new model with the same names for the layers you want to keep, and use
Model.load_weights(file_path, by_name=True)
This lets you keep all of the layers that you want and change everything else afterwards.
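A minimal self-contained sketch of that idea (the layer name, file name, and tiny architecture below are illustrative, not taken from the question):
from keras.layers import Input, Embedding, Reshape, Dense
from keras.models import Model

def build_embedding_branch():
    inp = Input(shape=(1,), dtype='int32')
    x = Embedding(input_dim=4, output_dim=2, name='embedding_layer')(inp)
    x = Reshape((2,))(x)
    return inp, x

# "Pretrained" model: embedding plus a regression head
inp, emb = build_embedding_branch()
pretrained = Model(inp, Dense(1, name='head')(emb))
pretrained.save_weights('pretrained_weights.h5')

# New model that keeps only the identically named embedding layer
inp2, emb2 = build_embedding_branch()
new_model = Model(inp2, emb2)
new_model.load_weights('pretrained_weights.h5', by_name=True)  # restores 'embedding_layer' only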
My data frame is indexed on an hourly basis and I want to predict y.
> df.head()
Date y
2019-10-03 00:00:00 343
2019-10-03 01:00:00 101
2019-10-03 02:00:00 70
2019-10-03 03:00:00 67
2019-10-03 04:00:00 122
I will now import the libraries and train the model:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
min_max_scaler = MinMaxScaler()
prediction_hours = 24
df_train= df[:len(df)-prediction_hours]
df_test= df[len(df)-prediction_hours:]
print(df_train.head())
print('/////////////////////////////////////////')
print (df_test.head())
training_set = df_train.values
training_set = min_max_scaler.fit_transform(training_set)
x_train = training_set[0:len(training_set)-1]
y_train = training_set[1:len(training_set)]
x_train = np.reshape(x_train, (len(x_train), 1, 1))
num_units = 2
activation_function = 'sigmoid'
optimizer = 'adam'
loss_function = 'mean_squared_error'
batch_size = 10
num_epochs = 100
regressor = Sequential()
regressor.add(LSTM(units = num_units, activation = activation_function, input_shape=(None, 1)))
regressor.add(Dense(units = 1))
regressor.compile(optimizer = optimizer, loss = loss_function)
regressor.fit(x_train, y_train, batch_size = batch_size, epochs = num_epochs)
And after training, I can actually use it on my test data:
test_set = df_test.values
inputs = np.reshape(test_set, (len(test_set), 1))
inputs = min_max_scaler.transform(inputs)
inputs = np.reshape(inputs, (len(inputs), 1, 1))
predicted_y = regressor.predict(inputs)
predicted_y = min_max_scaler.inverse_transform(predicted_y)
This is the prediction I got:
The forecast is actually pretty good: is it too good to be true? Am I doing anything wrong? I followed the implementation step by step from a GitHub implementation.
I want to add some exogenous variables, namely v1, v2, v3. If my dataset now looks like this with new variables,
df.head()
Date y v1 v2 v3
2019-10-03 00:00:00 343 4 6 10
2019-10-03 01:00:00 101 3 2 24
2019-10-03 02:00:00 70 0 0 50
2019-10-03 03:00:00 67 0 4 54
2019-10-03 04:00:00 122 3 3 23
How can I include these variables v1,v2 and v3 in my LSTM model? The implementation of the multivariate LSTM is very confusing to me.
Edit, to address Yoan's suggestion:
For a dataframe with the date as index and with the columns y, v1, v2 and v3, I've done the following as suggested:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
min_max_scaler = MinMaxScaler()
prediction_hours = 24
df_train= df[:len(df)-prediction_hours]
df_test= df[len(df)-prediction_hours:]
print(df_train.head())
print('/////////////////////////////////////////')
print (df_test.head())
training_set = df_train.values
training_set = min_max_scaler.fit_transform(training_set)
x_train = np.reshape(x_train, (len(x_train), 1, 4))
y_train = training_set[0:len(training_set),1]  # I've tried both 0:len(...) and 1:len(...)
num_units = 2
activation_function = 'sigmoid'
optimizer = 'adam'
loss_function = 'mean_squared_error'
batch_size = 10
num_epochs = 100
regressor = Sequential()
regressor.add(LSTM(units = num_units, activation = activation_function,
input_shape=(None, 1,4)))
regressor.add(Dense(units = 1))
regressor.compile(optimizer = optimizer, loss = loss_function)
regressor.fit(x_train, y_train, batch_size = batch_size, epochs =
num_epochs)
But I get the following error:
only integer scalar arrays can be converted to a scalar index
Combining auxiliary features with sequences
There are multiple ways of handling auxiliary features with LSTMs, and which one to use depends on what your data contains and how you want to model these features. I'll discuss 4 different scenarios and strategies below for your reference, with some dummy code.
Scenario 1: If you have simple continuous features, simply pass them into an LSTM!
Scenario 2: If you have multiple label encoded sequences, embed and encode them separately with LSTMs, then concatenate the results for your downstream predictions
If you have a label encoded sequence and some auxiliary features, you can:
Scenario 3: Append the auxiliary features to the embedded sequence and then pass everything into the LSTMs
Scenario 4: Append them to the output of the LSTM and optionally pass the result to another set of LSTMs
Scenario 1:
Let's say you have 4 sequential features and all of those are continuous (not label encoded as in text or categorical). In this case, LSTMs are well equipped to handle these features directly. An LSTM layer expects a shape of (batch, sequence, features) and therefore such a scenario fits nicely without any modifications.
Features --> LSTM --> Process --> Predict
Code
import numpy as np
from tensorflow.keras import layers, Model, utils
#Four continuous features
X = np.random.random((100,10,4))
Y = np.random.random((100,))
###Define model###
inp = layers.Input((10,4))
#LSTMs
x = layers.LSTM(8, return_sequences=True)(inp)
x = layers.LSTM(8)(x)
out = layers.Dense(1)(x)
model = Model(inp, out)
utils.plot_model(model, show_layer_names=False, show_shapes=True)
Scenario 2:
Next, let's assume another simple case. You have 2 label encoded sequences (say text). As one would expect, all you need to do is create sequential features separately by building LSTMs for each of them, and then concatenate them at the end before your downstream prediction task.
Sequence --> Embed --> LSTM -->|
* --> Append --> Process --> Predict
Sequence --> Embed --> LSTM -->|
Code
import numpy as np
from tensorflow.keras import layers, Model, utils
#Two sequential, label encoded features
X = np.random.random((100,10,2))
Y = np.random.random((100,))
###Define model###
inp = layers.Input((10,2))
feature1 = layers.Lambda(lambda x: x[...,0])(inp)
feature2 = layers.Lambda(lambda x: x[...,1])(inp)
#Append embeddings features
x1 = layers.Embedding(1000, 5)(feature1)
x2 = layers.Embedding(1200, 7)(feature2)
#LSTMs
x1 = layers.LSTM(8, return_sequences=True)(x1)
x1 = layers.LSTM(8)(x1)
x2 = layers.LSTM(8, return_sequences=True)(x2)
x2 = layers.LSTM(8)(x2)
#Combine LSTM final states
x = layers.concatenate([x1,x2])
out = layers.Dense(1)(x)
model = Model(inp, out)
utils.plot_model(model, show_layer_names=False, show_shapes=True)
Scenario 3:
Next scenario, let's assume you are working with one feature which is a label encoded sequence (say text). Before you pass this feature to LSTMs you will have to encode it into an n-dimensional vector using an embedding layer. This will result in a (batch, sequence, embedding_dim) shaped input for the LSTMs, which is no problem at all. Let's say, however, that you also have 3 auxiliary features which are continuous (and properly normalized). One simple thing you could do is append these to the output of the Embedding layer to get a (batch, sequence, embedding_dim + auxiliary) input, which the LSTM can handle as well!
Sequence --> Embed ----->|
*--> Append --> LSTM -> Process --> Predict
Auxiliary --> Process -->|
Code
import numpy as np
from tensorflow.keras import layers, Model, utils
#One sequential, label encoded feature & 3 auxiliary features for each timestep
X = np.random.random((100,10,4))
Y = np.random.random((100,))
###Define model###
inp = layers.Input((10,4))
feature1 = layers.Lambda(lambda x: x[...,0])(inp)
feature2 = layers.Lambda(lambda x: x[...,1:4])(inp)
#Append embeddings features
x = layers.Embedding(1000, 5)(feature1)
x = layers.concatenate([x, feature2])
#LSTMs
x = layers.LSTM(8, return_sequences=True)(x)
x = layers.LSTM(8)(x)
out = layers.Dense(1)(x)
model = Model(inp, out)
utils.plot_model(model, show_layer_names=False, show_shapes=True)
In the above example, after the label encoded input is embedded into the 5-dimensional vector, the 3 auxiliary inputs are appended and then the (10,8) dimensional sequence is passed to the LSTMs for doing their magic.
Scenario 4:
Let's say you have the same scenario as above, but you want the sequential features to be richer representations before you append the auxiliary inputs. Here you could simply pass the sequential feature to an LSTM and append the auxiliary inputs to the OUTPUT of that LSTM, and then decide to pass the result into another LSTM if needed. This requires return_sequences=True so that you get a sequence of the same length, which can be appended to the auxiliary features for that set of time steps.
Sequence --> Embed --> LSTM(seq) -->|
*--> Append --> Process --> Predict
Auxiliary --> Process ------------->|
Code
import numpy as np
from tensorflow.keras import layers, Model, utils
#One sequential, label encoded feature and 3 auxiliary continuous features
X = np.random.random((100,10,4))
Y = np.random.random((100,))
###Define model###
inp = layers.Input((10,4))
feature1 = layers.Lambda(lambda x: x[...,0])(inp)
feature2 = layers.Lambda(lambda x: x[...,1:4])(inp)
#feature2 = layers.Reshape((-1,1))(feature2)
#Append embeddings features
x = layers.Embedding(1000, 5)(feature1)
#LSTMs
x = layers.LSTM(8, return_sequences=True)(x)
x = layers.concatenate([x, feature2])
x = layers.LSTM(8)(x)
#Combine LSTM final states
out = layers.Dense(1)(x)
model = Model(inp, out)
utils.plot_model(model, show_layer_names=False, show_shapes=True)
There are architectures that add a single feature to the output of an LSTM, encode it again in an LSTM, then add the next feature, and so on, instead of adding all of them together. That is a design choice and will have to be tested on your specific data.
Hope this clarifies your question.
Keras' implementation of LSTM expects an input of shape (batch, sequence, features).
So when reshaping x_train, instead of doing:
x_train = np.reshape(x_train, (len(x_train), 1, 1))
You simply have:
x_train = np.reshape(x_train, (len(x_train), 1, num_features))
It's not clear from your post whether you also want to predict these new features (multivariate prediction) or whether you still want to predict y only.
In the first case you'll need to modify your Dense layer to account for the new dimension of the target:
regressor.add(Dense(units = num_features))
In the second case you'll need to reshape y_train to take only y:
y_train = training_set[1:len(training_set),1] # (assuming Date is not the index)
Finally, your LSTM input shape must be updated to accept the reshaped x_train (note that input_shape excludes the batch dimension, so it has only two entries):
regressor.add(LSTM(units = num_units, activation = activation_function, input_shape=(None, num_features)))
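Putting the pieces together, a minimal end-to-end sketch of the multivariate setup (assuming df_train from the question, with y as the first column followed by v1, v2, v3; hyperparameters are the question's own):
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

num_features = 4                                                  # y, v1, v2, v3
min_max_scaler = MinMaxScaler()
training_set = min_max_scaler.fit_transform(df_train.values)      # (n, 4)

# Features at time t predict y at time t+1
x_train = training_set[:-1]                                       # (n-1, 4)
y_train = training_set[1:, 0]                                     # y is the first column
x_train = np.reshape(x_train, (len(x_train), 1, num_features))    # (n-1, 1, 4)

regressor = Sequential()
regressor.add(LSTM(units=2, activation='sigmoid', input_shape=(None, num_features)))
regressor.add(Dense(units=1))
regressor.compile(optimizer='adam', loss='mean_squared_error')
regressor.fit(x_train, y_train, batch_size=10, epochs=100)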
I got an error:
Error when checking input:
expected embedding_1_input to have shape (50,) but got array with shape (1,)
When I change the input parameter input_length to 1, the error becomes:
Error when checking input:
expected embedding_1_input to have shape (1,) but got array with shape (50,)
My code is as below:
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer
import numpy as np
import os
from keras import metrics
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional, BatchNormalization, Activation, Conv1D, MaxPooling1D, Flatten, GlobalMaxPooling1D
from keras.models import load_model
from keras.callbacks import ModelCheckpoint
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.preprocessing import sequence, text
import pandas as pd
from gensim import corpora
from gensim import models
maxlen = 50
batch_size = 128
np.random.seed(7)
df = pd.read_csv('C:/Users/DMY/Peer-logic-master/newdata/topnine.csv',encoding='utf-8')
x = df["REVIEW"].fillna("na").values
y = df["TAG"]
encoder = LabelEncoder()
encoder.fit(y)
y = encoder.transform(y)
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.1)
word_list = []
for i in range(len(x_train)):
word_list.append(x_train[i].split(' '))
dictionary = corpora.Dictionary(word_list)
corpus = [dictionary.doc2bow(text) for text in word_list]
tfidf = models.TfidfModel(corpus)
X_train_id = []
word_id_dict = dictionary.token2id
for i in range(len(word_list)):
sen_id = []
word_sen = word_list[i]
for j in range(len(word_sen)):
id = word_id_dict.get(word_sen[j])
if id is None:
id = 0
sen_id.append(id)
X_train_id.append(sen_id)
X_train_tfidf_vec = []
for i in range(len(x_train)):
temp = {}
string = x_train[i]
string_bow = dictionary.doc2bow(string.lower().split())
string_tfidf = tfidf[string_bow]
for j in range(len(string_tfidf)):
# print(string_tfidf[j][0])
temp[string_tfidf[j][0]] = string_tfidf[j][1]
# print(temp)
X_train_tfidf_vec.append(temp)
X_train_tfidf = []
for i in range(len(X_train_id)):
sen_id = X_train_id[i]
sen_id_tfidf = X_train_tfidf_vec[i]
sen = []
for j in range(len(sen_id)):
word_id = sen_id[j]
word_tfidf = sen_id_tfidf.get(word_id)
if word_tfidf is None:
word_tfidf = 0
sen.append(word_tfidf)
X_train_tfidf.append(sen)
x_train_tfidf = sequence.pad_sequences(X_train_tfidf, maxlen=maxlen,dtype='float64')
#print(len(x_train_tfidf))
#print(x_train_tfidf)
model4 = Sequential()
model4.add(Embedding(len(x_train_tfidf)+1, 100, input_length = ))#input_dim,output_dim,input_length
model4.add(Dropout(0.6))
model4.add(LSTM(100, recurrent_dropout=0.6))
model4.add(Dropout(0.6))
model4.add(Dense(1, activation='sigmoid'))
model4.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
model4_history = model4.fit(x_train_tfidf, y_train, batch_size=batch_size, epochs=7,
validation_split=0.1)
score4, acc4 = model4.evaluate(x_test, y_test,
batch_size=batch_size)
print('Test accuracy for LSTM Model is:', acc4)
y_pred4 = model4.predict(x_test)
y_pred4 = (y_pred4 > 0.5)
print(classification_report(y_test, y_pred4))
According to the official documentation, the embedding layer takes a number of different initialization parameters.
Embedding(input_dim,
output_dim,
embeddings_initializer='uniform',
embeddings_regularizer=None,
activity_regularizer=None,
embeddings_constraint=None,
mask_zero=False,
input_length=None)
Note that input_dim specifies the vocabulary size of the input data, i.e. how many different words there are in the data. The output_dim specifies the dimensionality of the latent space, i.e. how long the vector for each word will be after the embedding transformation.
Let's put this into perspective with an example. Consider the following layer:
model.add(Embedding(1000, 64, input_length=10))
This specifies the fact that
There are 1000 unique words in the data
Each word will be represented as 64-dimensional vectors
Instances in the data will be phrases of 10 words
The output of this layer will be of shape (None, 10, 64), where None is the batch size. In your case, input_length should probably be set equal to maxlen, which seems to specify the length of each instance in the input data.
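As a quick, illustrative shape check of the example above (a sketch, not from the original answer):
import numpy as np
from keras.models import Sequential
from keras.layers import Embedding

model = Sequential()
model.add(Embedding(1000, 64, input_length=10))
model.compile('rmsprop', 'mse')

x = np.random.randint(1000, size=(32, 10))   # a batch of 32 phrases, each 10 word indices
print(model.predict(x).shape)                # (32, 10, 64)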
Hi, I have been working on an LSTM network in Python using Keras. I created a 1D array for my training and test set. When I try to fit the model I get the following error:
ValueError: Error when checking input: expected lstm_31_input to have 3 dimensions, but got array with shape (599, 1)
I have tried resizing the dimensions and adding a Flatten layer. Neither of these works. My code is below:
#Setup
import pandas as pd
import numpy as np
from numpy import array, zeros, newaxis
from numpy import argmax
from keras.layers.core import Dense, Activation, Dropout
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding, Flatten
from keras.layers import LSTM
#Used to ignore warnings about some of the tensor commands being deprecated
#Code from:
#https://stackoverflow.com/questions/43819820/how-to-disable-keras-warnings
#import warnings
#with warnings.catch_warnings():
# warnings.simplefilter("ignore")
"""
#Allow use of modules from the Common_Functions Folder
import sys
sys.path.append('../_Common_Functions')
import Hello_World as helloWorld
"""
#Creates dataset of random numbers
#import numpy as np
from random import random
def generateDatset(n):
val = np.array([])
typ = np.array([])
for i in range (1, n):
val = np.append(val, round(random()*10, 2))
if val[i-1] < 3 or val[i-1] > 7:
typ = np.append(typ, 'm')
else:
typ = np.append(typ, 'f')
return val, typ
# Encode the output labels
def lable_encoding(gender_series):
labels = np.empty((0, 2))
for i in gender_series:
if i == 'm':
labels = np.append(labels, [[1,0]], axis=0)
else:
labels = np.append(labels, [[0,1]], axis=0)
return labels
#Gets dataset in proper format for this program
val, typ = generateDatset(1000)
df = pd.DataFrame( {"first_name": val[:], "gender": typ[:]} )
# Split dataset into 60% train, 20% validation and 20% test
train, validate, test = np.split(df.sample(frac=1), [int(.6*len(df)), int(.8*len(df))])
# Convert both the inputs and the output labels into the discussed machine-readable vector format
train_x = np.asarray(train.first_name)
#train_x = np.reshape(train_x, train_x.shape + (1,))
#train_x = np.reshape(train_x, (train_x.shape[0], 1, train_x.shape[1]))
train_y = lable_encoding(train.gender)
#train_y = np.reshape(train_y, train_y.shape + (1,))
#train_y = np.reshape(train_y, (train_y.shape[0], 1, train_y.shape[1]))
validate_x = np.asarray(validate.first_name)
#validate_x = np.reshape(validate_x, validate_x.shape + (1,))
validate_y = lable_encoding(validate.gender)
#validate_y = np.reshape(validate_y, validate_y.shape + (1,))
test_x = np.asarray(test.first_name)
#test_x = np.reshape(test_x, test_x.shape + (1,))
test_y = lable_encoding(test.gender)
#test_x = np.reshape(test_x, test_x.shape + (1,))
"""
The number of hidden nodes can be determined by the following equation:
Nh = Ns / (alpha * (Ni + No))
where Ni --> number of input neurons
No --> number of output neurons
Ns --> number of samples
alpha --> scaling factor
Alternatively the following equation can be used:
Nh = (2/3)*(Ni + No)
As a note, this equation is simpler but may not provide the best performance
"""
#Set a value for the scaling factor.
#This typically ranges between 2 and 10
alpha = 8
hidden_nodes = int(np.size(train_x) / (alpha * ((len(df.columns)-1)+ 4)))
input_length = train_x.shape # Length of the character vector
output_labels = 2 # Number of output labels
from keras import optimizers
# Build the model
print('Building model...')
model = Sequential()
#print(train_x.shape)
#
df = np.expand_dims(df, axis=2)
model.add(LSTM(hidden_nodes, return_sequences=True, input_shape=(599, 1)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(units=output_labels))
model.add(Activation('softmax'))
sgd = optimizers.SGD(lr=0.5, clipnorm=10.)
model.compile(loss='categorical_crossentropy', optimizer= sgd, metrics=['acc'])
#
batch_size=1000
#x = train_x[..., newaxis, newaxis]
#x.shape
#y = train_y[..., newaxis, newaxis]
#y.shape
model.fit(train_x, train_y, batch_size=batch_size, epochs=10)
#http://45.76.113.195/?questions/46616674/expected-ndim-3-found-ndim-2-how-to-feed-sparse-matrix-to-lstm-layer-in-keras
In Keras, input_shape specifies the shape of one sample, not of the whole training set, so input_shape=(599, 1) tells the model that each sample is a 599-step sequence with 1 feature.
Here you actually have 599 training samples, and each sample is a single value. Since the first layer is an LSTM it needs a 3-dimensional input of shape (batch_size, number_of_timesteps_per_sample, dimensionality_of_one_timestep), but the batch size is not mentioned in input_shape. So the input shape of an LSTM layer should be
input_shape=(number_of_timesteps_per_sample, dimensionality_of_one_timestep)
So you should:
1) Replace input_shape=(599, 1) with input_shape=(1, 1)
2) Add the following line before training: train_x = train_x.reshape(599, 1, 1)
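For reference, a minimal runnable sketch of that fix with stand-in data of the same shapes as in the question (599 samples, one value each, two one-hot output labels); the layer sizes here are illustrative:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation

train_x = np.random.random((599,))                   # stand-in for the 1-D feature array
train_y = np.eye(2)[np.random.randint(0, 2, 599)]    # stand-in one-hot labels, shape (599, 2)

train_x = train_x.reshape(599, 1, 1)                 # (samples, timesteps, features)

model = Sequential()
model.add(LSTM(10, input_shape=(1, 1)))              # input_shape excludes the batch dimension
model.add(Dense(2))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['acc'])
model.fit(train_x, train_y, batch_size=100, epochs=2)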
I've got a problem where I want to predict one time series from many time series. My input is (batch_size, time_steps, features) and my output should be (1, time_steps, features).
I can't figure out how to average over N (the batch dimension).
Here's a dummy example. First, dummy data where the output is a linear function of 2000 input time series:
import numpy as np
time = 100
N = 2000
dat = np.zeros((N, time))
for i in range(N):
    dat[i,:] = np.sin(list(range(time)))*np.random.normal(size=1) + np.random.normal(size=1)
y = dat.T @ np.random.normal(size = N)
Now I'll define a time series model (using 1-D conv nets):
from keras.models import Model
from keras.layers import Input, Conv1D, Dense, Lambda
from keras.optimizers import Adam
from keras import backend as K
n_filters = 2
filter_width = 3
dilation_rates = [2**i for i in range(5)]
inp = Input(shape=(None, 1))
x = inp
for dilation_rate in dilation_rates:
x = Conv1D(filters=n_filters,
kernel_size=filter_width,
padding='causal',
activation = "relu",
dilation_rate=dilation_rate)(x)
x = Dense(1)(x)
model = Model(inputs = inp, outputs = x)
model.compile(optimizer = Adam(), loss='mean_squared_error')
model.predict(dat.reshape(N, time, 1)).shape
Out[43]: (2000, 100, 1)
The output is the wrong shape! Next, I tried using an averaging layer, but I get this weird error:
def av_over_batches(x):
x = K.mean(x, axis = 0)
return(x)
x = Lambda(av_over_batches)(x)
model = Model(inputs = inp, outputs = x)
model.compile(optimizer = Adam(), loss='mean_squared_error')
model.predict(dat.reshape(N, time, 1)).shape
Traceback (most recent call last):
File "<ipython-input-3-d43ccd8afa69>", line 4, in <module>
model.predict(dat.reshape(N, time, 1)).shape
File "/home/me/.local/lib/python3.6/site-packages/keras/engine/training.py", line 1169, in predict
steps=steps)
File "/home/me/.local/lib/python3.6/site-packages/keras/engine/training_arrays.py", line 302, in predict_loop
outs[i][batch_start:batch_end] = batch_out
ValueError: could not broadcast input array from shape (100,1) into shape (32,1)
Where does 32 come from? (Incidentally, I got the same number in my real data, not just in the MWE).
But the main question is: how can I build a network that averages over the input batch dimension?
I would approach the problem in a different way
Problem: You want to predict a time series from a set of time series. So let's say you have 3 time series TS1, TS2, TS3, each of 100 time steps, and you want to predict a single target time series y1, y2, ..., y100.
My approach to this problem is as below:
i.e. group the values of all the time series at each time step together and feed them to an LSTM. If some time series are shorter than others you can pad them. Similarly, if some sets have fewer time series, pad those as well.
Example:
import numpy as np
np.random.seed(33)
time = 100
N = 5000
k = 5
magic = np.random.normal(size = k)
x = list()
y = list()
for i in range(N):
dat = np.zeros((k, time))
for i in range(k):
dat[i,:] = np.sin(list(range(time)))*np.random.normal(size =1) + np.random.normal(size = 1)
x.append(dat)
    y.append(dat.T @ magic)
So I want to predict a time series of 100 steps from a set of k = 5 time series. We want the model to learn the magic coefficients.
from keras.models import Model
from keras.layers import Input, Conv1D, Dense, Lambda, LSTM
from keras.optimizers import Adam
from keras import backend as K
import matplotlib.pyplot as plt
input = Input(shape=(time, k))
lstm = LSTM(32, return_sequences=True)(input)
output = Dense(1,activation='sigmoid')(lstm)
model = Model(inputs = input, outputs = output)
model.compile(optimizer = Adam(), loss='mean_squared_error')
data_x = np.zeros((N,100,5))
data_y = np.zeros((N,100,1))
for i in range(N):
data_x[i] = x[i].T.reshape(100,5)
data_y[i] = y[i].reshape(100,1)
from sklearn.preprocessing import StandardScaler
ss_x = StandardScaler()
ss_y = StandardScaler()
data_x = ss_x.fit_transform(data_x.reshape(N,-1)).reshape(N,100,5)
data_y = ss_y.fit_transform(data_y.reshape(N,-1)).reshape(N,100,1)
# Let's leave the last sample for testing and split the rest into train and validation
model.fit(data_x[:-1],data_y[:-1], batch_size=64, epochs=100, validation_split=.25)
The validation loss was still going down but I stopped it. Let's see how good our prediction is:
y_hat = model.predict(data_x[-1].reshape(-1,100,5))
plt.plot(data_y[-1], label='y')
plt.plot(y_hat.reshape(100), label='y_hat')
plt.legend(loc='upper left')
The results are promising. Running it for more epochs and doing some hyperparameter tuning should bring us even closer to the magic coefficients. One can also try stacked LSTMs and bidirectional LSTMs.
I feel RNNs are better suited to time series data than CNNs.
Data Format:
Let's say time steps = 3
Time series 1 = [1,2,3]
Time series 2 = [4,5,6]
Time series 3 = [7,8,9]
Time series 4 = [10,11,12]
Y = [100,200,300]
For a batch size of 1
[[1,4,7,10],[2,5,8,11],[3,6,9,12]] -> LSTM -> [100,200,300]
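To make that layout concrete, here is the same toy batch written out as arrays (a sketch; the shape convention is (batch, timesteps, features) as used above):
import numpy as np

x = np.array([[[1,  4,  7, 10],
               [2,  5,  8, 11],
               [3,  6,  9, 12]]], dtype=float)        # shape (1, 3, 4): 1 sample, 3 timesteps, 4 series
y = np.array([[[100], [200], [300]]], dtype=float)    # shape (1, 3, 1): one target value per timestep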