how should I specify batch size in LSTM?

how should I specify batch size in LSTM? - python

Could someone take a look at this LSTM code? I want to train on data with the shapes given below, but I receive an error that I think is related to the batch size. I do not know which batch size I should use; currently I use a batch size of 64. Should I choose a different batch size, or is the error not related to the batch size at all?
For this code, the shape of X is (7311, 17, 124) and the shape of Y is (7311, 1).
InvalidArgumentError: Incompatible shapes: [16] vs. [64]
[[node gradient_tape/binary_crossentropy/weighted_loss/Mul (defined at <ipython-input-74-f95f7e276c58>:1) ]] [Op:__inference_train_function_138498]
df = pd.read_csv("train_data.csv")
timestep = 17 #from 1 to 23 (17 with the current NaN strategy)
threshold_for_classification = -8
X_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()
fill_X = -0.01
seed = 11
#RNN hyperparameters
epochs = 75
batch = 64
val_split = 0.25
test_split = 0.25
lr = 0.0001
adam = optimizers.Nadam() #(lr)
class_weight = {True: 5.,
False: 1.}
verbose = 1
#Dropping first the empty column and then rows with NaNs
df = df.drop("c_rcs_estimate", axis=1)
df = df.dropna(how='any')
#Filtering events with len=1 or min_tca > 2 or max_tca < 2
def conditions(event):
    """Tell ``groupby(...).filter`` whether to keep one event.

    An event is kept only when it has more than one row and its
    ``time_to_tca`` values straddle 2.0 (minimum below, maximum above).

    Parameters
    ----------
    event : pandas.DataFrame
        Rows of a single group; must contain a ``time_to_tca`` column.

    Returns
    -------
    bool
        True when the event should be kept.
    """
    x = event["time_to_tca"].values
    # Test the length first so an empty group short-circuits instead of
    # raising from min()/max() on a zero-size array.
    return bool(x.shape[0] > 1 and x.min() < 2.0 and x.max() > 2.0)
df = df.groupby('event_id').filter(conditions)
#OHE for c_object_type (5 categories) -> 5 new features
df["mission_id"] = df["mission_id"].astype('category')
df["c_object_type"] = df["c_object_type"].astype('category')
df = pd.get_dummies(df)
#Getting y as 1D-array
y = df.groupby(["event_id"])["risk"].apply(lambda x: x.iloc[-1]).values.reshape(-1, 1)
#Scaling y
_ = y_scaler.fit(df["risk"].values.reshape(-1, 1)) #using the whole risk feature to scale the target 'y'
y = y_scaler.transform(y)
#Getting X as df (dropping rows with tca < 2)
df = df.loc[df["time_to_tca"]>2]
#Adding feature 'event_length' for counting how many instances each event has
# transform('count') broadcasts each group's row count back onto its rows;
# value_counts().idxmax() would return the most frequent value in the group,
# i.e. the event_id itself, not a count.
df["event_length"] = df.groupby('event_id')['event_id'].transform('count')
#Scaling X
df = pd.DataFrame(X_scaler.fit_transform(df), columns=df.columns)
#Transforming X into a 3D-array
events = df["event_id"].nunique() #rows
features = len(df.columns) #columns
X = np.zeros((events,timestep,features))
X.fill(fill_X)
i = 0
def df_to_3darray(event):
    """Copy one event's rows into slot ``i`` of the global 3-D array ``X``.

    Meant to be passed to ``df.groupby("event_id").apply``: each call fills
    one slot of ``X`` and then advances the global counter ``i``.  Events
    with at most ``timestep`` rows are right-aligned, so the leading steps
    keep whatever value ``X`` already held; longer events contribute only
    their last ``timestep`` rows.

    Returns the event unchanged.
    """
    global X, i
    #Transforming an event to time series (1,timesteps, columns)
    row = event.values.reshape(1,event.shape[0],event.shape[1])
    #Condition is needed to slice arrays correctly
    #Condition -> is timestep greater than the event's time series length?
    if(timestep>=row.shape[1]):
        X[i:i+1,-row.shape[1]:,:] = row
    else:
        X[i:i+1,:,:] = row[:,-timestep:,:]
    #index to iterate over X array
    i = i + 1
    #dataframe remains intact, while X array has been filled.
    return event
df.groupby("event_id").apply(df_to_3darray)
#Dropping event_id to remove noise
X = X[:,:,1:]
#TODO: Padding with specific values column-wise instead of zeros.
#TODO: Separating time dependent and independent feature in 2 X arrays
print(X.shape, y.shape)
#computing scaled threshold
th = np.array([threshold_for_classification]).reshape(-1,1)
th = y_scaler.transform(th)
threshold_scaled = th[0,0]
#Splitting arrays
y_boolean = (y > threshold_scaled).reshape(-1,1)
X_train, X_test, y_train_numeric, y_test_numeric = train_test_split(X, y,
stratify=y_boolean,
shuffle=True,
random_state=seed,
test_size = test_split
)
y_train_boolean = (y_train_numeric > threshold_scaled).reshape(-1,1)
X_train, X_val, y_train_numeric, y_val_numeric = train_test_split(X_train, y_train_numeric,
stratify=y_train_boolean,
shuffle=True,
random_state=seed,
test_size = val_split
)
#transforming it into a classification task -> y_train, y_test boolean
y_train = (y_train_numeric > threshold_scaled).reshape(-1,1)
y_val = (y_val_numeric > threshold_scaled).reshape(-1,1)
y_test = (y_test_numeric > threshold_scaled).reshape(-1,1)
X_train = tf.convert_to_tensor(X_train,dtype=tf.int64)
X_test = tf.convert_to_tensor( X_test,dtype=tf.int64)
y_train_numeric = tf.convert_to_tensor(y_train_numeric,dtype=tf.int64)
y_test_numeric = tf.convert_to_tensor(y_test_numeric,dtype=tf.int64)
y_train_boolean = tf.convert_to_tensor(y_train_boolean,dtype=tf.int64)
X_val = tf.convert_to_tensor(X_val,dtype=tf.int64)
y_val_numeric = tf.convert_to_tensor(y_val_numeric,dtype=tf.int64)
y_train = tf.convert_to_tensor(y_train,dtype=tf.int64)
y_val = tf.convert_to_tensor(y_val,dtype=tf.int64)
y_test = tf.convert_to_tensor(y_test,dtype=tf.int64)
y_boolean = tf.convert_to_tensor(y_boolean,dtype=tf.int64)
#Percentage of high risks in train
print("TRAIN {:0.1f}, {:0.1f}, {:0.3f}".format(np.sum(y_train), y_train.shape[0], np.sum(y_train)/y_train.shape[0]))
#Percentage of high risks in val
print("VAL {:0.1f}, {:0.1f}, {:0.3f}".format(np.sum(y_val), y_val.shape[0], np.sum(y_val)/y_val.shape[0]))
#Percentage of high risks in test
print("TEST {:0.1f}, {:0.1f}, {:0.3f}".format(np.sum(y_test), y_test.shape[0], np.sum(y_test)/y_test.shape[0]))
# Model activation selu
input_tensor = Input(batch_shape=(batch, timestep, X_train.shape[2]))
rnn_1 = LSTM(32, stateful=False, dropout=0.15, recurrent_dropout=0.3, return_sequences=True, kernel_regularizer=L1L2(l1=0.0, l2=0.01))(input_tensor)
batch_1 = BatchNormalization()(rnn_1)
rnn_2 = LSTM(16, stateful=False, dropout=0.15, recurrent_dropout=0.3, return_sequences=True, kernel_regularizer=L1L2(l1=0.0, l2=0.01))(batch_1)
batch_2 = BatchNormalization()(rnn_2)
rnn_3 = LSTM(8, stateful=False, dropout=0.15, recurrent_dropout=0.3, return_sequences=False, kernel_regularizer=L1L2(l1=0.0, l2=0.01))(batch_2)
batch_3 = BatchNormalization()(rnn_3)
output_tensor = Dense(units = 1, activation='sigmoid')(batch_3)
model = Model(inputs=input_tensor,
outputs= output_tensor)
model.compile(loss='binary_crossentropy',
optimizer=adam,
metrics=['accuracy'])
model.summary()
model_history = model.fit(X_train, y_train,
epochs=epochs,
batch_size=batch,
#shuffle=True, #OJO
validation_data=(X_val, y_val),
verbose=verbose,
class_weight=class_weight
).history

I would suggest changing this line
input_tensor = Input(batch_shape=(batch, timestep, X_train.shape[2]))
to
input_tensor = tf.keras.layers.Input(shape=(timestep, X_train.shape[2]))
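and then specifying your batch_size in model.fit. Leaving the batch dimension out of the Input layer lets the model accept the final, smaller batch (16 samples instead of 64 here), which is what the `[16] vs. [64]` error is complaining about. Also make sure X_train and y_train have the same number of samples.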

Related

Input shape issue with LSTM in keras

I'm attempting a bidirectional LSTM for datasets from a csv and training it by subsetting x and y; x has a shape of (29903, 10) and y's got a shape of (29903, 10). Regardless of adding a third dimension to x by reshaping it by (-1, 10, 1) I'm getting a value error due to variation in input sizes of 10 and 2, return_sequence set to True or otherwise.
Value Error encountered: 'ValueError: Dimensions must be equal, but are 10 and 2 for '{{node mean_absolute_error/sub}} = Sub[T=DT_FLOAT](sequential_9/bidirectional_9/concat, IteratorGetNext:1)' with input shapes: [?,10,10], [?,2,1].'
Here's the code:
`
lyst = pandas.read_csv('legasee.csv', index_col=0)
x = pandas.DataFrame(lyst.iloc[:,0:10])#.values
y = pandas.DataFrame(lyst.iloc[:,10:13])#.values
x.shape, y.shape, x, y
trinx, tex, triy, tey = train_test_split(x, y, test_size = 0.2, random_state = 0)
scaX = StandardScaler()
scaY = StandardScaler()
trinx = scaX.fit_transform(trinx)
triy = scaY.fit_transform(triy)
# Scale the test split with the statistics learned on the training split;
# re-fitting here would scale train and test differently.
tex = scaX.transform(tex)
tey = scaY.transform(tey)
trinx = trinx.reshape(-1, 10, 1)
triy = triy.reshape(-1, 2, 1)
# The opening parenthesis must sit on the same line as the callable,
# otherwise `moe` is bound to the Sequential class itself and the
# parenthesised expression below it is evaluated separately.
moe = keras.Sequential(
    [
        keras.layers.Bidirectional(
            layers.LSTM(5, return_sequences=True, activation = 'tanh'),
        ),
        # keras.layers.Flatten(),
        # keras.layers.Dense(10, activation = 'tanh'),
    ]
)
moe.compile(
    loss = 'mae', #from_logits=True
    optimizer=keras.optimizers.Adam(lr=0.01),
    metrics=['accuracy'],
)
moe.fit(trinx, triy, batch_size=64, epochs=10, verbose=2)
`
Any help would genuinely be appreciated.

How to increase prediction horizon for LSTM network?

How to increase prediction horizon for LSTM network in the following code:
Currently it is working with one step prediction, and I would like to set different forecast horizon. Any help is much appreciated.
def LSTM_pred(data=None,draw=True,uni=False,show_model=True,train_set=None,next_pred=False,ahead=1):
    # NOTE(review): this looks like an excerpt; only the data-preparation
    # part of the function is shown here.
    # Build the training arrays from `data`, unless a prepared
    # (trainX, trainY, scalarY, next_trainX) sequence was passed in.
    if train_set is None:
        trainX,trainY,scalarY,next_trainX = create_dateback(data,uni=uni,ahead=ahead)
    else:
        trainX,trainY,scalarY,next_trainX = train_set[0],train_set[1],train_set[2],train_set[3]
    if PERIODS == 0:
        # No hold-out: train on everything.
        train_X = trainX
        y_train = trainY
    else:
        # Hold out the last PERIODS samples as the test split.
        x_train,x_test = trainX[:-PERIODS],trainX[-PERIODS:]
        y_train,y_test = trainY[:-PERIODS],trainY[-PERIODS:]
        train_X = x_train.reshape((x_train.shape[0], x_train.shape[1], x_train.shape[2]))
        test_X = x_test.reshape((x_test.shape[0], x_test.shape[1], x_test.shape[2]))
One step prediction:
if PERIODS != 0:
    # Predict on the held-out split and evaluate it.
    pred_test = model.predict(test_X)
    evl(y_test, pred_test)
else:
    pred_test = np.array([])
if next_pred:
    # Append the prediction for the window that follows the training data.
    next_ans = model.predict(next_trainX.reshape((1, trainX.shape[1], trainX.shape[2])))
    pred_test = np.append(pred_test,next_ans)
# Flatten, then return the predictions as a single column.
pred_test = pred_test.ravel().reshape(-1,1)

How to plot the fitted vs actual data while model calibrates over training period in Tensorflow Python

I have a dataframe containing numerical daily data and a target variable ("Score") in the last column that I am trying to predict. The code below seems to work but I would like to visualise the results of the model fit while the model is calibrating against the actual data in the training set.
All variables are time series so they are ordered in time but the plotting I managed to test shows the actual time series of the target variable (in the training period) but for the predicted values I didn't manage to get the expected results.
If I plot them, the fitted values don't respect the time ordering of the actual data, and this seems to be due to the fact that there is a shuffling happening at an earlier stage.
How can I recover the time ordering of the fitted data at each iteration so that I can compare against the actual target variable while the model calibrates?
#allData is a dataframe containing the target variable in last column
y = 'Score' #name of the target variable to predict
target = allData[y].shift(1).dropna() #shift by 1 days as I want to predict the future score
X_ = allData.drop([y], axis=1) #all features
df = pd.concat([X_, target], join='outer', axis=1).dropna() #put them all back in a dataframe
df_train = df['2015':'2018']
df_test = df[prediction[0] : prediction[1]]
#scale variables
scaler = MinMaxScaler(feature_range=(-1, 1))
train= scaler.fit_transform(df_train.values)
test = scaler.transform(df_test.values)
x_train = train[:, :-1]
y_train = train[:, -1]
x_test = test[:, :-1]
num_features = x_train.shape[1]
x = tf.placeholder(dtype=tf.float32, shape=[None, num_features])
y_ = tf.placeholder(dtype=tf.float32, shape=[None])
nl_1, nl_2, nl_3, nl_4 = 512, 256, 128, 64
wi = tf.contrib.layers.variance_scaling_initializer(mode='FAN_AVG', uniform=True, factor=1)
zi = tf.zeros_initializer()
# 4 Hidden layers
wt_hidden_1 = tf.Variable(wi([num_features, nl_1]))
bias_hidden_1 = tf.Variable(zi([nl_1]))
wt_hidden_2 = tf.Variable(wi([nl_1, nl_2]))
bias_hidden_2 = tf.Variable(zi([nl_2]))
wt_hidden_3 = tf.Variable(wi([nl_2, nl_3]))
bias_hidden_3 = tf.Variable(zi([nl_3]))
wt_hidden_4 = tf.Variable(wi([nl_3, nl_4]))
bias_hidden_4 = tf.Variable(zi([nl_4]))
# Output layer
wt_out = tf.Variable(wi([nl_4, 1]))
bias_out = tf.Variable(zi([1]))
hidden_1 = tf.nn.relu(tf.add(tf.matmul(x, wt_hidden_1), bias_hidden_1))
hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, wt_hidden_2), bias_hidden_2))
hidden_3 = tf.nn.relu(tf.add(tf.matmul(hidden_2, wt_hidden_3), bias_hidden_3))
hidden_4 = tf.nn.relu(tf.add(tf.matmul(hidden_3, wt_hidden_4), bias_hidden_4))
out = tf.transpose(tf.add(tf.matmul(hidden_4, wt_out), bias_out))
mse = tf.reduce_mean(tf.squared_difference(out, y_))
optimizer = tf.train.AdamOptimizer().minimize(mse)
session = tf.InteractiveSession()
session.run(tf.global_variables_initializer())
BATCH_SIZE = 100
EPOCHS = 100
for epoch in range(EPOCHS):
    # Shuffle the training data
    # NOTE(review): this overwrites x_train / y_train with their shuffled
    # versions, so the full-set prediction further down is produced in
    # shuffled order rather than in the original time order.
    shuffle_data = permutation(arange(len(y_train)))
    x_train = x_train[shuffle_data]
    y_train = y_train[shuffle_data]
    # Mini-batch training
    for i in range(len(y_train)//BATCH_SIZE):
        start = i*BATCH_SIZE
        batch_x = x_train[start:start+BATCH_SIZE]
        batch_y = y_train[start:start+BATCH_SIZE]
        session.run(optimizer, feed_dict={x: batch_x, y_: batch_y})
        # Show plot of fitted model against actual data
        if np.mod(i, 5) == 0:
            pred = session.run(out, feed_dict={x: x_train}) #x_train is scaled
            # Put the predictions into the target column of a copy of the
            # scaled training matrix so the scaler can invert them.
            dd = train.copy()
            dd[:, -1] = pred[0]
            pred = scaler.inverse_transform(dd) #need to rescale in order to compare with actual data
            fig = plt.figure()
            ax1 = fig.add_subplot(111)
            line1, = ax1.plot(df_train[y].values) #the actual data in the training period
            line2, = ax1.plot(pred[:, -1][::-1]) #the fitted data in the training period don't seem to be ordered in time, like the original data
            plt.title('Epoch ' + str(epoch) + ', Batch ' + str(i))
            plt.show()
            plt.pause(0.01)

How to solve the value errors in rNN?

When I trained my RNN, I got: ValueError: Error when checking input: expected lstm_2_input to have 3 dimensions, but got array with shape (99, 20)
scaler = MinMaxScaler(feature_range=(0, 1))
data = scaler.fit_transform(data)
time_window = 20
# Build sliding windows over column 0: each sample holds the previous
# `time_window` values and its target is the value that follows them.
Xall, Yall = [], []
for i in range(time_window, len(data)):
    Xall.append(data[i-time_window:i, 0])
    Yall.append(data[i, 0])
Xall = np.array(Xall)
Yall = np.array(Yall)
train_size = int(len(Xall) * 0.8)
test_size = len(Xall) - train_size
Xtrain = Xall[:train_size, :]
Ytrain = Yall[:train_size]
Xtest = Xall[-test_size:, :]
Ytest = Yall[-test_size:]
model = Sequential()
model.add(LSTM(input_shape = (None,1),units=50,return_sequences=False))
model.add(Dense(output_dim=1))
model.add(Activation("linear"))
model.compile(loss="mse", optimizer="rmsprop")
from keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='loss', patience=2,verbose=1)
model.fit(Xtrain,Ytrain,batch_size=5,nb_epoch=20,validation_split=0.1)
allPredict = model.predict(np.reshape(Xall, (124,20,1)))
Xtrain has a shape of (99, 20), while Ytrain has a shape of (99,). I don't know what is wrong.

loss: nan using Keras vs non-nan (working) output using tensorflow

I am trying to replicate old code that I had in tensorflow but in Keras format. For some reason my loss is always nan. I think the error is in the loss that I am using ('categorical_crossentropy' in keras vs 'tf.nn.softmax_cross_entropy_with_logits' in tensorflow)
Keras code:
import keras
from keras.models import Sequential
from keras.layers import Dropout, Dense, Activation
from keras.regularizers import l2
from keras.layers.normalization import BatchNormalization
# Keras items
from keras.optimizers import Adam, Nadam
from keras.activations import relu, elu
from keras.losses import binary_crossentropy, categorical_crossentropy
from keras import metrics
import pandas as pd
import numpy as np
x_main = pd.read_csv("glioma DB X.csv")
y_main = pd.read_csv("glioma DB Y.csv")
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_main, y_main, test_size=0.3)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5)
# train shape
np.shape(x_train), np.shape(y_train)
((132, 47), (132, 1))
# Normalize training data; will want to have the same mu and sigma for test
def normalize_features(dataset):
    """Standardize ``dataset`` column by column.

    Subtracts the per-column mean and divides by the per-column standard
    deviation; 1e-10 is added to the divisor so constant columns do not
    cause a division by zero.

    Returns
    -------
    tuple
        ``(normalized, norm_parameters)`` where ``norm_parameters`` is a
        dict with keys ``'mu'`` and ``'sigma'`` that can be reused to scale
        other splits the same way.
    """
    mu = np.mean(dataset, axis = 0) # columns
    sigma = np.std(dataset, axis = 0)
    norm_parameters = {'mu': mu,
                       'sigma': sigma}
    return (dataset-mu)/(sigma+1e-10), norm_parameters
# Normalize X data; validation and test reuse the mu and sigma computed on the training set
x_train, norm_parameters = normalize_features(x_train)
x_val = (x_val-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
x_test = (x_test-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
params = {'lr': 0.001,
'batch_size': 30,
'epochs': 8000,
'dropout': 0.5,
'weight_regulizer':['l2'],
'optimizer': 'adam',
'losses': 'categorical_crossentropy',
'activation':'relu',
'last_activation': 'softmax'}
from keras.utils.np_utils import to_categorical
#categorical_labels = to_categorical(int_labels, num_classes=None)
# One-hot encode the labels only when the chosen loss expects that format.
if params['losses']=='categorical_crossentropy':
    y_train = to_categorical(y_train,num_classes=4)
    y_val = to_categorical(y_val,num_classes=4)
    y_test = to_categorical(y_test,num_classes=4)
model = Sequential()
# layer 1
model.add(Dense(30, input_dim=x_train.shape[1],
W_regularizer=l2(0.01),
kernel_initializer='he_uniform'))
model.add(BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True))
model.add(Activation(params['activation']))
model.add(Dropout(params['dropout']))
# layer 2
model.add(Dense(20, W_regularizer=l2(0.01),
kernel_initializer='he_uniform'))
model.add(BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True))
model.add(Activation(params['activation']))
model.add(Dropout(params['dropout']))
# if we want to also test for number of layers and shapes, that's possible
#hidden_layers(model, params, 1)
# Last layer
model.add(Dense(4, activation=params['last_activation'],
kernel_initializer='he_uniform'))
model.compile(loss=params['losses'],
optimizer=keras.optimizers.adam(lr=params['lr']),
metrics=['categorical_accuracy'])
history = model.fit(x_train, y_train,
validation_data=[x_val, y_val],
batch_size=params['batch_size'],
epochs=params['epochs'],
verbose=1)
Working code using tensorflow which gives me a pretty loss graph haha:
x_train, x_test, y_train, y_test = train_test_split(X_main, Y_main, test_size=0.3)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5)
# ANOTHER OPTION IS TO USE SKLEARN sklearn.model_selection.ShuffleSplit
# look into stratification
# Normalize training data; will want to have the same mu and sigma for test
def normalize_features(dataset):
    """Standardize ``dataset`` column by column.

    Subtracts the per-column mean and divides by the per-column standard
    deviation; 1e-10 is added to the divisor so constant columns do not
    cause a division by zero.

    Returns
    -------
    tuple
        ``(normalized, norm_parameters)`` where ``norm_parameters`` is a
        dict with keys ``'mu'`` and ``'sigma'`` that can be reused to scale
        other splits the same way.
    """
    mu = np.mean(dataset, axis = 0) # columns
    sigma = np.std(dataset, axis = 0)
    norm_parameters = {'mu': mu,
                       'sigma': sigma}
    return (dataset-mu)/(sigma+1e-10), norm_parameters
# TRY LOG TRANSFORMATION LOG(1+X) to deal with outliers
# change ordinal to one hot vector
# to make label encoder
# for c in x_train.columns[x_train.dtype == 'object']:
# X[c] (which was copy of xtrain) X[c].factorize()[0]
# able to plot feature importance in random forest
# Normalize X data; validation and test reuse the mu and sigma computed on the training set; then transpose
x_train, norm_parameters = normalize_features(x_train)
x_val = (x_val-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
x_test = (x_test-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
x_train = np.transpose(x_train)
x_val = np.transpose(x_val)
x_test = np.transpose(x_test)
y_train = np.transpose(y_train)
y_val = np.transpose(y_val)
y_test = np.transpose(y_test)
# converting values from database to matrix
x_train = x_train.as_matrix()
x_val = x_val.as_matrix()
x_test = x_test.as_matrix()
y_train = y_train.as_matrix()
y_val = y_val.as_matrix()
y_test = y_test.as_matrix()
# testing shape
#print(y_train.shape)
#print(y_val.shape)
#print(y_test.shape)
#
#print(x_train.shape)
#print(x_val.shape)
#print(x_test.shape)
# convert y to array per value so 3 = [0 0 1]
def convert_to_one_hot(Y, C):
    """One-hot encode integer labels, one column per example.

    Arguments:
    Y -- array-like of integer class indices (any shape; it is flattened)
    C -- number of classes

    Returns:
    array of shape (C, number of labels); column j is the one-hot vector
    of the j-th label.
    """
    labels = np.asarray(Y).reshape(-1)
    # Row k of the identity matrix is the one-hot vector for class k;
    # transpose so that examples run along the columns.
    return np.eye(C)[labels].T
y_train = convert_to_one_hot(y_train, 4)
y_val = convert_to_one_hot(y_val, 4)
y_test = convert_to_one_hot(y_test, 4)
print ("number of training examples = " + str(x_train.shape[1]))
print ("number of test examples = " + str(x_test.shape[1]))
print ("X_train shape: " + str(x_train.shape))
print ("Y_train shape: " + str(y_train.shape))
print ("X_test shape: " + str(x_test.shape))
print ("Y_test shape: " + str(y_test.shape))
# minibatches for later
def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):
    """
    Creates a list of random minibatches from (X, Y)

    Arguments:
    X -- input data, of shape (input size, number of examples)
    Y -- label array, of shape (number of outputs, number of examples)
    mini_batch_size -- size of the mini-batches, integer
    seed -- seed for the shuffle; the same seed always yields the same minibatches

    Returns:
    mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y)
    """
    m = X.shape[1] # number of training examples
    # Step 1: Shuffle (X, Y) with the same column order so pairs stay aligned.
    # A private RandomState makes `seed` effective without touching
    # NumPy's global random state.
    order = np.random.RandomState(seed).permutation(m)
    shuffled_X = X[:, order]
    shuffled_Y = Y[:, order].reshape((Y.shape[0], m))
    # Step 2: Partition (shuffled_X, shuffled_Y). Stepping by mini_batch_size
    # yields every full batch plus, when m is not a multiple of it, one
    # shorter final batch.
    mini_batches = []
    for start in range(0, m, mini_batch_size):
        stop = start + mini_batch_size
        mini_batches.append((shuffled_X[:, start:stop], shuffled_Y[:, start:stop]))
    return mini_batches
# starting TF graph
# Create X and Y placeholders
def create_xy_placeholder(n_x, n_y):
    """Create the float32 input placeholders named 'X' and 'Y'.

    X has shape [n_x, None] and Y has shape [n_y, None]; the second
    dimension is left open.
    """
    X = tf.placeholder(tf.float32, shape = [n_x, None], name = 'X')
    Y = tf.placeholder(tf.float32, shape = [n_y, None], name = 'Y')
    return X, Y
# initialize parameters hidden layers
def initialize_parameters(n_x, scale, hidden_units):
    """Create the weight and bias variables for every layer.

    Arguments:
    n_x -- input size
    scale -- scale passed to the L2 regularizer attached to the weights
    hidden_units -- sequence with the number of units of each layer

    Returns:
    parameters -- dict mapping "W1", "b1", "W2", "b2", ... to variables;
                  W<k> has shape [units_k, units_(k-1)] and b<k> has shape
                  [units_k, 1]
    """
    # Prepend the input size so consecutive entries give each layer's
    # (fan_in, fan_out); list() also accepts a tuple without mutating it.
    layer_sizes = [n_x] + list(hidden_units)
    parameters = {}
    regularizer = tf.contrib.layers.l2_regularizer(scale)
    for i in range(len(layer_sizes) - 1):
        with tf.variable_scope('hidden_parameters_'+str(i+1)):
            w = tf.get_variable("W"+str(i+1), [layer_sizes[i+1], layer_sizes[i]],
                                initializer=tf.contrib.layers.xavier_initializer(),
                                regularizer=regularizer)
            b = tf.get_variable("b"+str(i+1), [layer_sizes[i+1], 1],
                                initializer = tf.constant_initializer(0.1))
            parameters["W"+str(i+1)] = w
            parameters["b"+str(i+1)] = b
    return parameters
# forward progression with batch norm and dropout
def forward_propagation(X, parameters, batch_norm=False, keep_prob=1):
    """Build the forward pass and return the final layer's pre-activation.

    Every layer except the last computes W @ a + b, optionally applies
    batch normalization, then ReLU, then (when keep_prob < 1) dropout.
    The last layer computes W @ a + b with optional batch normalization
    and is returned as-is, to be used as logits.

    Arguments:
    X -- input tensor
    parameters -- dict holding "W<k>" / "b<k>" for each layer
    batch_norm -- apply batch normalization when True
    keep_prob -- keep probability passed to tf.nn.dropout
    """
    num_layers = int(len(parameters)/2)
    a_new = X
    for i in range(0, num_layers-1):
        with tf.name_scope('forward_pass_'+str(i+1)):
            w = parameters['W'+str(i+1)]
            b = parameters['b'+str(i+1)]
            z = tf.matmul(w, a_new) + b
            if batch_norm == True:
                z = tf.layers.batch_normalization(z, momentum=0.99, axis=0)
            a = tf.nn.relu(z)
            if keep_prob < 1:
                a = tf.nn.dropout(a, keep_prob)
            a_new = a
            tf.summary.histogram('act_'+str(i+1), a_new)
    # calculating final Z before input into cost as logit
    with tf.name_scope('forward_pass_'+str(num_layers)):
        w = parameters['W'+str(num_layers)]
        b = parameters['b'+str(num_layers)]
        z = tf.matmul(w, a_new) + b
        if batch_norm == True:
            z = tf.layers.batch_normalization(z, momentum=0.99, axis=0)
    return z
# compute cost with option for l2 regularizatoin
def compute_cost(z, Y, parameters, l2_reg=False):
    """Build the cost, prediction and accuracy tensors.

    Arguments:
    z -- output of the final layer, used as logits
    Y -- one-hot label placeholder
    parameters -- not used here; kept so existing callers keep working
    l2_reg -- when True, add the collected regularization losses to the cost

    Returns:
    (cost, prediction, accuracy)
    """
    with tf.name_scope('cost'):
        # z and Y are transposed before being handed to the loss.
        logits = tf.transpose(z)
        labels = tf.transpose(Y)
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits,
                                                                      labels = labels))
        if l2_reg == True:
            reg = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            cost = cost + tf.reduce_sum(reg)
    with tf.name_scope('Pred/Accuracy'):
        prediction=tf.argmax(z)
        correct_prediction = tf.equal(tf.argmax(z), tf.argmax(Y))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    return cost, prediction, accuracy
# defining the model (need to add keep_prob for dropout)
def model(X_train, Y_train, X_test, Y_test,
          hidden_units=[30, 20, 4], # hidden units/layers
          learning_rate = 0.0001, # Learning rate
          num_epochs = 10000, minibatch_size = 30, # minibatch/ number epochs
          keep_prob=0.5, # dropout
          batch_norm=True, # batch normalization
          l2_reg=True, scale = 0.01, # L2 regularization/scale is lambda
          print_cost = True):
    """Build, train and evaluate the feed-forward classifier.

    Arguments:
    X_train, X_test -- inputs of shape (input size, number of examples)
    Y_train, Y_test -- labels; the first dimension is the output size
    hidden_units -- units per layer (only read, never mutated here)
    learning_rate -- step size of the gradient-descent optimizer
    num_epochs -- number of passes over the training set
    minibatch_size -- number of examples per minibatch
    keep_prob -- dropout keep probability
    batch_norm -- whether to apply batch normalization
    l2_reg -- whether to add the L2 regularization losses to the cost
    scale -- scale of the L2 regularizer
    print_cost -- print progress every 100 epochs and record the cost
                  every 5 epochs

    Returns:
    parameters -- dict with the trained values of the model variables
    """
    ops.reset_default_graph() # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1) # to keep consistent results
    seed = 3 # to keep consistent results
    (n_x, m) = X_train.shape # (n_x: input size, m : number of examples in the train set)
    n_y = Y_train.shape[0] # n_y : output size
    costs = [] # To keep track of the cost
    # Create Placeholders of shape (n_x, n_y)
    X, Y = create_xy_placeholder(n_x, n_y)
    # Initialize parameters
    parameters = initialize_parameters(n_x, scale, hidden_units)
    # Forward propagation: Build the forward propagation in the tensorflow graph.
    # Passed by keyword: forward_propagation declares batch_norm before
    # keep_prob, so positional (keep_prob, batch_norm) would swap the two.
    # NOTE(review): keep_prob is baked into the graph as a constant, so with
    # keep_prob < 1 dropout is also active in the accuracy evaluation below.
    z = forward_propagation(X, parameters, batch_norm=batch_norm, keep_prob=keep_prob)
    # Cost function: Add cost function to tensorflow graph
    cost, prediction, accuracy = compute_cost(z, Y, parameters, l2_reg)
    # Built once here instead of inside the training loop, so the graph
    # does not grow every time progress is printed.
    correct_labels = tf.argmax(Y)
    # Backpropagation: Define the tensorflow optimizer (plain gradient descent).
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
    # Initialize all the variables
    init = tf.global_variables_initializer()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # Start the session to compute the tensorflow graph
    with tf.Session(config=config) as sess:
        # Run the initialization
        sess.run(init)
        # Do the training loop
        for epoch in range(num_epochs):
            epoch_cost = 0. # Defines a cost related to an epoch
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)
            # Average over the batches actually produced; this includes the
            # shorter final batch and cannot be zero while batches exist.
            num_minibatches = len(minibatches)
            for minibatch in minibatches:
                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch
                # IMPORTANT: The line that runs the graph on a minibatch.
                # Run the session to execute the "optimizer" and the "cost", the feedict should contain a minibatch for (X,Y).
                _ , minibatch_cost = sess.run([optimizer, cost],
                                              feed_dict = {X: minibatch_X, Y: minibatch_Y})
                epoch_cost += minibatch_cost / num_minibatches
            # Print the cost every 100 epochs, with the predictions and
            # labels of the last minibatch of the epoch
            if print_cost == True and epoch % 100 == 0:
                print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
                print('prediction: ', prediction.eval(feed_dict={X: minibatch_X,
                                                                 Y: minibatch_Y}))
                print('correct: ', correct_labels.eval(feed_dict={X: minibatch_X,
                                                                  Y: minibatch_Y}))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)
        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per tens)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()
        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print ("Parameters have been trained!")
        # `accuracy` from compute_cost already compares argmax(z) with
        # argmax(Y); evaluate it on the full train and test sets.
        print ("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print ("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
        return parameters
# run model on test data
parameters = model(x_train, y_train, x_test, y_test, keep_prob=1)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

how should I specify batch size in LSTM? - python

I would suggest changing this line: `input_tensor = Input(batch_shape=(batch, timestep, X_train.shape[2]))` to `input_tensor = tf.keras.layers.Input(shape=(timestep, X_train.shape[2]))`, then specifying your batch_size in model.fit, and making sure X_train and y_train have the same number of samples.

Related

Input shape issue with LSTM in keras

How to increase prediction horizon for LSTM network?

How to plot the fitted vs actual data while model calibrates over training period in Tensorflow Python

How to solve the value errors in rNN?

loss: nan using Keras vs non-nan (working) output using tensorflow

Categories

Resources