I've been a training a model and with htop I see that the memory keeps increasing with every iteration.Looking arround most people say that the graph must keep on growing either because I'm loading a new model with every iteration or because I add new ops, but I do none of the above.
This is the smallest reproducible example.
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import numpy as np
#%% Params
OBSERVATION_SPACE_VALUES = 4
ACTION_SPACE_SIZE = 2
LEARNING_RATE = 0.00025/4
DENSE_PARAMS = [256]
class Network():
def __init__(self, state_size=OBSERVATION_SPACE_VALUES, action_size=ACTION_SPACE_SIZE, learning_rate=LEARNING_RATE,
dense_params=DENSE_PARAMS):
self.state_size = state_size
self.action_size = action_size
self.learning_rate= learning_rate
self.model = self.create_model(dense_params)
def create_model(self, dense_params=[256]):
model = Sequential()
for params in dense_params:
units = params
model.add(Dense(units, activation='relu',input_shape=[self.state_size]))
model.add(Dense(self.action_size, activation="linear"))
model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
return model
Agent = Network()
for i in range(10_000):
state = np.random.rand(Agent.state_size)
state = np.expand_dims(state, axis=0)
output = np.random.rand(Agent.action_size)
output = np.expand_dims(output, axis=0)
Agent.model.fit(state,output,verbose=True)
And also:
tf.__version__
2.0.0
tf.keras.__version__
2.2.4-tf
The problem is the use of multiple .fit calls. To solve this, you can either:
create a data_generator of your data and call .fit(epochs=10000)
or
keep the for loop but call `train_on_batch (doc) instead
Related
I structured a Convolutional LSTM model to predict the forthcoming Bitcoin price data, using the analyzed past data of the Bitcoin close price and other features.
Let me jump straight to the code:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
import tensorflow.keras as keras
import keras_tuner as kt
from keras_tuner import HyperParameters as hp
from keras.models import Sequential
from keras.layers import InputLayer, ConvLSTM1D, LSTM, Flatten, RepeatVector, Dense, TimeDistributed
from keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
import keras.backend as K
from keras.losses import Huber
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
DIR = '../input/btc-features-targets'
SEG_DIR = '../input/segmented'
segmentized_features = os.listdir(SEG_DIR)
btc_train_features = []
for seg in segmentized_features:
train_features = pd.read_csv(f'{SEG_DIR}/{seg}')
train_features.set_index('date', inplace=True)
btc_train_features.append(scaler.fit_transform(train_features.values))
btc_train_targets = pd.read_csv(f'{DIR}/btc_train_targets.csv')
btc_train_targets.set_index('date', inplace=True)
btc_test_features = pd.read_csv(f'{DIR}/btc_test_features.csv')
btc_tef1 = btc_test_features.iloc[:111]
btc_tef2 = btc_test_features.iloc[25:]
btc_tef1.set_index('date', inplace=True)
btc_tef2.set_index('date', inplace=True)
btc_test_targets = pd.read_csv(f'{DIR}/btc_test_targets.csv')
btc_test_targets.set_index('date', inplace=True)
btc_trt_log = np.log(btc_train_targets)
btc_tefs1 = scaler.fit_transform(btc_tef1.values)
btc_tefs2 = scaler.fit_transform(btc_tef2.values)
btc_tet_log = np.log(btc_test_targets)
scaled_train_features = []
for features in btc_train_features:
shape = features.shape
scaled_train_features.append(np.expand_dims(features, [0,3]))
shape_2 = btc_tefs1.shape
btc_tefs1 = np.expand_dims(btc_tefs1, [0,3])
shape_3 = btc_tefs2.shape
btc_tefs2 = np.expand_dims(btc_tefs2, [0,3])
btc_trt_log = btc_trt_log.values[0]
btc_tet_log = btc_tet_log.values[0]
def build(hp):
model = keras.Sequential()
# Input Layer
model.add(InputLayer(input_shape=(111,32,1)))
# ConvLSTM1D
convLSTM_hp_filters = hp.Int(name='convLSTM_filters', min_value=32, max_value=512, step=32)
convLSTM_hp_kernel_size = hp.Choice(name='convLSTM_kernel_size', values=[3,5,7])
convLSTM_activation = hp.Choice(name='convLSTM_activation', values=['selu', 'relu'])
model.add(ConvLSTM1D(filters=convLSTM_hp_filters,
kernel_size=convLSTM_hp_kernel_size,
padding='same',
activation=convLSTM_activation,
use_bias=True,
bias_initializer='zeros'))
# Flatten
model.add(Flatten())
# RepeatVector
model.add(RepeatVector(5))
# LSTM
LSTM_hp_units = hp.Int(name='LSTM_units', min_value=32, max_value=512, step=32)
LSTM_activation = hp.Choice(name='LSTM_activation', values=['selu', 'relu'])
model.add(LSTM(units=LSTM_hp_units, activation=LSTM_activation, return_sequences=True))
# TimeDistributed Dense
dense_units = hp.Int(name='dense_units', min_value=32, max_value=512, step=32)
dense_activation = hp.Choice(name='dense_activation', values=['selu', 'relu'])
model.add(TimeDistributed(Dense(units=dense_units, activation=dense_activation)))
# TimeDistributed Dense_Output
model.add(Dense(1))
# Set Learning Rate
hp_learning_rate = hp.Choice(name='learning_rate', values=[1e-2, 1e-3, 1e-4])
# Compile Model
model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
loss=Huber(),
metrics=[RootMeanSquaredError()])
return model
tuner = kt.Hyperband(build,
objective=kt.Objective('root_mean_squared_error', direction='min'),
max_epochs=10,
factor=3)
early_stop = EarlyStopping(monitor='root_mean_squared_error', patience=5)
opt_hps = []
for train_features in scaled_train_features:
tuner.search(train_features, btc_trt_log, epochs=50, callbacks=[early_stop])
opt_hps.append(tuner.get_best_hyperparameters(num_trials=1)[0])
models, epochs = ([] for _ in range(2))
for hps in opt_hps:
model = tuner.hypermodel.build(hps)
models.append(model)
history = model.fit(train_features, btc_trt_log, epochs=70, verbose=0)
rmse = history.history['root_mean_squared_error']
best_epoch = rmse.index(min(rmse)) + 1
epochs.append(best_epoch)
hypermodel = tuner.hypermodel.build(opt_hps[0])
for train_features, epoch in zip(scaled_train_features, epochs): hypermodel.fit(train_features, btc_trt_log, epochs=epoch)
tp1 = hypermodel.predict(btc_tefs1).flatten()
tp2 = hypermodel.predict(btc_tefs2).flatten()
test_predictions = np.concatenate((tp1, tp2[86:]), axis=None)
The hyperparameters of the model are configured using keras_tuner; as there were ResourceExhaustError issues output by the notebook when training is done with the full features dataset, sequentially segmented datasets are used instead (and apparently, referring to the study done utilizing the similar model architecture, training is able to be efficiently done through this training approach).
The input dimension of each segmented dataset is (111,32,1).
There aren't any issues reported until before the last code block. The models work fine. Yet, when the .predict() function is executed, the notebook prints out an error, which states that the dimension of the input features for making predictions is incompatible with the dimension of the input features used while training. I did not understand the reason behind its occurrence, since as far as I know, the input dimensions of a train dataset for a DNN model cannot be identical as the input dimensions of a test dataset.
Even though all the price data from 2018 to early 2021 are used as training datasets, predictions are only needed for the mid 2021 timeframe.
The dataset used for prediction has a dimension of (136,32,1).
I tried matching the dimension of this dataset to (111,32,1), through index slicing.
Now this showed issues in the output dimension. While predictions should be made for 136 data points, the result only returned 10.
Are there any issues relevant to the model configuration? Cannot interpret the current situation.
I'm trying to build a sequential neural network with keras. I generate a dataset with inserting randoms in a known function and train my model with this dataset, long enough to get a steady loss. Then I ask the model to predict the x_train values, but instead of predicting something close to y_train, it returns the same value regardless of the input x. This value also happens to be the average of y_train values. I don't understand what I'm doing wrong and why this is happening.
I'm using the following function for training the model:
def train_model(x_train,y_train,batch_size,input_size,layer_sizes,activations,optimizer,epochs,loss='MeanSquaredError'):
assert len(layer_sizes) == len(activations)
n_layers=len(layer_sizes)
model = Sequential()
model.add(LayerNormalization(input_dim=input_size))
model.add(Dense(layer_sizes[0],kernel_regularizer='l2',kernel_initializer='ones',activation=activations[0],input_dim=input_size,name='layer1'))
for i in range(1,n_layers):
model.add(Dense(layer_sizes[i],kernel_initializer='ones',activation=activations[i],name=f'layer{i+1}'))
model.compile(
optimizer = optimizer,
loss = loss, #MeanSquaredLogarithmicError
)
print(model.summary())
history = model.fit(x_train,y_train,batch_size=batch_size,epochs=epochs)
loss_history = history.history['loss']
plt.scatter(x=np.arange(1,epochs+1),y=loss_history)
plt.show()
return model
I then created an arbitrary function (just for test purposes) as:
def func(x1,x2,x3,x4):
y=(x1**3+(x2*x3+2))/(x4+x2*x1)
return y
and made a random dataset with this function:
def random_points_in_range(n,ranges):
points = np.empty((n,len(ranges)))
for i,element in enumerate(ranges):
start=min(element[1],element[0])
interval=abs(element[1]-element[0])
rand_check = np.random.rand(n)
randoms = ( rand_check*interval ) + start
points[:,i] = randoms.T
return points
def generate_random_dataset(n=200,ranges=[(0,10),(0,10),(0,10),(0,10)]):
x_dataset = random_points_in_range(n,ranges)
y_dataset = np.empty(n)
for i in range(n):
x1,x2,x3,x4 = x_dataset[i]
y_dataset[i] = func(x1,x2,x3,x4)
return x_dataset,y_dataset
I then train a model with these functions:
x_train,y_train = generate_random_dataset()
layer_sizes = [6,8,10,10,1]
activations = [LeakyReLU(),'relu','swish','relu','linear']
opt = Adam(learning_rate=0.001)
epochs = 3000
model=train_model(x_train,y_train,5,4,layer_sizes,activations,opt,epochs,loss='MeanSquaredError')
if you want to run the code these are things you need to import:
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LayerNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
I have several DistributionLambda layers as the outputs of one model, and I would like to make a Concatenate-like operation into a new layer, in order to have only one output that is the mix of all the distributions, assuming they are independent. Then, I can apply a log-likelihood loss to the output of the model. Otherwise, I cannot apply the loss over a Concatenate layer, because it lost the log_prob method. I have been trying with the Blockwise distribution, but with no luck so far.
Here an example code:
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow_probability import distributions
from tensorflow_probability import layers as tfp_layers
def likelihood_loss(y_true, y_pred):
"""Adding negative log likelihood loss."""
return -y_pred.log_prob(y_true)
def distribution_fn(params):
"""Distribution function."""
return distributions.Normal(
params[:, 0], math.log(1.0 + math.exp(params[:, 1])))
output_steps = 3
...
lstm_layer = layers.LSTM(10, return_state=True)
last_layer, l_h, l_c = lstm_layer(last_layer)
lstm_state = [l_h, l_c]
dense_layer = layers.Dense(2)
last_layer = dense_layer(last_layer)
last_layer = tfp_layers.DistributionLambda(
make_distribution_fn=distribution_fn)(last_layer)
output_layers = [last_layer]
# Get output sequence, re-injecting the output of each step
for number in range(1, output_steps):
last_layer = layers.Reshape((1, 1))(last_layer)
last_layer, l_h, l_c = lstm_layer(last_layer, initial_state=lstm_states)
# Storing state for next time step
lstm_states = [l_h, l_c]
last_layer = tfp_layers.DistributionLambda(
make_distribution_fn=distribution_fn)(dense_layer(last_layer))
output_layers.append(last_layer)
# This does not work
# last_layer = distributions.Blockwise(output_layers)
# This works for the model but cannot compute loss
# last_layer = layers.Concatenate(axis=1)(output_layers)
the_model = models.Model(inputs=[input_layer], outputs=[last_layer])
the_model.compile(loss=likelihood_loss, optimizer=optimizers.Adam(lr=0.001))
The problem is your Input, not your output layer ;)
Input:0 is referenced in your error message.
Could you try to be more specific about your input?
I am building a simple neural network using Keras. It has activity regularization so that the output of the only hidden layer is forced to have small values. Here is the code:
import numpy as np
import math
import keras
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Activation
from keras import regularizers
from keras import backend as K
a=1
def my_regularizer(inputs):
means=K.mean((inputs),axis=1)
return a*K.sum(means)**2
x_train=np.random.uniform(low=-1,high=1,size=(200,2))
model=Sequential([
Dense(20,input_shape=(2,),activity_regularizer=my_regularizer),
Activation('tanh'),
Dense(2,),
Activation('linear')
])
model.compile(optimizer='adam',loss='mean_squared_error')
model.fit(x_train,x_train,epochs=20,validation_split=0.1)
Questions:
1) Currently, parameter a is set at the beginning and it does not change. How can I change the code such that the parameter a is updated after each iteration such that
a_new=f(a_old,input)
where input is the values at the hidden layer and f(.) is an arbitrary function.
2) I want my activity regularizer to be applied after the first activation function tanh is applied. Have I written my code correctly? The term "activity_regularizer=my_regularizer" in
Dense(20,input_sahpe=(2,),activity_regularizer=my_regularizer)
makes me feel that the regularizer is being applied to values before the activation function tanh.
You can - but first, you need a valid Keras Regularizer object (your function won't work):
class MyActivityRegularizer(Regularizer):
def __init__(self, a=1):
self.a = K.variable(a, name='a')
# gets called at each train iteration
def __call__(self, x): # your custom function here
means = K.mean(x, axis=1)
return self.a * K.sum(means)**2
def get_config(self): # required class method
return {"a": float(K.get_value(self.a))}
Next, to work with .fit, you need a custom Keras Callback object (see alternative at bottom):
class ActivityRegularizerScheduler(Callback):
""" 'on_batch_end' gets automatically called by .fit when finishing
iterating over a batch. The model, and its attributes, are inherited by
'Callback' (except at __init__) and can be accessed via, e.g., self.model """
def __init__(self, model, update_fn):
self.update_fn=update_fn
self.activity_regularizers=_get_activity_regularizers(model)
def on_batch_end(self, batch, logs=None):
iteration = K.get_value(self.model.optimizer.iterations)
new_activity_reg = self.update_fn(iteration)
# 'activity_regularizer' references model layer's activity_regularizer (in this
# case 'MyActivityRegularizer'), so its attributes ('a') can be set directly
for activity_regularizer in self.activity_regularizers:
K.set_value(activity_regularizer.a, new_activity_reg)
def _get_activity_regularizers(model):
activity_regularizers = []
for layer in model.layers:
a_reg = getattr(layer,'activity_regularizer',None)
if a_reg is not None:
activity_regularizers.append(a_reg)
return activity_regularizers
Lastly, you'll need to create your model within the Keras CustomObjectScope - see in full ex. below.
Example usage:
from keras.layers import Dense
from keras.models import Sequential
from keras.regularizers import Regularizer
from keras.callbacks import Callback
from keras.utils import CustomObjectScope
from keras.optimizers import Adam
import keras.backend as K
import numpy as np
def make_model(my_reg):
return Sequential([
Dense(20, activation='tanh', input_shape=(2,), activity_regularizer=my_reg),
Dense(2, activation='linear'),
])
my_reg = MyActivityRegularizer(a=1)
with CustomObjectScope({'MyActivityRegularizer':my_reg}): # required for Keras to recognize
model = make_model(my_reg)
opt = Adam(lr=1e-4)
model.compile(optimizer=opt, loss='mse')
x = np.random.randn(320,2) # dummy data
y = np.random.randn(320,2) # dummy labels
update_fn = lambda x: .5 + .4*np.cos(x) #x = number of train updates (optimizer.iterations)
activity_regularizer_scheduler = ActivityRegularizerScheduler(model, update_fn)
model.fit(x,y,batch_size=32,callbacks=[activity_regularizer_scheduler],
epochs=4,verbose=1)
To TRACK your a and make sure it's changing, you can get its value at, e.g., each epoch end via:
for epoch in range(4):
model.fit(x,y,batch_size=32,callbacks=[activity_regularizer_scheduler],epochs=1)
print("Epoch {} activity_regularizer 'a': {}".format(epoch,
K.get_value(_get_activity_regularizers(model)[0].a)))
# My output:
# Epoch 0 activity_regularizer 'a': 0.7190816402435303
# Epoch 1 activity_regularizer 'a': 0.4982417821884155
# Epoch 2 activity_regularizer 'a': 0.2838689386844635
# Epoch 3 activity_regularizer 'a': 0.8644570708274841
Regarding (2), I'm afraid you're right - the 'tanh' outputs won't be used; you'll need to pass activation='tanh' instead.
Lastly, you can do it without a callback, via train_on_batch - but a drawback is, you'll need to feed data to the model yourself (and shuffle it, etc):
activity_regularizers = _get_activity_regularizers(model)
for iteration in range(100):
x, y = get_data()
model.train_on_batch(x,y)
iteration = K.get_value(model.optimizer.iterations)
for activity_regularizer in activity_regularizers:
K.set_value(activity_regularizer, update_fn(iteration))
I have trained a NN using tf.keras and saved the whole model with ModelCheckpoint in a .h5 file.
However, when I restore it with models.load_model and then train it again with the method fit, it only returns a History object and does nothing more.
Below is the minimal example for the training:
import numpy as np
import tensorflow as tf
# Creates dummy data
train_x = np.random.randint(10,size=40).reshape(-1,1)
train_y = np.random.randint(2,size=40).reshape(-1,1)
train_set = (train_x,train_y)
val_x = np.random.randint(10,size=20).reshape(-1,1)
val_y = np.random.randint(2,size=20).reshape(-1,1)
val_set = (val_x,val_y)
# Set Learning Rate Decay
import math
def step_decay(epoch):
print('--- Epoch:',epoch)
print(tf.keras.callbacks.History())
init_lr = 0.001
drop = 0.9
epochs_drop = 1.0
lr = init_lr*math.pow(drop,math.floor((1+epoch)/epochs_drop))
return(lr)
lr_callback = tf.keras.callbacks.LearningRateScheduler(step_decay)
# Saves the whole model
cp_callback = tf.keras.callbacks.ModelCheckpoint('model.h5',
save_weights_only=False,
verbose=True)
# Creates the model
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(1,activation='relu',use_bias=False,input_dim=(1)))
model.add(tf.keras.layers.Dense(100,activation='relu',use_bias=False))
model.add(tf.keras.layers.Dense(1,activation='relu',use_bias=False))
model.compile(loss='mean_squared_error',
optimizer=tf.keras.optimizers.Adam(),
metrics=['accuracy'])
print('Learning Rate: ',tf.keras.backend.eval(model.optimizer.lr))
# Train the model
model.fit(x=train_set[0],y=train_set[1],epochs=2,steps_per_epoch=40,
validation_data=val_set,validation_steps=20,
callbacks=[lr_callback,cp_callback])
print('Learning Rate: ',tf.keras.backend.eval(model.optimizer.lr))
The code I am currently using to load it again is the follow one.
import numpy as np
import tensorflow as tf
# Creates dummy data
train_x = np.random.randint(10,size=40).reshape(-1,1)
train_y = np.random.randint(2,size=40).reshape(-1,1)
train_set = (train_x,train_y)
val_x = np.random.randint(10,size=20).reshape(-1,1)
val_y = np.random.randint(2,size=20).reshape(-1,1)
val_set = (val_x,val_y)
# Set Learning Rate Decay
import math
def step_decay(epoch):
print('--- Epoch:',epoch)
print(tf.keras.callbacks.History())
init_lr = 0.001
drop = 0.9
epochs_drop = 1.0
lr = init_lr*math.pow(drop,math.floor((1+epoch)/epochs_drop))
return(lr)
lr_callback = tf.keras.callbacks.LearningRateScheduler(step_decay)
# Saves the whole model
cp_callback = tf.keras.callbacks.ModelCheckpoint('model.h5',
save_weights_only=False,
verbose=True)
# Load model
model = tf.keras.models.load_model('model.h5')
print('Learning Rate: ',tf.keras.backend.eval(model.optimizer.lr))
model.fit(x=train_set[0],y=train_set[1],epochs=2,steps_per_epoch=40,
validation_data=val_set,validation_steps=20,initial_epoch=3,
callbacks=[lr_callback,cp_callback])
As you can observe when running it is that the learning rate is restored hence the whole model as well, or at least that's what I think. However, after running model.fit(...) it does nothing but return <tensorflow.python.keras.callbacks.History object at 0x7f11c81cb940>. Any idea how to allow it to train again?
EDIT: I also tried to compile it by setting the compile attribute of load_model to true.
Did you try to compile it after loading?