I am trying to use Keras dense neural networks to forecast some time series.
When fitting my model on complex real datasets, it converges toward a constant output, i.e. whatever the input, the model gives the same output (which seems to be a reasonable estimate of the mean of my dataset).
I reduced the problem down to very simple simulated datasets and still have the same issue. Here is a minimal working example:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
X = []
Y = []
for jh in range(10000):
    x = np.arange(-1, 1, 0.01)
    y = 1 + x * (np.random.random() - 0.5)
    y += np.random.randn(len(x)) / 100
    X.append(y[:100])
    Y.append(y[100:])
X = np.array(X)[:,:,None]
Y = np.array(Y)[:,:,None]
model = models.Sequential()
model.add(layers.Input((100,1,)))
model.add(layers.Flatten())
model.add(layers.Dense(100, activation='sigmoid'))
model.add(layers.Dense(100, activation='sigmoid'))
model.add(layers.Dense(100, activation='sigmoid'))
model.add(layers.Reshape((100, 1)))
model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer="adam")
# model.summary()
print("Fit model on training data")
print("Fit model on training data")
history = model.fit(x=X, y=Y, batch_size=10000, epochs=200)
for k in np.arange(0, 10000, 1000):
    plt.plot(np.arange(len(X[k])), X[k])
    plt.plot(np.arange(len(X[k]), len(X[k]) + len(Y[k])), model(X)[k])
    plt.plot(np.arange(len(X[k]), len(X[k]) + len(Y[k])), Y[k])
In this example, the model returns exactly the same output regardless of the input.
I tried changing the number of layers, the loss function, the learning rate, the batch size and the number of epochs, without any noticeable improvement.
Do you have any suggestions on this issue?
If you rearrange your random inputs to something like
y = np.array(1. + x)
y += 1. / 100.
and also build a second, structurally different dataset:
J, K = [], []
for jh in range(10000):
    j = np.arange(-1, 1, 0.01)
    k = -np.array(1. - j)
    k += 1. / 100
    J.append(k[:100])
    K.append(k[100:])
J = np.array(J)[:, :, None]
K = np.array(K)[:, :, None]
and finally add
plt.plot(np.arange(len(X[k]), len(X[k]) + len(Y[k])), model(J)[k])
in the plotting loop, then you will see two different results. You should probably check your dataset's diversity.
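To make that diversity check concrete, here is a minimal sketch (the overlay plot and the numeric summary are illustrative additions, not code from the question): if the samples cluster tightly around the dataset mean, a constant prediction of that mean is already near-optimal under MSE.
import numpy as np
import matplotlib.pyplot as plt

# Rebuild a small version of the dataset from the question.
X = []
for jh in range(1000):
    x = np.arange(-1, 1, 0.01)
    y = 1 + x * (np.random.random() - 0.5)
    y += np.random.randn(len(x)) / 100
    X.append(y[:100])
X = np.array(X)

# Overlay a few samples and the dataset mean.
for sample in X[::100]:
    plt.plot(sample, color="grey", alpha=0.4)
plt.plot(X.mean(axis=0), color="red", label="dataset mean")
plt.legend()
plt.show()

# A numeric summary of the same idea.
print("mean |deviation from mean curve|:", np.abs(X - X.mean(axis=0)).mean())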
I am having difficulty writing a custom loss function that makes use of random weights generated according to the class/state predicted by the Softmax output. The desired properties are:
The model is a simple feedforward neural network with input-dimension as 1 and the output dimension as 6.
The activation function of the output layer is Softmax, which intends to estimate the actual number of classes or states using Argmax.
Note that the training data only consists of X (there is no Y).
The loss function is defined according to random weights (here, sampled from a Weibull distribution) drawn based on the predicted state number for each input sample X.
Below, I provide a minimal example for illustration. For simplification purposes, I only define the loss function based on the random weights for state/class-1. I get: "ValueError: No gradients provided for any variable: ['dense_41/kernel:0', 'dense_41/bias:0', 'dense_42/kernel:0', 'dense_42/bias:0']."
As indicated in the post below, I found out that argmax is not differentiable, and a softargmax function would help (as I implemented in the following code). However, I still get the same error.
Getting around tf.argmax which is not differentiable
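As a quick sanity check (a minimal sketch I added for illustration; the small beta is an assumption, since a very large beta numerically saturates the softmax), gradients do flow through a soft-argmax on its own:
import tensorflow as tf

def softargmax(x, beta=10.0):
    # Differentiable surrogate for argmax: a softmax-weighted
    # average of the index positions.
    x = tf.convert_to_tensor(x)
    idx = tf.range(x.shape[-1], dtype=x.dtype)
    return tf.reduce_sum(tf.nn.softmax(x * beta, axis=-1) * idx, axis=-1)

logits = tf.Variable([[0.1, 0.7, 0.2]])
with tf.GradientTape() as tape:
    out = softargmax(logits)
print(tape.gradient(out, logits))  # not None: gradients flow here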
import sys
import time
from tqdm import tqdm
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
from scipy.stats import weibull_min
###############################################################################################
# Generate Dataset
lb = np.array([2.0]) # Left boundary
ub = np.array([100.0]) # Right boundary
# Data Points - uniformly distributed
N_r = 50
X_r = np.linspace(lb, ub, N_r)
###############################################################################################
#Define Model
class DGM:
    # Initialize the class
    def __init__(self, X_r):
        # Normalize training input data
        self.Xmean, self.Xstd = np.mean(X_r), np.std(X_r)
        X_r = (X_r - self.Xmean) / self.Xstd
        self.X_r = X_r
        # Input and output variable dimensions
        self.X_dim = 1
        self.Y_dim = 6
        # Define tensors
        self.X_r_tf = tf.convert_to_tensor(X_r, dtype=tf.float32)
        # Learning rate
        self.LEARNING_RATE = 1e-4
        # Feedforward neural network model
        self.modelTest = self.test_model()

    ###############################################
    # Initialize network weights and biases
    def test_model(self):
        input_shape = self.X_dim
        dimensionality = self.Y_dim
        model = tf.keras.Sequential()
        model.add(layers.Input(shape=input_shape))
        model.add(layers.Dense(64, kernel_initializer='glorot_uniform', bias_initializer='zeros'))
        model.add(layers.Activation('tanh'))
        model.add(layers.Dense(dimensionality))
        model.add(layers.Activation('softmax'))
        return model

    ##############################################
    def compute_loss(self):
        # Define optimizer
        gen_opt = tf.keras.optimizers.Adam(lr=self.LEARNING_RATE, beta_1=0.0, beta_2=0.9)
        with tf.GradientTape() as test_tape:
            ###### calculate loss
            generated_u = self.modelTest(self.X_r_tf, training=True)
            # number of data
            n_data = generated_u.shape[0]
            # initialize random weights assuming state-1 at all input samples
            wt1 = np.zeros((n_data, 1), dtype=np.float32)  # initialize weights
            for b in range(n_data):
                wt1[b] = weibull_min.rvs(c=2, loc=0, scale=4, size=1)
            wt1 = tf.reshape(tf.convert_to_tensor(wt1, dtype=tf.float32), shape=(n_data, 1))
            # print('-----------sampling done-----------')
            # determine the actual state using softargmax
            idst = self.softargmax(generated_u)
            idst = tf.reshape(tf.cast(idst, tf.float32), shape=(n_data, 1))
            # index state-1
            id1 = tf.constant(0., dtype=tf.float32)
            # assign weights if predicted state is state-1
            wt1_final = tf.cast(tf.equal(idst, id1), dtype=tf.float32) * wt1
            # final loss
            test_loss = tf.reduce_mean(tf.square(wt1_final))
            # print('-----------test loss calculated-----------')
        gradients_of_modelTest = test_tape.gradient(test_loss,
                                                    [self.modelTest.trainable_variables])
        gen_opt.apply_gradients(zip(gradients_of_modelTest[0], self.modelTest.trainable_variables))
        return test_loss

    # reference: Getting around tf.argmax which is not differentiable
    # https://stackoverflow.com/questions/46926809/getting-around-tf-argmax-which-is-not-differentiable
    def softargmax(self, x, beta=1e10):
        x = tf.convert_to_tensor(x)
        x_range = tf.range(x.shape.as_list()[-1], dtype=x.dtype)
        return tf.reduce_sum(tf.nn.softmax(x * beta, axis=1) * x_range, axis=-1)

    ##############################################
    def train(self, training_steps=100):
        train_start_time = time.time()
        for step in tqdm(range(training_steps), desc='Training'):
            start = time.time()
            test_loss = self.compute_loss()
            if (step + 1) % 10 == 0:
                elapsed_time = time.time() - train_start_time
                sec_per_step = elapsed_time / step
                mins_left = (training_steps - step) * sec_per_step
                tf.print("\nStep # ", step, "/", training_steps,
                         output_stream=sys.stdout)
                tf.print("Current time:", elapsed_time, " time left:",
                         mins_left, output_stream=sys.stdout)
                tf.print("Test Loss: ", test_loss, output_stream=sys.stdout)
###############################################################################################
#Define and train the model
model = DGM(X_r)
model.train(training_steps=100)
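One observation that may explain the error (a hedged note, not from the original post): tf.cast(tf.equal(idst, id1), ...) produces a hard 0/1 mask with no gradient path back to the network, and with beta=1e10 the softmax inside softargmax is numerically saturated. A minimal sketch of a differentiable alternative, which weights the Weibull samples by the predicted probability of state-1 instead of a hard equality mask (the soft weighting is an assumption, not the poster's method; the class index and Weibull parameters are taken from the question):
import tensorflow as tf
import numpy as np
from scipy.stats import weibull_min

def soft_state1_loss(model, x):
    # Keep the randomness outside the gradient path, and let
    # gradients flow through the softmax probabilities instead
    # of a hard, non-differentiable equality mask.
    probs = model(x, training=True)                # (n, 6) softmax output
    n_data = probs.shape[0]
    wt1 = weibull_min.rvs(c=2, loc=0, scale=4, size=(n_data, 1)).astype(np.float32)
    p_state1 = probs[:, 0:1]                       # predicted probability of state-1
    wt1_final = p_state1 * tf.constant(wt1)        # soft, differentiable weighting
    return tf.reduce_mean(tf.square(wt1_final))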
I am very new to neural networks. I'm learning some basics and I'm stuck at this point.
I have played with many parameters, including the initial values of the dynamic system. I dropped the learning rate and changed the number of epochs and the batch size. I also changed the number of samples I give to the fit function. I added a hidden layer and removed it again. In the end nothing really helped. Sometimes the predicted value is pretty close and sometimes it is really far away from the tested value. Am I missing something? How can I improve neural networks in general to get what I want to predict? Do I have to find a "sweet spot"?
Below you can find my code. I would be glad to hear new ideas for improving the network. I am a beginner and this is my first StackOverflow post.
######################### import stuff ##########################
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import time
import matplotlib.pyplot as p
from keras import backend as K
###Model
k_dim =1000 # Number of timestamps
seq_dim = 1000 # Number of samples
def g_func(x):
    return np.power(x, 2)
input1 = keras.Input(shape=(k_dim,))
input2 = keras.Input(shape=(k_dim,))
merged = layers.concatenate([input1,input2])
alpha_pred = layers.Dense(32,input_dim=2, activation='relu')(merged)
x = layers.Dense(16,activation='sigmoid')(alpha_pred)
output_a = layers.Dense(1)(x)
model = keras.Model(inputs=[input1,input2], outputs=output_a)
model.compile(loss='mean_absolute_error', optimizer='sgd', metrics=['mean_squared_error'])
model.summary()
######################## Simulating Input and Output data ########################
n = 2
alpha = np.random.rand(1,seq_dim)
m = 1
X_train = np.random.rand(seq_dim,k_dim)
X_train[:,0] = 0
u = [float] * seq_dim
y_train = np.zeros((seq_dim,k_dim))
for j in range(seq_dim):
    u = X_train[j, :]
    for k in range(k_dim-1):
        for i in range(n-1):
            y_train[j, k+1] = alpha[0, i] * y_train[j, k-i] + g_func(u[k])
alpha = np.transpose(alpha)
print('Learning rate before first fit:', model.optimizer.learning_rate.numpy())
history = model.fit([X_train,y_train], alpha, batch_size=64, epochs=3000)
print("Learning rate before second fit:", model.optimizer.learning_rate.numpy())
K.set_value(model.optimizer.learning_rate, 0.001)
history = model.fit([X_train,y_train], alpha, batch_size=64, epochs=1000)
# Plot the lossfunction and mean squared error
p.plot(history.history['loss'], 'b-', label='LOSS')
p.plot(history.history['mean_squared_error'], 'r-', label='Mean squared error')
p.legend(loc='best')
p.draw()
print('Model trained...')
time.sleep(2)
alpha = None
X_train = None
y_train = None
u = None
seq_dim = 1
#####
###Model has been trained. Lets test with new x and y to get one alpha###
####
X_train = np.random.rand(seq_dim,k_dim)
u = [float] * seq_dim
y_train = np.zeros((seq_dim,k_dim))
alpha = np.array([0.9])
for j in range(seq_dim):
    u = X_train[j, :]
    for k in range(k_dim-1):
        for i in range(n-1):
            y_train[j, k+1] = alpha[i] * y_train[j, k-i] + g_func(u[k])
z = model.predict([X_train, y_train])
#Compare the real value with the predicted value
print('Comparing real value with predicted value')
for i, j in zip(alpha, z):
    print('{} => {}'.format(i, j))
p.show()
The problem might be the sigmoid activation. It often causes vanishing gradients, and sometimes exploding gradients, during backpropagation, because its derivative lies in the range 0-0.25.
You need to know how your data is spread, so plot it. Also remove outliers in regression problems.
If the input data is spread too widely in your plot, the model will not predict close to the correct value every time.
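A minimal sketch of that suggestion applied to the question's model (the layer sizes and two-input structure are kept from the question; swapping sigmoid for relu/tanh and sgd for adam are illustrative choices, not tested results):
from tensorflow import keras
from tensorflow.keras import layers

k_dim = 1000
input1 = keras.Input(shape=(k_dim,))
input2 = keras.Input(shape=(k_dim,))
merged = layers.concatenate([input1, input2])

# relu/tanh keep gradients larger than sigmoid (whose derivative
# is at most 0.25), which can help the loss move off a plateau.
x = layers.Dense(32, activation='relu')(merged)
x = layers.Dense(16, activation='tanh')(x)
output_a = layers.Dense(1)(x)

model = keras.Model(inputs=[input1, input2], outputs=output_a)
model.compile(loss='mean_absolute_error', optimizer='adam',
              metrics=['mean_squared_error'])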
To calculate the second derivative of a neural network's output with respect to its input using a Keras model, I implemented the code from these two posts:
Keras: using the sum of the first and second derivatives of a model as final output
Second derivative in Keras
However, for both techniques, the second derivative is always equal to 0. Here is the problem reproduced on a simple regression of the quadratic function:
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.backend import set_session
from tensorflow.keras import datasets, layers, models
from tensorflow.keras import optimizers
import numpy as np
x = np.linspace(0, 1, 1000)
y = x**2
def model_regression():
    model = tf.keras.Sequential([
        layers.Dense(1024, input_dim=1, activation="relu"),
        layers.Dense(1, activation="linear")])
    model.compile(optimizer=optimizers.Adam(lr=0.001),
                  loss='mean_squared_error')
    return model
my_model = model_regression()
my_model.fit(x, y, epochs=10, batch_size=20)
y_pred = my_model.predict(x)
#### Technique 1
first_input = K.gradients(my_model.output, my_model.input)
second_input = K.gradients(first_input, my_model.input)
iterate_first = K.function([my_model.input], [first_input])
iterate_second = K.function([my_model.input], [second_input])
#### Technique 2
# def grad(y, x):
# print(y.shape)
# return layers.Lambda(lambda z: K.gradients(z[0], z[1]), output_shape=[1])([y, x])
# derivative1 = grad(my_model.output, my_model.input)
# derivative2 = grad(derivative1, my_model.input)
# iterate_first = K.function([my_model.input], [derivative1])
# iterate_second = K.function([my_model.input], [derivative2])
first_derivative, second_derivative = [], []
for i in range(x.shape[0]):
    first_derivative.append(iterate_first(np.array([[x[i]]]))[0][0][0])
    second_derivative.append(iterate_second(np.array([[x[i]]]))[0][0][0])
Here is the graphical result; as you can see, the second derivative is always equal to 0 while it should be approximately 2.
How can I correctly compute the second derivative of my neural network in Keras?
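A hedged observation that may explain the result (not part of the original post): a relu network is piecewise linear, so its second derivative is zero almost everywhere by construction; with a smooth activation such as tanh, a nested tf.GradientTape gives a non-trivial second derivative. A minimal sketch under those assumptions, in TF2 eager mode:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, optimizers

x = np.linspace(0, 1, 1000).reshape(-1, 1).astype("float32")
y = x ** 2

# tanh is smooth, so second derivatives are not identically zero
# (relu is piecewise linear: its second derivative is 0 almost everywhere).
model = tf.keras.Sequential([
    layers.Dense(64, input_dim=1, activation="tanh"),
    layers.Dense(1)])
model.compile(optimizer=optimizers.Adam(0.01), loss="mse")
model.fit(x, y, epochs=50, batch_size=32, verbose=0)

inp = tf.constant(x)
with tf.GradientTape() as tape2:
    tape2.watch(inp)
    with tf.GradientTape() as tape1:
        tape1.watch(inp)
        out = model(inp)
    d1 = tape1.gradient(out, inp)   # dy/dx, should approach 2x
d2 = tape2.gradient(d1, inp)        # d2y/dx2, should approach 2
print(float(tf.reduce_mean(d2)))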
So I have three models created in three different files: Model_A.py, Model_B.py, Model_C.py. Model_A is the first one I created. When I run Model_A, everything works well. However, when I run Model B or C, Python still runs Model A. I guessed it has to do with the session, but I am not sure, and I have not figured out how to fix it.
Here is the code for Model A.
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import glob
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # removes some warning messages. These warnings say that this PC's CPU could compute much faster and that this TensorFlow build was not made for it. For the moment we will keep it like this; if necessary, we'll use the GPU.
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, optimizers
from tensorflow.keras.callbacks import Callback
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import warnings
from math import sqrt
from keras import backend as K
#Early stopping based on loss
class EarlyStoppingByLossVal(Callback):
    def __init__(self, monitor='val_loss', value=0.00001, verbose=0):
        super(EarlyStoppingByLossVal, self).__init__()
        self.monitor = monitor
        self.value = value
        self.verbose = verbose

    def on_epoch_end(self, epoch, logs={}):
        current = logs.get(self.monitor)
        if current is None:
            warnings.warn("Early stopping requires %s available!" % self.monitor, RuntimeWarning)
            return
        if current < self.value:
            if self.verbose > 0:
                print("Epoch %05d: early stopping THR" % epoch)
            self.model.stop_training = True
NAME = "TensBo{}".format(int(time.time()))
tensorboard = tf.keras.callbacks.TensorBoard(log_dir='logs/{}'.format(NAME))
# # DATA MANAGER: Define a function that imports and defines data
def data_manager(paths, col, row_drop, inout):
    # # PREDICTION DATA
    test_files = glob.glob(paths[0] + "/*.csv")  # keeping directories in a list
    n_test = len(test_files)  # number of files
    q = 0
    test = [None]*n_test
    for csv in test_files:
        pred_data = pd.read_csv(csv, sep=';', encoding='cp1252')
        t_step = pred_data.shape[0]  # as all data has the same shape, we can keep these values for later use
        # shapes: (1, t_step-1, inout[0]) per test input; training inputs/targets: (batch_sz, t_step-1, inout[0]/inout[1])
        pred_input = np.array(np.reshape(pred_data.drop(columns=col, index=row_drop).values,
                                         (1, t_step-1, inout[0])), dtype='float')  # remove selected columns and indexes, reshape data
        pred_output = np.array(pred_data.loc['1':, col[1]: col[2]], dtype='float')
        test[q] = [pred_input, pred_output]
        q = q + 1
    # # TRAINING DATA
    # Introduce the path and count files
    train_files = glob.glob(paths[1] + "/*.csv")  # keeping directories in a list
    n_files = len(train_files)  # number of files
    # To check the encoding of a file, just print its path: with open(r'I:\05_Basanta Franco\Python\Data02\Data1574095060.csv') as f:
    #     print(f)
    inputs = np.zeros([n_files*(t_step-1), inout[0]])
    targets = np.zeros([n_files*(t_step-1), inout[1]])
    i = t_step-1
    j = 0
    # import all the csv files and store them in data
    for csv in train_files:
        matrix = pd.read_csv(csv, sep=';', encoding='cp1252')
        data_in = matrix.drop(columns=col, index=row_drop).values
        data_out = matrix.loc['1':, col[1]: col[2]].values
        inputs[j:i, :] = data_in
        targets[j:i, :] = data_out
        i = i + t_step-1
        j = j + t_step-1
    batch_sz = n_files
    # creating input and target tensors of size (batch, timestep, inputs)
    inputs = np.reshape(inputs, (batch_sz, t_step-1, inout[0]))  # input selection
    targets = np.reshape(targets, (batch_sz, t_step-1, inout[1]))  # target selection
    return test, inputs, targets, n_test, t_step
# # PATHS
test_path = r'I:\05_Basanta Franco\Python\Test'
train_path = r'I:\05_Basanta Franco\Python\Data02'
model_path = r'I:\05_Basanta Franco\Python\model\model01\model{}.h5'
paths = [test_path, train_path, model_path]
# # IMPORT DATA
col = ['All calculations', 'MSNS-Trafo', 'MSNS-Trafo.1']
row_drop = 0
inout = [11, 2]
test, inputs, targets, n_test, t_step = data_manager(paths, col, row_drop, inout) #test is a list with test inputs and outputs.
# # NEURAL NETWORK CREATOR
# Creating a model, which is a linear stack of layers
model = Sequential()
'''
LSTM layer of n nodes. The shape of the input is the number of input columns. The activation function is the rectified linear unit.
return_sequences=True tells the layer to output the full sequence; this is necessary if another recurrent layer follows, otherwise that layer would not understand the input.
TimeDistributed is important: it relates every step of the input sequence to its corresponding output.
Otherwise we would only be considering the last value of the sequence.
'''
l1 = model.add(layers.LSTM(inout[0], input_shape=(t_step-1, inout[0]), activation='relu', return_sequences=True)) #adding a RNN layer
model.add(layers.TimeDistributed(layers.Dense(inout[0])))
l3 = model.add(layers.LSTM(30, activation='relu', return_sequences=True)) #adding a RNN layer
model.add(layers.TimeDistributed(layers.Dense(20)))
l4 = model.add(layers.LSTM(10, activation='relu', return_sequences=True)) #adding a RNN layer
model.add(layers.TimeDistributed(layers.Dense(10)))
model.add(layers.Dropout(0.2))
l5 = model.add(layers.Dense(2)) #fully connected layer. What i would understand as a normal layer
opt = optimizers.Adam(lr=1e-03)  # optimizer learning rate: the step size of the weight updates; a suitable value helps find the minimum
callbacks = [EarlyStoppingByLossVal('val_loss', value=0.002),
ModelCheckpoint(filepath=model_path.format(int(time.time())), save_best_only=True)]
#
#compiling the model. Defining some of the features for the fit like the type of loss function, the optimizer and metrics that are interesting for us
model.compile(loss='mean_squared_error',
              optimizer=opt,
              metrics=['mse', 'mae'])  # accuracy is only valid for classification tasks
history = model.fit(inputs, targets, epochs=50, validation_split=0.25, callbacks=callbacks)
# Evaluate the model
scores = model.evaluate(inputs, targets, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
# print a summary of the outputs of every layer
print(model.summary())
#SAVING THE MODEL
#
#The model is saved by ModelCheckpoint in a folder. Here, we are saving the model's architecture in a json file
#model_json = model.to_json()
#with open("model/model01/model.json", "w") as json_file:
# json_file.write(model_json)
# PREDICTIONS WITH THE MODEL
t = 1
fig1 = plt.figure()
for prediction in test:
    NN_pred = model.predict(prediction[0])
    # ANALYSIS
    # reshape the prediction for plotting
    NN_pred = np.reshape(NN_pred, (prediction[1].shape[0], inout[1]))
    prediction[0] = np.reshape(prediction[0], (t_step-1, inout[0]))
    # plots: top, predicted and desired test output; bottom, test inputs
    plt.subplot(n_test, 1, t)
    plt.title('Test0' + str(t))
    plt.plot(NN_pred)
    plt.plot(prediction[1])
    plt.legend(['I_real_pred', 'I_im_pred', 'Ir', 'Ii'])
    # root mean squared error
    rmse = sqrt(mean_squared_error(prediction[1], NN_pred))
    print('Test RMSE: %.3f' % rmse)
    t = t + 1
fig2 = plt.figure()
# plot loss during training
plt.subplot(211)
plt.title('Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
# plot mse during training
plt.subplot(212)
plt.title('Mean Squared Error')
plt.plot(history.history['mse'], label='train')
plt.plot(history.history['val_mse'], label='test')
# print inputs yes or no
printin = input('Print inputs as well? [y/n]: ')
m = True
while m == True:
    if printin == 'y':
        t = 1
        fig3 = plt.figure()
        for prediction in test:
            plt.title('Inputs: V, P, Q')
            plt.subplot(n_test, 1, t)
            plt.plot(prediction[0])
            t = t + 1
        m = False
    elif printin == 'n':
        m = False
    else:
        printin = input('Answer not valid. Print inputs? [y/n]: ')
plt.show()
And here is the code for Model B.
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import glob
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # removes some warning messages. These warnings say that this PC's CPU could compute much faster and that this TensorFlow build was not made for it. For the moment we will keep it like this; if necessary, we'll use the GPU.
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, optimizers
from tensorflow import Graph
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from keras import backend as K
from Model_A import EarlyStoppingByLossVal
K.clear_session()
def data_manager_1(paths, col, row_drop, inout):
    # # PREDICTION DATA
    test_files = glob.glob(paths[0] + "/*.csv")  # keeping directories in a list
    n_test = len(test_files)  # number of files
    q = 0
    test = [None]*n_test
    for csv in test_files:
        pred_data = pd.read_csv(csv, sep=';', encoding='cp1252')
        t_step = pred_data.shape[0]  # as all data has the same shape, we can keep these values for later use
        pred_input = np.array(np.reshape(pred_data.drop(columns=col, index=row_drop).values,
                                         (t_step-1, 1, inout[0])), dtype='float')  # remove selected columns and indexes, reshape data
        pred_output = np.array(pred_data.loc['1':, col[1]: col[2]], dtype='float')
        test[q] = [pred_input, pred_output]
        q = q + 1
    # # TRAINING DATA
    # Introduce the path and count files
    train_files = glob.glob(paths[1] + "/*.csv")  # keeping directories in a list
    n_files = len(train_files)  # number of files
    # To check the encoding of a file, just print its path: with open(r'I:\05_Basanta Franco\Python\Data02\Data1574095060.csv') as f:
    #     print(f)
    inputs = np.zeros([n_files*(t_step-1), inout[0]])
    targets = np.zeros([n_files*(t_step-1), inout[1]])
    i = t_step-1
    j = 0
    # import all the csv files and store them in data
    for csv in train_files:
        matrix = pd.read_csv(csv, sep=';', encoding='cp1252')
        data_in = matrix.drop(columns=col, index=row_drop).values
        data_out = matrix.loc['1':, col[1]: col[2]].values
        inputs[j:i, :] = data_in
        targets[j:i, :] = data_out
        i = i + t_step-1
        j = j + t_step-1
    batch_sz = n_files
    # creating input and target tensors of size (batch, timestep, inputs)
    inputs = np.reshape(inputs, (inputs.shape[0], 1, inout[0]))  # input selection
    targets = np.reshape(targets, (targets.shape[0], 1, inout[1]))  # target selection
    return test, inputs, targets, n_test, t_step
# # PATHS
test_path = r'I:\05_Basanta Franco\Python\Test'
train_path = r'I:\05_Basanta Franco\Python\Data02'
model_path = r'I:\05_Basanta Franco\Python\model\model02\model{}.h5'
paths = [test_path, train_path, model_path]
# # IMPORT DATA
col = ['All calculations', 'MSNS-Trafo', 'MSNS-Trafo.1']
row_drop = 0
inout = [11, 2]
test, inputs, targets, n_test, t_step = data_manager_1(paths, col, row_drop, inout) #test is a list with test inputs and outputs.
# # CREATE THE MODEL
model02 = Sequential()
l1 = model02.add(layers.LSTM(inout[0], input_shape=(1, inout[0]), activation='relu', return_sequences=True)) #adding a RNN layer
model02.add(layers.TimeDistributed(layers.Dense(inout[0])))
l3 = model02.add(layers.LSTM(5, activation='relu', return_sequences=True)) #adding a RNN layer
model02.add(layers.TimeDistributed(layers.Dense(5)))
model02.add(layers.Dropout(0.2))
l5 = model02.add(layers.Dense(2)) #fully connected layer. What i would understand as a normal layer
#compiling the model. Defining some of the features for the fit like the type of loss function, the optimizer and metrics that are interesting for us
opt = optimizers.Adam(lr=1e-03)  # optimizer learning rate: the step size of the weight updates; a suitable value helps find the minimum
callbacks = [EarlyStopping('val_loss', patience=20),
ModelCheckpoint(filepath=model_path.format(int(time.time())), save_best_only=True)]
model02.compile(loss='mean_squared_error',
                optimizer=opt,
                metrics=['mse', 'mae'])  # accuracy is only valid for classification tasks
# train model and save history
history = model02.fit(inputs, targets, epochs=20, validation_split=0.25, callbacks=callbacks)
# plot loss during training
def train_plots(history):
    fig2 = plt.figure()
    plt.subplot(211)
    plt.title('Loss')
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='test')
    plt.legend()
    # plot mse during training
    plt.subplot(212)
    plt.title('Mean Squared Error')
    plt.plot(history.history['mse'], label='train')
    plt.plot(history.history['val_mse'], label='test')
    plt.show()
train_plots(history)
I tried initializing graphs and sessions for the creation of the models, but it is not working.
Would it be possible (at least as a workaround) to kill each process after it finishes its task? Killing the process would ensure that TensorFlow's memory is released.
If the models need a communication channel or have intermediate results to send over, you could use queues or text files to solve this.
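A minimal sketch of that workaround (hedged: the use of Python's multiprocessing module is an illustrative choice, and "import Model_A runs the training" is an assumption about how the scripts are structured; each model runs and dies in its own process, so TensorFlow state cannot leak between them):
import multiprocessing as mp

def run_model_a(queue):
    # Import and build the model inside the child process so all
    # TensorFlow state lives and dies with this process.
    import Model_A  # hypothetical: the training script runs on import
    queue.put("model A done")

if __name__ == "__main__":
    q = mp.Queue()
    p = mp.Process(target=run_model_a, args=(q,))
    p.start()
    print(q.get())   # intermediate results can travel over the queue
    p.join()         # process exit releases all TensorFlow memory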
Just fixed it. My problem was that I was unable to reset the TensorFlow session or clean the graphs in my session to create and train a different model. I found that the command keras.backend.reset_uids() does that. Thank you anyway!
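For reference, a minimal sketch of the related keras.backend.clear_session() approach (clear_session() drops the whole graph and layer state, whereas reset_uids() only resets the layer-name counters; the tiny model below is illustrative, not from the original post):
from tensorflow.keras import backend as K
from tensorflow.keras import layers, models

def fresh_model():
    # Drop any graph/state left over from a previously built model
    # before constructing a new one.
    K.clear_session()
    model = models.Sequential([layers.Dense(2, input_shape=(11,))])
    model.compile(loss='mse', optimizer='adam')
    return model

model_a = fresh_model()
model_b = fresh_model()  # built on a clean session, independent of model_a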