I was trying to add custom metrics while training my LSTM using Keras. See the code below:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Masking, Dropout
from keras.optimizers import SGD, Adam, RMSprop
import keras.backend as K
import numpy as np
_Xtrain = np.random.rand(1000,21,47)
_ytrain = np.random.randint(2, size=1000)
_Xtest = np.random.rand(200,21,47)
_ytest = np.random.randint(2, size=200)
def t1(y_pred, y_true):
    return K.tf.count_nonzero(1 - y_true)

def t2(y_pred, y_true):
    return K.tf.count_nonzero(y_true)
def build_model():
    model = Sequential()
    model.add(Masking(mask_value=0, input_shape=(21, _Xtrain[0].shape[1])))
    model.add(LSTM(32, return_sequences=True))
    model.add(LSTM(64, return_sequences=False))
    model.add(Dense(1, activation='sigmoid'))
    rms = RMSprop(lr=.001, decay=.001)
    model.compile(loss='binary_crossentropy', optimizer=rms, metrics=[t1, t2])
    return model
model = build_model()
hist = model.fit(_Xtrain, _ytrain, epochs=1, batch_size=5, validation_data=(_Xtest, _ytest), shuffle=True)
The output of the above code is as follows:
Train on 1000 samples, validate on 200 samples
Epoch 1/1
1000/1000 [==============================] - 5s - loss: 0.6958 - t1: 5.0000 - t2: 5.0000 - val_loss: 0.6975 - val_t1: 5.0000 - val_t2: 5.0000
So it appears that both methods t1 and t2 produce exactly the same output, which baffles me. What could be going wrong, and how could I get the tensor complementary to y_true?
Backstory: I was trying to write custom metrics (the F1 score in particular) for my model. Keras does not seem to have those readily available. If anyone knows a better way, please point me in the right direction.
One easy way to handle this issue is to use a callback instead. Following the logic from this issue, you can specify a metrics callback that calculates any metric using scikit-learn. For example, if you wanted to calculate F1, you could do the following:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Masking, Dropout
from keras.optimizers import SGD, Adam, RMSprop
import keras.backend as K
from keras.callbacks import Callback
import numpy as np
from sklearn.metrics import f1_score
_Xtrain = np.random.rand(1000,21,47)
_ytrain = np.random.randint(2, size=1000)
_Xtest = np.random.rand(200,21,47)
_ytest = np.random.randint(2, size=200)
class MetricsCallback(Callback):
    def __init__(self, train_data, validation_data):
        super().__init__()
        self.validation_data = validation_data
        self.train_data = train_data
        self.f1_scores = []
        self.cutoff = .5

    def on_epoch_end(self, epoch, logs={}):
        X_val = self.validation_data[0]
        y_val = self.validation_data[1]
        preds = self.model.predict(X_val)
        # Flatten the (n, 1) prediction column before handing it to sklearn
        f1 = f1_score(y_val, (preds > self.cutoff).astype(int).ravel())
        self.f1_scores.append(f1)
def build_model():
    model = Sequential()
    model.add(Masking(mask_value=0, input_shape=(21, _Xtrain[0].shape[1])))
    model.add(LSTM(32, return_sequences=True))
    model.add(LSTM(64, return_sequences=False))
    model.add(Dense(1, activation='sigmoid'))
    rms = RMSprop(lr=.001, decay=.001)
    model.compile(loss='binary_crossentropy', optimizer=rms, metrics=['acc'])
    return model
model = build_model()
hist = model.fit(_Xtrain, _ytrain, epochs=2, batch_size=5, validation_data=(_Xtest, _ytest), shuffle=True,
callbacks=[MetricsCallback((_Xtrain, _ytrain), (_Xtest, _ytest))])
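As an aside on why t1 and t2 match: Keras calls metric functions as metric(y_true, y_pred), so with the (y_pred, y_true) signatures above the sigmoid outputs actually land in the variable named y_true. Every sigmoid output lies strictly between 0 and 1, so both count_nonzero(y_true) and count_nonzero(1 - y_true) count the entire batch, which is why both metrics report 5.0000 (the batch size). A minimal sketch of the corrected signatures, assuming the same K.tf backend access as in the question:

# Keras passes (y_true, y_pred) in that order; with the order fixed,
# t1 counts the zeros in y_true and t2 counts the ones.
def t1(y_true, y_pred):
    return K.tf.count_nonzero(1 - y_true)

def t2(y_true, y_pred):
    return K.tf.count_nonzero(y_true)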
I know that there is a problem with reproducibility in Keras. However, based on my research I created a function:
def set_seed():
    seed_value = 42
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                            inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                config=session_conf)
    np.random.seed(seed_value)
    random.seed(seed_value)
    tf.random.set_seed(seed_value)
    K.set_session(sess)
which should give me reproducible results in Keras.
My problem
I'm trying to run a self-written grid search on a neural network using Keras:
# Early stopping
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience = 50)
callbacks=[es]
# Possible learning rates
learning_rates = np.linspace(0.1, 10**(-5), 10)
# Run grid search on one layer
set_seed()
mse1 = np.array([])
rate1 = np.array([])
neuron_number1 = np.array([])
for rate in learning_rates[0:2]:
    for neuron in range(1, 3):
        model = Sequential()
        model.add(Dense(neuron, input_dim=2, kernel_initializer='normal', activation='relu'))
        model.add(Dense(1, activation='relu'))
        model.summary()
        model.compile(loss='mse', optimizer=SGD(lr=rate), metrics=['mse'])
        history = model.fit(X_train, y_train, epochs=1000, batch_size=50, validation_split=0.5, callbacks=[es])
        mse1 = np.append(mse1, history.history['val_loss'][-1])
        rate1 = np.append(rate1, rate)
        neuron_number1 = np.append(neuron_number1, neuron)
neural_summary1 = pandas.DataFrame(data = [neuron_number1, rate1, mse1])
neural_summary1 = neural_summary1.transpose()
neural_summary1.columns = ["number_of_neurons", "learning_rate", "mse"]
print(neural_summary1.iloc[neural_summary1['mse'].idxmin()])
number_of_neurons 2.000000
learning_rate 0.088890
mse 0.159922
But when I run the best configuration on its own, outside the grid search:
set_seed()
model = Sequential()
model.add(Dense(2, input_dim=2, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, activation = 'relu'))
model.summary()
model.compile(loss='mse', optimizer=SGD(lr=0.088890), metrics=['mse'])
history = model.fit(X_train, y_train, epochs=1000, batch_size=50, validation_split=0.5, callbacks=[es])
print(history.history['val_loss'][-1])
8.767917346954345
which differs from the result obtained previously by the grid search.
Do you know where the mistake in my code is, or why it is not working?
Packages that I use are the following:
import pandas
import random
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from keras.optimizers import SGD
import os
from keras import backend as K
I want to train a neural net for sentiment analysis. I have followed the tutorials on the Keras website, but I had to adapt the code to my use case in order to be able to use the net afterwards.
For this purpose I decode the texts of the imdb dataset from Keras back from numbers to words, and then I stem the text, because I need to work with the stemmed text. After that, since I want to control the way I do the word embeddings rather than using texts_to_sequences and pad_sequences, I train a doc2vec model and apply it to the training set, so that I can obtain embeddings for the texts I want to classify.
The problem is that the net does not learn anything: the accuracy does not improve and I cannot reduce the loss. I have tried many, many things, like the architecture of the net, all the hyperparameters, and changing the last layer from 2 units to 1 and from sparse_categorical_crossentropy to binary_crossentropy. Let's see if anybody can help and shed some light on my problem. I paste the code here, and thanks in advance.
from keras.datasets import imdb
max_features = 40000
(training_data, training_targets), (testing_data, testing_targets) = imdb.load_data(num_words=max_features)
import numpy as np
data = np.concatenate((training_data, testing_data), axis=0)
targets = np.concatenate((training_targets, testing_targets), axis=0)
index = imdb.get_word_index()
reverse_index = dict([(value, key) for (key, value) in index.items()])
decoded = " ".join([reverse_index.get(i - 3, "") for i in data[0]])
import nltk
from nltk.stem import LancasterStemmer
toke_corpus = list()
lan = LancasterStemmer()
from tqdm import tqdm
lista_reviews = list()
for review in tqdm(data):
    lista_reviews.append(np.array([lan.stem(reverse_index.get(i - 3, '')) for i in review][1:]))
train_x, test_x = lista_reviews[10000:], lista_reviews[:10000]
train_y, test_y = targets[10000:], targets[:10000]
from gensim.models.callbacks import CallbackAny2Vec
class EpochLogger(CallbackAny2Vec):
    '''Callback to log information about training'''
    def __init__(self):
        self.epoch = 0

    def on_epoch_begin(self, model):
        print("Epoch #{} start".format(self.epoch))

    def on_epoch_end(self, model):
        print("Epoch #{} end".format(self.epoch))
        self.epoch += 1
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
documents = [TaggedDocument(doc, [i]) for i, doc in enumerate(lista_reviews)]
print("DOcuments already built")
epoch_logger = EpochLogger()
model = Doc2Vec(documents, vector_size=512, window=5, min_count=3, workers=8, epochs = 7, callbacks=[epoch_logger])
encoded_x_train, encoded_x_test = list(), list()
from tqdm import tqdm
for i in tqdm(train_x):
    encoded_x_train.append(model.infer_vector(i))
for k in tqdm(test_x):
    encoded_x_test.append(model.infer_vector(k))
import keras
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.50, patience=2, verbose=1, mode='auto', cooldown=0, min_lr=0.00001)
early = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=4, verbose=1, mode='auto')
from keras import models
from keras.models import Sequential
from keras import layers
from keras.layers import Embedding, Bidirectional, Dense, LSTM, Conv1D, MaxPooling1D, Flatten
model1 = Sequential()
model1.add(Embedding(input_dim = max_features, input_length=512, output_dim=128, trainable=False))
model1.add(Conv1D(filters=64,
                  kernel_size=5,
                  padding='valid',
                  activation='linear',
                  strides=1))
model1.add(MaxPooling1D(pool_size=4))
model1.add(Dense(64, activation='linear'))
model1.add(LSTM(32, activation='tanh'))
# model1.add(Dense(32, activation='relu'))
# model1.add(Flatten())
# model1.add(Dense(1, activation='sigmoid'))
model1.add(Dense(2, activation='softmax'))
model1.summary()
from keras import optimizers
# sgd = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, amsgrad=False)
model1.compile(loss='sparse_categorical_crossentropy',
               optimizer=adam,
               metrics=['accuracy'])
history = model1.fit(np.array(encoded_x_train), np.array(train_y),
                     epochs=20,
                     batch_size=500,
                     validation_data=(np.array(encoded_x_test), np.array(test_y)),
                     callbacks=[reduce_lr, early])
You use Doc2Vec to create one embedding per sample. For this reason, I don't think the Embedding, Conv1D and MaxPooling1D layers are useful in your network; they are useful in word2vec-style setups, where you extract an embedding for each token and feed the resulting sequence into the network.
Try feeding your network directly with your document embeddings, in this way:
model1 = Sequential()
model1.add(Dense(128, activation='relu', input_shape=(512,)))
# ....
model1.add(Dense(2, activation='softmax'))
adam = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, amsgrad=False)
model1.compile(loss='sparse_categorical_crossentropy',
               optimizer=adam,
               metrics=['accuracy'])
history = model1.fit(np.array(encoded_x_train), np.array(train_y),
                     epochs=20,
                     batch_size=500,
                     validation_data=(np.array(encoded_x_test), np.array(test_y)),
                     callbacks=[reduce_lr, early])
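Since you also mentioned switching the last layer from 2 units to 1 with binary_crossentropy: for a two-class problem that variant is equivalent. A minimal sketch, assuming the same 512-dimensional Doc2Vec vectors as input:

from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers

# One sigmoid unit with binary_crossentropy instead of a 2-way softmax
model2 = Sequential()
model2.add(Dense(128, activation='relu', input_shape=(512,)))
model2.add(Dense(1, activation='sigmoid'))
model2.compile(loss='binary_crossentropy',
               optimizer=optimizers.Adam(learning_rate=0.001),
               metrics=['accuracy'])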
I'm trying to change the learning rate of my model after it has been trained with a different learning rate.
I read here, here, here and in some other places I can't even find anymore.
I tried:
model.optimizer.learning_rate.set_value(0.1)
model.optimizer.lr = 0.1
model.optimizer.learning_rate = 0.1
K.set_value(model.optimizer.learning_rate, 0.1)
K.set_value(model.optimizer.lr, 0.1)
model.optimizer.lr.assign(0.1)
... but none of them worked!
I don't understand how there could be such confusion around such a simple thing. Am I missing something?
EDIT: Working example
Here is a working example of what I'd like to do:
from keras.models import Sequential
from keras.layers import Dense
import keras
import numpy as np
model = Sequential()
model.add(Dense(1, input_shape=(10,)))
optimizer = keras.optimizers.Adam(lr=0.01)
model.compile(loss='mse',
optimizer=optimizer)
model.fit(np.random.randn(50,10), np.random.randn(50), epochs=50)
# Change learning rate to 0.001 and train for 50 more epochs
model.fit(np.random.randn(50,10), np.random.randn(50), initial_epoch=50, epochs=50)
You can change the learning rate as follows:
from keras import backend as K
K.set_value(model.optimizer.learning_rate, 0.001)
Included in your complete example, it looks as follows:
from keras.models import Sequential
from keras.layers import Dense
from keras import backend as K
import keras
import numpy as np
model = Sequential()
model.add(Dense(1, input_shape=(10,)))
optimizer = keras.optimizers.Adam(lr=0.01)
model.compile(loss='mse', optimizer=optimizer)
print("Learning rate before first fit:", model.optimizer.learning_rate.numpy())
model.fit(np.random.randn(50,10), np.random.randn(50), epochs=50, verbose=0)
# Change learning rate to 0.001 and train for 50 more epochs
K.set_value(model.optimizer.learning_rate, 0.001)
print("Learning rate before second fit:", model.optimizer.learning_rate.numpy())
model.fit(np.random.randn(50,10),
          np.random.randn(50),
          initial_epoch=50,
          epochs=50,
          verbose=0)
I've just tested this with Keras 2.3.1. I'm not sure why the approach didn't seem to work for you.
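In case the attribute name is the issue: older Keras/TF releases expose the learning rate as lr rather than learning_rate. A small version-tolerant sketch (an assumption about older backends, not tested on every release):

from keras import backend as K

# Older optimizers expose `lr`, newer ones `learning_rate`
if hasattr(model.optimizer, 'learning_rate'):
    K.set_value(model.optimizer.learning_rate, 0.001)
else:
    K.set_value(model.optimizer.lr, 0.001)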
There is another way: you have to find the variable that holds the learning rate and assign it another value.
optimizer = tf.keras.optimizers.Adam(0.001)
optimizer.learning_rate.assign(0.01)
print(optimizer.learning_rate)
output:
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.01>
You can change the learning rate during training with a LearningRateScheduler callback:
from keras.callbacks import LearningRateScheduler

# This is a sample of a scheduler I used in the past: multiply the
# current learning rate by decay_rate every decay_step epochs. Note
# that `lr` is the rate already in effect, so a single multiplication
# per step is enough (compounding it again with pow() would apply the
# decay twice).
def lr_scheduler(epoch, lr):
    decay_rate = 0.85
    decay_step = 1
    if epoch % decay_step == 0 and epoch:
        return lr * decay_rate
    return lr
Apply the scheduler to your model:
callbacks = [LearningRateScheduler(lr_scheduler, verbose=1)]
model = build_model(pretrained_model=ka.InceptionV3, input_shape=(224, 224, 3))
history = model.fit(train, callbacks=callbacks, epochs=EPOCHS, verbose=1)
You should define it when you compile the model:
optimizer = keras.optimizers.Adam(lr=0.01)
model.compile(loss='mse',
optimizer=optimizer,
metrics=['categorical_accuracy'])
Looking at your comment, if you want to change the learning rate after training has started, you need to use a scheduler: link
Edit with your code and scheduler:
from keras.models import Sequential
from keras.layers import Dense
import keras
import numpy as np
def lr_scheduler(epoch, lr):
    # Drop the learning rate to 0.001 after epoch 50
    if epoch > 50:
        return 0.001
    return lr
model = Sequential()
model.add(Dense(1, input_shape=(10,)))
optimizer = keras.optimizers.Adam(lr=0.01)
model.compile(loss='mse',
optimizer=optimizer)
callbacks = [keras.callbacks.LearningRateScheduler(lr_scheduler, verbose=1)]
model.fit(np.random.randn(50,10), np.random.randn(50), epochs=100, callbacks=callbacks)
Suppose that you use the Adam optimizer in Keras; you'd want to define your optimizer before you compile your model with it.
For example, you can define
myadam = keras.optimizers.Adam(learning_rate=0.1)
Then, you compile your model with this optimizer.
In case you want to change your optimizer (to a different type of optimizer or to a different learning rate), you can define a new optimizer and compile your existing model with the new optimizer.
Hope this helps!
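A minimal sketch of that re-compile step, assuming `model` is the already-trained model from the question (the x and y arrays here are hypothetical placeholders; re-compiling keeps the trained weights, but the new optimizer's internal state starts from scratch):

import keras
import numpy as np

x, y = np.random.randn(50, 10), np.random.randn(50)  # hypothetical data

# Define a new optimizer with the new learning rate and re-compile
new_adam = keras.optimizers.Adam(lr=0.001)
model.compile(loss='mse', optimizer=new_adam)
model.fit(x, y, initial_epoch=50, epochs=100)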
Some time ago I had a project for which I needed something similar. My idea for changing the learning rate was to compile a new model with the new rate, then load the parameter weights from the old model into the new one.
For your example:
from keras.models import Sequential
from keras.layers import Dense
import keras
import numpy as np
# Initial model
model = Sequential()
model.add(Dense(1, input_shape=(10,)))
optimizer = keras.optimizers.Adam(lr=0.01)
model.compile(loss='mse', optimizer=optimizer)
model.fit(np.random.randn(50,10), np.random.randn(50), epochs=50)
# Change learning rate to 0.001 and train for 50 more epochs
new_model = Sequential()
new_model.add(Dense(1, input_shape=(10,)))
optimizer = keras.optimizers.Adam(lr=0.001)
new_model.compile(loss='mse', optimizer=optimizer)
new_model.set_weights(model.get_weights())
model = new_model
model.fit(np.random.randn(50,10), np.random.randn(50), initial_epoch=50, epochs=50)
With this you may see a worse fit of your model in the first epochs, because Adam uses statistics from previous steps to optimize and you lose them.
Hope it helps someone!
I'm trying to get a very (over)simplified Keras binary classifier neural network running, without success. The loss just stays constant. I've played around with optimizers (SGD, Adam, RMSprop), learning rates, weight initializations, batch size and input data normalization so far.
Nothing changes at all. Am I doing something fundamentally wrong? Here is the code:
import numpy as np
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
data = np.array(
    [
        [100, 35, 35, 12, 0],
        [101, 46, 35, 21, 0],
        [130, 56, 46, 3412, 1],
        [131, 58, 48, 3542, 1]
    ]
)
x = data[:,1:-1]
y_target = data[:,-1]
x = x / np.linalg.norm(x)
model = Sequential()
model.add(Dense(3, input_shape=(3,), activation='softmax', kernel_initializer='lecun_normal',
                bias_initializer='lecun_normal'))
model.add(Dense(1, activation='softmax', kernel_initializer='lecun_normal',
                bias_initializer='lecun_normal'))
model.compile(optimizer=SGD(learning_rate=0.1),
loss='binary_crossentropy',
metrics=['accuracy'])
model.fit(x, y_target, batch_size=2, epochs=10,
verbose=1)
The softmax definition is:
exp(a_i) / sum(exp(a))
so when you use it with a single neuron you get:
exp(a) / exp(a) = 1
That is why your classifier doesn't work with a single neuron.
You can use sigmoid instead in this special case:
exp(a) / (exp(a) + 1)
Furthermore, the sigmoid function is for two-class classifiers; softmax is the extension of sigmoid to multiclass classifiers.
For the first layer you should use relu or sigmoid as the activation instead of softmax.
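A quick numerical sketch in plain NumPy, just to illustrate the point, showing that softmax over a single logit is always 1 while sigmoid still produces a usable probability:

import numpy as np

def softmax(a):
    # Subtract the max for numerical stability
    e = np.exp(a - np.max(a))
    return e / e.sum()

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

print(softmax(np.array([2.7])))  # [1.] -- always 1 for a single logit
print(sigmoid(np.array([2.7])))  # [0.937...] -- a usable probability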
This is the working solution, based on the feedback I got:
import numpy as np
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from keras.utils import to_categorical
data = np.array(
    [
        [100, 35, 35, 12, 0],
        [101, 46, 35, 21, 0],
        [130, 56, 46, 3412, 1],
        [131, 58, 48, 3542, 1]
    ]
)
x = data[:,1:-1]
y_target = data[:,-1]
x = x / np.linalg.norm(x)
model = Sequential()
model.add(Dense(3, input_shape=(3,), activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer=SGD(learning_rate=0.1),
loss='binary_crossentropy',
metrics=['accuracy'])
model.fit(x, y_target, epochs=1000,
verbose=1)
I am trying to solve a regression problem with Keras, but the MSE is huge, something like 29346217.6819.
I am really new to this, so do you have any suggestions to make the model give a reasonable MSE? I am not sure whether my data is OK or problematic, but it is actual sales data.
Data (about 3000 lines; I use 2000 for training and 1000 for testing)
Full data is here
ProductNo,Day,Month,CartonSales
1,6,02,2374
1,3,02,2374
1,6,04,2374
1,6,04,2374
1,3,06,2374
1,6,09,2374
1,1,09,2374
1,6,09,2374
1,6,10,2374
Code
from keras import optimizers
from keras.callbacks import Callback
from numpy import array
from keras.models import Sequential
from keras.layers import Dense, Dropout
from matplotlib import pyplot
import pandas as pds
# prepare sequence
class TestCallback(Callback):
    def __init__(self, test_data):
        self.test_data = test_data

    def on_epoch_end(self, epoch, logs={}):
        x, y = self.test_data
        # evaluate() returns [loss, mse] here, since the model is
        # compiled with metrics=['mse']
        loss, mse = self.model.evaluate(x, y, verbose=0)
        print('\nTesting loss: {}, mse: {}\n'.format(loss, mse))
dataframe = pds.read_csv('pmidata.csv', usecols=[0, 1, 2, 3])
dataframe = dataframe.sample(frac=1)
dataframeX_train = dataframe.iloc[0:2000][['ProductNo', 'Day', 'Month']]
dataframeY_train = dataframe.iloc[0:2000][['CartonSales']]
dataframeX_test = dataframe.iloc[2001:3001][['ProductNo', 'Day', 'Month']]
dataframeY_test = dataframe.iloc[2001:3001][['CartonSales']]
# create model
model = Sequential()
model.add(Dense(3, input_dim=3, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam', metrics=['mse'])
#sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
#model.compile(loss='mse', optimizer=sgd, metrics=['mse'])
# train model
#history = model.fit(dataframe, dataframe, epochs=500, batch_size=len(X), verbose=2)
history = model.fit(dataframeX_train, dataframeY_train, epochs=100, batch_size=4, verbose=2, callbacks=[TestCallback((dataframeX_test, dataframeY_test))])
# plot metrics
pyplot.plot(history.history['mean_squared_error'])
pyplot.show()
As far as I can tell from your code above, your y values are CartonSales. Sales can have large values and a large range, and that's probably why you get such a high error. You could use mean_squared_logarithmic_error instead of mean squared error, but I would suggest the following:
Continue using mean squared error.
Log-transform your y values, and later exp-transform your predictions:
import numpy as np
dataframeY_train = np.log(dataframeY_train)
dataframeY_test = np.log(dataframeY_test )
....
predictions=model.predict(dataframeX_test)[:,0]
predictions = np.exp(predictions)
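If you prefer the loss-side route instead, here is a minimal sketch reusing the model from the question ('mean_squared_logarithmic_error' is a built-in Keras loss, so no transform of y is needed):

# Penalize errors on a log scale directly instead of transforming y
model.compile(loss='mean_squared_logarithmic_error', optimizer='adam',
              metrics=['mse'])
history = model.fit(dataframeX_train, dataframeY_train,
                    epochs=100, batch_size=4, verbose=2)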