Related
This is my model:
class BiLSTM(nn.Module):
def __init__(self):
super(BiLSTM, self).__init__()
self.hidden_size = 128
drp = 0.2
n_classes = len(le.classes_)
self.embedding = nn.Embedding(max_features, embed_size)
self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
self.embedding.weight.requires_grad = False
self.lstm = nn.LSTM(embed_size, self.hidden_size, bidirectional=True, batch_first=True)
self.linear = nn.Linear(self.hidden_size*4 , 128)
self.relu = nn.ReLU()
self.dropout = nn.Dropout(drp)
self.out = nn.Linear(128, n_classes)
def forward(self, x):
#rint(x.size())
h_embedding = self.embedding(x)
_embedding = torch.squeeze(torch.unsqueeze(h_embedding, 0))
h_lstm, _ = self.lstm(h_embedding)
avg_pool = torch.mean(h_lstm, 1)
max_pool, _ = torch.max(h_lstm, 1)
conc = torch.cat(( avg_pool, max_pool), 1)
conc = self.relu(self.linear(conc))
conc = self.dropout(conc)
out = self.out(conc)
return out
n_epochs = 87
model = BiLSTM()
loss_fn = nn.CrossEntropyLoss(reduction='mean',)
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.002)
model.cuda()
# Load train and test in CUDA Memory
x_train = torch.tensor(train_X, dtype=torch.long).cuda()
y_train = torch.tensor(train_y, dtype=torch.long).cuda()
x_cv = torch.tensor(test_X, dtype=torch.long).cuda()
y_cv = torch.tensor(test_y, dtype=torch.long).cuda()
# Create Torch datasets
train = torch.utils.data.TensorDataset(x_train, y_train)
valid = torch.utils.data.TensorDataset(x_cv, y_cv)
# Create Data Loaders
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=True)
train_loss = []
valid_loss = []
for epoch in range(n_epochs):
start_time = time.time()
# Set model to train configuration
model.train()
avg_loss = 0.
for i, (x_batch, y_batch) in enumerate(train_loader):
# Predict/Forward Pass
y_pred = model(x_batch)
# Compute loss
loss = loss_fn(y_pred, y_batch)
optimizer.zero_grad()
loss.backward()
optimizer.step()
avg_loss += loss.item() / len(train_loader)
#acc =n-avg_loss
# Set model to validation configuration
model.eval()
avg_val_loss = 0.
val_preds = np.zeros((len(x_cv),len(le.classes_)))
for i, (x_batch, y_batch) in enumerate(valid_loader):
y_pred = model(x_batch).detach()
avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
#val_accuracy = n- avg_val_loss
# keep/store predictions
val_preds[i * batch_size:(i+1) * batch_size] =F.softmax(y_pred).cpu().numpy()
val_preds=val_preds*5000
# Check Accuracy
val_accuracy = sum(val_preds.argmax(axis=1)==test_y)/len(test_y)
train_loss.append(avg_loss)
valid_loss.append(avg_val_loss)
elapsed_time = time.time() - start_time
print('Epoch {}/{} \t Train_loss={:.4f} \t val_loss={:.4f} \t val_acc={:.4f} \t time={:.2f}s'.format(
epoch + 1, n_epochs , avg_loss, avg_val_loss, val_accuracy, elapsed_time))
This is the output I am receiving.I have used the BiLSTM model. I tried changing the loss techniques and dropout value but it didn't work. The issue is I guess the model is overfitting how can I increase the accuracy this is the output I am receiving.I have used the BiLSTM model. I tried changing the loss techniques and dropout value but it didn't work. The issue is I guess the model is overfitting how can I increase the accuracy
This is the output I am receiving.I have used the BiLSTM model. I tried changing the loss techniques and dropout value but it didn't work. The issue is I guess the model is overfitting how can I increase the accuracy, The size of the dataset is 3000
[1]: https://i.stack.imgur.com/NbK92.png [output looks like this][1]
[1]: https://i.stack.imgur.com/ll12J.png [Data Looks Like this][1]
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
####### load the model and data here
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([32,64,128,256, 512]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.1, 0.9))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['Nadam','SGD','RMSprop','adam','Adagrad']))
HP_L2 = hp.HParam('l2 regularizer', hp.RealInterval(.00001,.01))
HP_LeakyReLU=hp.HParam('alpha', hp.RealInterval(0.1, 0.9))
METRIC_ACCURACY = 'accuracy'
with tf.summary.create_file_writer('raw-img/log/hparam_tuning/').as_default():
hp.hparams_config(
hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER,HP_L2,HP_LeakyReLU],
metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
)
def train_test_model(hparams):
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(hparams[HP_NUM_UNITS], kernel_regularizer=tf.keras.regularizers.l2(0.001)),
tf.keras.layers.LeakyReLU(hparams[HP_LeakyReLU]),
tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(
optimizer=hparams[HP_OPTIMIZER],
loss='sparse_categorical_crossentropy',
metrics=['accuracy'],
)
model.fit(x_train, y_train, epochs=2)
_, accuracy = model.evaluate(x_test, y_test)
return accuracy
def run(run_dir, hparams):
with tf.summary.create_file_writer(run_dir).as_default():
hp.hparams(hparams) # record the values used in this trial
accuracy = train_test_model(hparams)
tf.summary.scalar(METRIC_ACCURACY, accuracy, step=2)
session_num = 0
for num_units in HP_NUM_UNITS.domain.values:
for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
for l2 in (HP_L2.domain.min_value, HP_L2.domain.max_value):
for alpha in (HP_LeakyReLU.domain.min_value, HP_LeakyReLU.domain.max_value):
for optimizer in HP_OPTIMIZER.domain.values:
hparams = {
HP_NUM_UNITS: num_units,
HP_DROPOUT: dropout_rate,
HP_L2: l2,
HP_LeakyReLU:alpha,
HP_OPTIMIZER: optimizer,
}
run_name = "run-%d" % session_num
print('--- Starting trial: %s' % run_name)
print({h.name: hparams[h] for h in hparams})
run('raw-img/log/hparam_tuning/' + run_name, hparams)
session_num += 1
I have tried to use hparams in TF. I have set dropout, l2 and OPTIMIZER.
I need to set value for learning_rate and test it.
What should I do to set learning_rate like dropout and l2 and test it?
I have tried to do this:
model.compile(
optimizer=hparams[HP_OPTIMIZER](lr=0.001),
loss='sparse_categorical_crossentropy',
metrics=['accuracy'],
but it doesn't work. I want to select learning_rate different value of learning_rate like(dropout,l2)
You want to separate the used optimizer into a separate variable:
if hparams[HP_OPTIMIZER] == "SGD":
optimizer = tf.keras.optimizers.SGD(learning_rate=float(hparams[HP_LR]))
elif hparams[HP_OPTIMIZER] == "adam":
optimizer = tf.keras.optimizers.Adam(learning_rate=float(hparams[HP_LR]))
else:
raise ValueError("unexpected optimizer name: %r" % hparams[HP_OPTIMIZER])
model.compile(
optimizer=optimizer,
loss='sparse_categorical_crossentropy',
metrics=['accuracy'],
)
I found the solution here.
I am trying to implement a semantic segmentation task over a kaggle dataset https://www.kaggle.com/franciscoescobar/satellite-images-of-water-bodies.
I have processed the data using Keras datagenerator and it looks like this -
seed = 42
BATCH_SIZE = 50
HEIGHT = 256
WIDTH = 256
image_datagen = ImageDataGenerator(width_shift_range=0.1,
rescale=1./255,
validation_split=0.1,
height_shift_range=0.1,)
mask_datagen = ImageDataGenerator(width_shift_range=0.1,
validation_split=0.1,
rescale = 1./255,
height_shift_range=0.1,)
train_image_generator =image_datagen.flow_from_directory("data/image/",
color_mode = 'grayscale',
class_mode=None, seed=seed,subset='training',
batch_size = BATCH_SIZE,
target_size = (HEIGHT,WIDTH))
train_mask_generator = mask_datagen.flow_from_directory("data/mask/",
color_mode = 'grayscale',
class_mode=None, seed=seed ,subset='training' ,
batch_size = BATCH_SIZE,
target_size = (HEIGHT,WIDTH))
validation_image_generator =image_datagen.flow_from_directory("data/image/",
color_mode = 'grayscale',
class_mode=None, seed=seed,subset='validation',
batch_size = BATCH_SIZE,
target_size = (HEIGHT,WIDTH))
validation_mask_generator = mask_datagen.flow_from_directory("data/mask/",
color_mode = 'grayscale',
class_mode=None, seed=seed ,subset='validation',
batch_size = BATCH_SIZE,
target_size = (HEIGHT,WIDTH))
train_generator = zip(train_image_generator, train_mask_generator)
val_generator = zip(validation_image_generator , validation_mask_generator)
but after that when I have trained the model using the model.fit method,
code -
EPOCHS = 70
callbacks = [WandbCallback() , tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience = 1)]
BATCH_SIZE = 50
history = model.fit(train_generator ,
epochs = EPOCHS,
validation_data = val_generator,
verbose=1,
batch_size = BATCH_SIZE,
callbacks=callbacks)
But the epochs are taking much more time than it usually takes and the epoch run shows something like this
Epoch 1/70
1557/Unknown - 2038s 1s/step - loss: 0.2131 - accuracy: 0.5625
Why is it taking too much time?? anybody, please help.
I am also attaching the corresponding notebook I made: link
Hello I am working on detecting diabetic retinopathy from images in the kaggle dataset (https://www.kaggle.com/c/diabetic-retinopathy-detection) using VGG19, I have pre processed my images with a gaussian filter and resizing them to 224 x 224 x 3 an then I passed them to a vgg19 model with training images = 5760 and validation images = 640. I am getting a training accuracy of 100% but the validation accuracy is not getting better than 60% and the prediction accuracy after the model finished is 50%.
This is my model code:
for count in range(6):
print("*************************************************************************************************")
print("******************************************************Fold " + str(count))
trainDirectory = "Folds/Fold" + str(count) + "-Train"
testDirectory = "Folds/Fold" + str(count) + "-Test"
trdata = ImageDataGenerator()
traindata = trdata.flow_from_directory(class_mode = 'binary' , batch_size= 64, directory= trainDirectory , target_size=(224, 224))
tsdata = ImageDataGenerator()
testdata = tsdata.flow_from_directory(class_mode = 'binary' , batch_size=64, directory= testDirectory, target_size=(224, 224))
from keras.applications.vgg19 import VGG19
vggmodel = VGG19(weights='imagenet', include_top=True, input_shape=(224, 224, 3))
for layers in (vggmodel.layers)[:19]:
layers.trainable = False
X = vggmodel.layers[-2].output
predictions = Dense(1, activation="sigmoid")(X)
model_final = Model(vggmodel.input, predictions)
opt = keras.optimizers.Adam(learning_rate=0.0001)
model_final.compile(loss="binary_crossentropy", optimizer=opt,
metrics=["accuracy"])
from keras.callbacks import ModelCheckpoint, EarlyStopping
checkpoint = ModelCheckpoint("vgg19_1.h5", monitor='val_accuracy', verbose=1, save_best_only=True,
save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=50, verbose=1, mode='auto')
h = model_final.fit_generator(generator=traindata, steps_per_epoch= int(5760 / 64), epochs= 10 , validation_data=testdata,
validation_steps= int(640 / 64) , callbacks=[checkpoint, early])
modelHistory.append(h.history)
model_final.save_weights("vgg19_1.h5")
model_final.save('vgg19_1.h5')
model = load_model('vgg19_1.h5')
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
predict_datagen = ImageDataGenerator()
predict_generator = predict_datagen.flow_from_directory(
"Validation",
class_mode= None,
target_size=(224, 224),
)
pred = model.predict_generator(predict_generator)
y_pred = np.argmax(pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(predict_generator.classes, y_pred))
print('Classification Report')
target_names = ['0', '1']
print(classification_report(predict_generator.classes, y_pred, target_names=target_names))
l = len(y_pred)
acc = sum([y_pred[i]==predict_generator.classes[i] for i in range(l)])/l
print(acc)
predictedAccuracy.append(acc)
this is the accuracy that I have got:
this is the plot of the accuracy / loss
I am performing binary classification for classifying the reviews according to polarity.
I have created an ensemble model in Keras using CNN and BiLSTM as a base model and a neural network as meta-learner. I am using Stacking. But I am getting Stack ensemble accuracy as 0%. What all changes should I make in my code to run smoothly? Where I am getting wrong?
text_input_layer = Input(shape=(length_trainshuffle,))
embedding_layer = Embedding(vocab_size, 100)(text_input_layer)
text_layer_cnn = Conv1D(128, 5, activation='relu')(embedding_layer)
text_layer_cnn = GlobalMaxPooling1D()(text_layer_cnn)
text_layer_cnn = Dropout(0.2)(text_layer_cnn)
text_layer_cnn = Dense(5,kernel_initializer='glorot_uniform', activation='tanh')(text_layer_cnn)
output_layer_cnn = Dense(1, kernel_initializer='glorot_uniform',activation='sigmoid')(text_layer_cnn)
model_cnn = Model(text_input_layer, output_layer_cnn)
optimizer = Adamax(lr=0.001,decay=0.0001)
model_cnn.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
filepath_cnn="cnnmodel.best.hdf5"
checkpoint_cnn = ModelCheckpoint(filepath_cnn, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list_cnn = [checkpoint_cnn]
# Fit the model
model_cnn.fit(trainX, array(trainlabelshuffle), epochs=10,batch_size=80, validation_data = (validateX, array(validatelabelshuffle)), callbacks=callbacks_list_cnn, verbose=1)
model_cnn.save(filepath_cnn)
print('>Saved %s' % filepath_cnn)
loss_cnn, acc_cnn = model_cnn.evaluate(testX,array(testlabelshuffle), verbose=0)
print('Test Accuracy CNN: %f' % (acc_cnn*100))
print('Loss CNN: %f' %(loss_cnn))
text_layer_bilstm = Bidirectional(CuDNNLSTM(256))(embedding_layer)
output_layer_bilstm = Dense(1, kernel_initializer='glorot_uniform',activation='sigmoid')(text_layer_bilstm)
model_bilstm = Model(text_input_layer, output_layer_bilstm)
optimizer_bilstm = Adamax(lr=0.001,decay=0.0001)
model_bilstm.compile(optimizer=optimizer_bilstm, loss='binary_crossentropy', metrics=['accuracy'])
filepath_bilstm="bilstm_model.best.hdf5"
checkpoint_bilstm = ModelCheckpoint(filepath_bilstm, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
callbacks_list_bilstm = [checkpoint_bilstm]
# Fit the model
model_bilstm.fit(trainX, array(trainlabelshuffle), epochs=10,batch_size=80, validation_data = (validateX, array(validatelabelshuffle)), callbacks=callbacks_list_bilstm, verbose=1)
model_bilstm.save(filepath_bilstm)
print('>Saved %s' % filepath_bilstm)
loss_bilstm, acc_bilstm = model_bilstm.evaluate(testX,array(testlabelshuffle), verbose=0)
print('Test Accuracy bilstm: %f' % (acc_bilstm*100))
print('Loss bilstm: %f' %(loss_bilstm))
all_models = list()
cnnmodel = load_model(filepath_cnn)
# add to list of members
all_models.append(cnnmodel)
print('>loaded %s' % filepath_cnn)
bilstmmodel = load_model(filepath_bilstm)
# add to list of members
all_models.append(bilstmmodel)
print('>loaded %s' % filepath_bilstm)
def define_stacked_model(all_models):
# update all layers in all models to not be trainable
for i in range(len(all_models)):
model = all_models[i]
for layer in model.layers:
# make not trainable
layer.trainable = False
# rename to avoid 'unique layer name' issue
layer.name = 'ensemble_' + str(i+1) + '_' + layer.name
# define multi-headed input
ensemble_visible = [model.input for model in all_models]
# concatenate merge output from each model
ensemble_outputs = [model.output for model in all_models]
merge = concatenate(ensemble_outputs)
hidden = Dense(10, activation='relu')(merge)
#hidden = Flatten()(hidden)
output = Dense(2, activation='softmax')(hidden)
model = Model(inputs=ensemble_visible, outputs=output)
# plot graph of ensemble
plot_model(model, show_shapes=True, to_file='model_graph.png')
# compile
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
def fit_stacked_model(model, inputX, inputy):
# prepare input data
X = [inputX for _ in range(len(model.input))]
# encode output data
inputy_enc = to_categorical(inputy)
# fit model
model.fit(X, inputy_enc, epochs=10, verbose=1)
# make a prediction with a stacked model
def predict_stacked_model(model, inputX):
# prepare input data
X = [inputX for _ in range(len(model.input))]
# make prediction
return model.predict(X, verbose=0)
stacked_model = define_stacked_model(all_models)
stacked_model.summary()
# fit stacked model on test dataset
fit_stacked_model(stacked_model,validateX,array(validatelabelshuffle))
#stacked_model.fit(X=testX,y=array(testlabelshuffle),epochs=10, verbose=1)
# make predictions and evaluate
yhat = predict_stacked_model(stacked_model, testX)
yhat = argmax(yhat, axis=1)
acc = accuracy_score(array(testlabelshuffle), yhat)
print('Stacked Test Accuracy: %.3f' % acc)