I am trying to create a model to predict whether a given text is a work reference or a work description:
Code:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from keras.models import Model
from keras.layers import Input, Embedding, LSTM, Dense, Activation, Dropout
from keras.optimizers import RMSprop
from keras.callbacks import EarlyStopping

X = df.v2
Y = df.v1
le = LabelEncoder()
Y = le.fit_transform(Y)
Y = Y.reshape(-1,1)
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.15)
max_words = 1000
max_len = 150
tok = Tokenizer(num_words=max_words)
tok.fit_on_texts(X_train)
sequences = tok.texts_to_sequences(X_train)
sequences_matrix = sequence.pad_sequences(sequences,maxlen=max_len)
def RNN():
    inputs = Input(name='inputs', shape=[max_len])
    layer = Embedding(max_words, 50, input_length=max_len)(inputs)
    layer = LSTM(64)(layer)
    layer = Dense(256, name='FC1')(layer)
    layer = Activation('relu')(layer)
    layer = Dropout(0.5)(layer)
    layer = Dense(1, name='out_layer')(layer)
    layer = Activation('sigmoid')(layer)
    model = Model(inputs=inputs, outputs=layer)
    return model
model = RNN()
model.summary()
model.compile(loss='binary_crossentropy',optimizer=RMSprop(),metrics=['accuracy'])
model.fit(sequences_matrix,Y_train,batch_size=128,epochs=10,
validation_split=0.2,callbacks=[EarlyStopping(monitor='val_loss',min_delta=0.0001)])
test_sequences = tok.texts_to_sequences(X_test)
test_sequences_matrix = sequence.pad_sequences(test_sequences,maxlen=max_len)
accr = model.evaluate(test_sequences_matrix,Y_test)
print('Test set\n Loss: {:0.3f}\n Accuracy: {:0.3f}'.format(accr[0],accr[1]))
sample_texts = ["this is sample input"]
txts = tok.texts_to_sequences(sample_texts)
txts = sequence.pad_sequences(txts, maxlen=max_len)
preds = model.predict(txts)
print(preds)
Results:
Loss: 0.049
Accuracy: 0.980
preds = 0.95173115
My question: how do I output the predicted label instead of the score? Can anyone point me in the right direction, please?
SOURCE: https://www.kaggle.com/kredy10/simple-lstm-for-text-classification
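For reference, a minimal sketch of one common approach (not from the original kernel): threshold the sigmoid score at 0.5 and map the 0/1 result back through the fitted LabelEncoder.

# Sketch: assumes `model`, `tok`, `le`, and `max_len` defined above.
def predict_labels(texts):
    seqs = sequence.pad_sequences(tok.texts_to_sequences(texts), maxlen=max_len)
    probs = model.predict(seqs)                  # sigmoid scores in [0, 1]
    classes = (probs > 0.5).astype(int).ravel()  # 0/1 class decisions
    return le.inverse_transform(classes)         # back to the original label strings

print(predict_labels(["this is sample input"]))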
Related
I am building a model to classify news (the AG News dataset). The vocabulary size is ~33k, with a custom embedding layer. I have run this for 20 epochs, but the loss and accuracy (1.3 and 26% respectively) are almost constant even at the end of the 20th epoch. Can someone please help me with this? Also, am I feeding the correct input to the fc layer? I am using CrossEntropyLoss as the loss function.
Here is my model class:
class NewsClassifier(nn.Module):
    def __init__(self, vocab_weights=None, rnn_type='LSTM', vocab_size=len(vocab.vocab), n_classes=4, embed_size=300,
                 rnn_units=512, n_layers=2, bi_dir=True, rnn_drop=0.0, padding_index=vocab['<unk>']):
        super().__init__()
        self.rnn_units = rnn_units
        self.n_classes = n_classes
        self.rnn_type = rnn_type
        if vocab_weights is not None:  # truthiness of a tensor is ambiguous, so test against None
            self.embedding = nn.Embedding.from_pretrained(torch.as_tensor(vocab_weights))
        else:
            self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx=padding_index)
        if rnn_type == 'LSTM':
            self.rnn = nn.LSTM(embed_size, rnn_units, num_layers=n_layers, bidirectional=bi_dir, dropout=rnn_drop)
        elif rnn_type == 'GRU':
            self.rnn = nn.GRU(embed_size, rnn_units, num_layers=n_layers, bidirectional=bi_dir, dropout=rnn_drop)
        else:
            raise NotImplementedError
        self.fc = nn.Linear(2 * rnn_units if bi_dir else rnn_units, self.n_classes)

    def forward(self, data, lens):
        x_embed = self.embedding(data)  # (padded_len, batch_size, embed_dim)
        x_packed = pack_padded_sequence(x_embed, lens.cpu(), enforce_sorted=False)  # pack sequences before the RNN
        if self.rnn_type == 'LSTM':
            output_packed, (hidden, cell) = self.rnn(x_packed)  # output is packed and cannot be fed to linear layers
        else:
            output_packed, hidden = self.rnn(x_packed)  # GRU returns only a hidden state
        # Even with n stacked layers, the output tensor covers only the top layer
        output_padded, _ = pad_packed_sequence(output_packed)  # padded for the linear layer: (padded_len, batch_size, hidden_units)
        # Picking only the last output --> equivalent to return_sequences=False in Keras
        out_reduced = torch.cat((output_padded[-1, :, :self.rnn_units], output_padded[-1, :, self.rnn_units:]), 1)
        return self.fc(out_reduced)
model = NewsClassifier()
print(f'The total number of trainable parameters are : {sum(p.numel() for p in model.parameters() if p.requires_grad)}')
My training function is:
def train(model, iterator=trainDataloader, optimizer=optimizer, loss_fn=criterion):
    e_loss = e_acc = i = 0
    model.train()
    for inputs, leng, labels in iterator:
        inputs, leng, labels = inputs.to(device), leng.to(device), labels.to(device)
        optimizer.zero_grad()
        preds = model(inputs, leng).squeeze(1)
        loss = loss_fn(preds, labels.long())
        acc = accuracy(preds, labels)
        loss.backward()
        optimizer.step()
        e_loss += loss.item()
        e_acc += acc.item()
        i += 1
    return e_loss / i, e_acc / i
def predict(model, iterator=testDataloader, loss_fn=criterion):
    e_loss = e_acc = i = 0
    model.eval()
    with torch.no_grad():
        for inputs, leng, labels in iterator:
            inputs, leng, labels = inputs.to(device), leng.to(device), labels.to(device)
            preds = model(inputs, leng).squeeze(1)
            loss = loss_fn(preds, labels.long())
            acc = accuracy(preds, labels)
            e_loss += loss.item()
            e_acc += acc.item()
            i += 1
    return e_loss / i, e_acc / i
N_EPOCHS = 20
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss, train_acc = train(model)
    valid_loss, valid_acc = predict(model)
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    print(f'Epoch: {epoch+1:02} / {N_EPOCHS} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
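One thing worth flagging here (an editorial observation, not part of the original question): after pad_packed_sequence, output_padded[-1] is the last time step of the padded batch, which is padding for every sequence shorter than the longest one in the batch. A common alternative is to build the classifier input from the final hidden state instead; a minimal sketch for the bidirectional, stacked setup above, replacing the out_reduced lines in forward:

# Sketch: `hidden` has shape (num_layers * num_directions, batch, rnn_units);
# hidden[-2] / hidden[-1] are the top layer's forward / backward states.
out_reduced = torch.cat((hidden[-2], hidden[-1]), dim=1)  # (batch, 2 * rnn_units)
return self.fc(out_reduced)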
I am working on a text classification problem with a binary output, 0 or 1. The accuracy, train loss and test loss remain the same; the accuracy is exactly the same for all the epochs. All the steps look correct to me. I have tried several ways to figure out what is going wrong, but nothing has worked. Please help.
class RNN(nn.Module):
    def __init__(self, num_layers, num_classes, input_size, hidden_size, vocab, dropout):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.sequence_len = sequence_len
        self.embedding = nn.Embedding(len(vocab), input_size)
        nn.init.xavier_normal_(self.embedding.weight)
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, dropout=dropout, nonlinearity='tanh',
                          batch_first=True, bias=True, bidirectional=False)
        self.linear = nn.Linear(hidden_size, 1)
        nn.init.xavier_normal_(self.linear.weight)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        lens = list(map(len, x))
        padded = pad_sequence(x, batch_first=True)
        output_embedding = self.embedding(padded)
        packed = pack_padded_sequence(input=output_embedding, lengths=lens, batch_first=True, enforce_sorted=False)
        input_rnn = packed
        h011 = torch.zeros(1, 32, 3)
        output_11, hidden_11 = self.rnn(input_rnn, h011)
        output_padded, output_lengths = pad_packed_sequence(output_11, batch_first=True)
        final_output_11 = self.linear(hidden_11)
        prob_11 = self.sigmoid(final_output_11)
        return output_padded, hidden_11, prob_11
num_layers = 1
num_classes = 2
input_size = 5
hidden_size = 3
criterion = nn.BCELoss()
sequence_len = 1
dropout = 0.5
rnn = RNN(num_layers, num_classes, input_size, hidden_size,vocab,dropout)
epochs = 10
lr = 0.01
weight_decay=0.011
def train_loop(model, criterion, optimizer, train_loader, valid_loader, epochs):
    train_losses = []
    valid_losses = []
    for epoch in range(epochs):
        train_loss = 0
        for label, text in train_loader:
            output, hidden, prob = rnn.forward(text)
            prob = torch.tensor([item.item() for sublist in prob for item in sublist], dtype=torch.float32)
            label = torch.tensor(label, dtype=torch.float32)
            loss = criterion(prob, label)
            optimizer.zero_grad()
            loss.requires_grad = True
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss = train_loss / len(train_loader)

        valid_loss = 0
        with torch.no_grad():
            correct = 0
            total = 0
            for label, text in valid_loader:
                output, hidden, prob = rnn.forward(text)
                prob = torch.tensor([item.item() for sublist in prob for item in sublist], dtype=torch.float32)
                label = torch.tensor(label, dtype=torch.float32)
                loss = criterion(prob, label)
                valid_loss += loss.item()
                p = torch.tensor([1 if i > 0.5 else 0 for i in prob.data], dtype=torch.float32)
                # predicted = torch.max(p, 1)
                total += label.size(0)
                correct += (p == label).sum().item()
            valid_loss = valid_loss / len(valid_loader)
            accuracy = 100 * correct / total
            print(accuracy)
        scheduler.step(accuracy)
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
        print(f'Epoch {epoch+1:<2d}/{epochs} --> Train Loss: {train_loss:.4f} | Valid Loss: {valid_loss:.4f}')
from torch.optim.lr_scheduler import StepLR
from torch.optim.lr_scheduler import ReduceLROnPlateau
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.01)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.4, patience=5,
verbose=True) # need to change scheduler.step
train_loop(rnn,criterion,optimizer,train_loader,valid_loader,epochs)
Output: (per-epoch printout omitted; as described above, the loss and accuracy values are identical every epoch)
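An editorial note on this code (not in the original post): rebuilding prob and label with torch.tensor(...) creates new tensors that are detached from the computation graph, so loss.backward() cannot reach the model's weights; setting loss.requires_grad = True only silences the error this would otherwise raise, without restoring the gradient path. A minimal sketch of the usual pattern, assuming the model's sigmoid output is used directly and the dataloader yields label as a tensor:

# Sketch: keep the loss connected to the graph.
output, hidden, prob = rnn(text)                # prob still carries grad_fn
loss = criterion(prob.view(-1), label.float())  # no torch.tensor(...) rebuild needed
optimizer.zero_grad()
loss.backward()                                 # gradients now flow into the model
optimizer.step()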
I have a dataset of tweets labelled with sentiments. I have pre-processed the data and done part-of-speech tagging (all via NLTK in Python). After preprocessing the data looks like this:
(image: pre-processed tweets)
After preprocessing, the training data is prepared with the following code:
full_text = list(train['content'].values) + list(test['content'].values)
tokenizer = Tokenizer(num_words=20000,lower = True, filters = '')
tokenizer.fit_on_texts(full_text)
train_tokenized = tokenizer.texts_to_sequences(train['content'])
test_tokenized = tokenizer.texts_to_sequences(test['content'])
max_len = 50
X_train = pad_sequences(train_tokenized, maxlen = max_len)
X_test = pad_sequences(test_tokenized, maxlen = max_len)
embed_size = 300
max_features = 20000
def get_coefs(word, *arr):
    return word, np.asarray(arr, dtype='float32')

def get_embed_mat(embedding_path):
    embedding_index = dict(get_coefs(*o.strip().split(" ")) for o in open(embedding_path, encoding="utf8"))
    word_index = tokenizer.word_index
    nb_words = min(max_features, len(word_index))
    print(nb_words)
    embedding_matrix = np.zeros((nb_words + 1, embed_size))
    for word, i in word_index.items():
        if i >= max_features:
            continue
        embedding_vector = embedding_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    return embedding_matrix
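For context, a one-line usage sketch (the filename is hypothetical; any 300-dimensional word-vector text file in "word value value ..." format would do): embedding_matrix = get_embed_mat('glove.840B.300d.txt').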
A deep learning model is built with the word embeddings as a layer. The code for building the model is given below:
def build_model1(lr=0.0, lr_d=0.0, units=0, dr=0.0):
    inp = Input(shape=(max_len,))
    x = Embedding(20001, embed_size, weights=[embedding_matrix], trainable=False)(inp)
    x1 = SpatialDropout1D(dr)(x)
    x_lstm = Bidirectional(LSTM(units, return_sequences=True))(x1)
    x1 = Conv1D(32, kernel_size=2, padding='valid', kernel_initializer='he_uniform')(x_lstm)
    avg_pool1_lstm1 = GlobalAveragePooling1D()(x1)
    max_pool1_lstm1 = GlobalMaxPooling1D()(x1)
    x_lstm = Bidirectional(LSTM(units, return_sequences=True))(x1)
    x1 = Conv1D(32, kernel_size=2, padding='valid', kernel_initializer='he_uniform')(x_lstm)
    avg_pool1_lstm = GlobalAveragePooling1D()(x1)
    max_pool1_lstm = GlobalMaxPooling1D()(x1)
    x = concatenate([avg_pool1_lstm1, max_pool1_lstm1, avg_pool1_lstm, max_pool1_lstm])
    # x = BatchNormalization()(x)
    x = Dropout(0.1)(Dense(128, activation='relu')(x))
    x = BatchNormalization()(x)
    x = Dropout(0.1)(Dense(64, activation='relu')(x))
    x = Dense(8, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss="binary_crossentropy", optimizer=Adam(lr=lr, decay=lr_d), metrics=["accuracy"])
    history = model.fit(X_train, y_one_hot, batch_size=128, epochs=20, validation_split=0.1,
                        verbose=1, callbacks=[check_point, early_stop])
    model = load_model(file_path)
    return model
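For reference, this would be invoked along the lines of model = build_model1(lr=1e-3, lr_d=0, units=128, dr=0.2) (the hyperparameters are illustrative; check_point, early_stop, file_path, y_one_hot, and embedding_matrix are assumed to be defined elsewhere in the notebook).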
I want to use LIME to explain the predictions of this model (as shown in the image below), but it is not working.
(image: LIME text explanation of the model)
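For what it's worth, here is a minimal sketch of how LIME's text explainer is typically wired to a Keras model like this one (an assumption on the editor's part, not the asker's code; it relies on the fitted tokenizer, max_len, and model above, and the class names are placeholders). LimeTextExplainer expects a function that maps a list of raw strings to an array of class probabilities:

from lime.lime_text import LimeTextExplainer

def predict_proba(texts):
    # raw strings -> padded sequences -> (n_samples, n_classes) probabilities
    seqs = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=max_len)
    return model.predict(seqs)

explainer = LimeTextExplainer(class_names=['class_%d' % i for i in range(8)])  # 8 outputs in the model above
exp = explainer.explain_instance("example tweet text", predict_proba, num_features=10, labels=(0,))
print(exp.as_list(label=0))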
I was trying to run model.fit. I have a dataset of 5486 rows with target shape (5486, 3). I noticed that while running model.fit, it printed Running on 4489 training samples and 997 validation samples, and it started with
Epoch 1/32
5/4856 [>.............................] - ETA: 180s - loss: 0.3286 - accuracy: 0.5406.............
something
The next day I tried another script, and when calling the fit method it printed the output shown further below.
The first code, the one that showed 5/4489, is:
input_txt = Input(shape=(100,), dtype='int32')
txt = Masking(mask_value=0)(input_txt)
txt = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix], input_length=max_seq_len, trainable=False)(txt)
txt = Conv1D(32, 5,trainable = False)(txt)
txt = Conv1D(60, 4,trainable = False)(txt)
txt = Conv1D(100, 3,trainable = False)(txt)
text_lstm = Bidirectional(LSTM(30,return_sequences=True,trainable= False))(txt)
text_lstm = Bidirectional(LSTM(30,return_sequences=True,trainable= False))(text_lstm)
text_lstm = Bidirectional(LSTM(30,return_sequences=False,trainable= False))(text_lstm)
lstm = Dense(512, activation='relu')(text_lstm)
lstm = Dropout(0.8)(lstm)
input_img = Input(shape=(224,224,3))
model = VGG16(weights='imagenet', include_top=False)
model.trainable = False
x = model(input_img)
flatten = Flatten()(x)
flatten = Dense(1024, activation='relu')(flatten)
flatten = Dense(512, activation='relu')(flatten)
flatten = Dropout(0.8)(flatten)
merged = concatenate([lstm,flatten], axis=1)
dense = Dense(1024, activation='relu')(merged)
dense = Dropout(0.6)(dense)
dense = Dense(512, activation='relu')(dense)
dense = Dense(256, activation='relu')(dense)
dense = Dense(128, activation='relu')(dense)
dense = Dense(3, activation='softmax')(dense)
model = Model(inputs=(input_img,input_txt), outputs=dense)
model.compile(loss='categorical_crossentropy',optimizer=keras.optimizers.Adam(lr=2e-5),metrics=["accuracy"])
X_trn = [image_data,text_data]
y_trn = labels
model.fit(X_trn,y_trn,validation_split=0.2,epochs = 32, batch_size = 64)
Epoch 1/32
5/151 [>.............................] - ETA: 40s - loss: 0.0286 - accuracy: 0.5406.............
And the code that gave the above output was:
vgg = VGG19(weights='imagenet',include_top=False)
vgg.trainable = False
img_in = Input(shape=(224,224,3),dtype = 'int32')
img = vgg(img_in)
img = Flatten()(img)
img = Dense(1024,activation='sigmoid')(img)
img = Dense(512,activation='sigmoid')(img)
img = Dense(64,activation='relu')(img)
img = Dense(32,activation = 'relu')(img)
out = Dense(3,activation='softmax')(img)
model = Model(inputs = [img_in],outputs=out)
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=2e-5),
              # optimizer=optimizers.RMSprop(),
              metrics=["accuracy"])
model.fit(x=images_data[4856:],y= data_labels[4856:],validation_data=(images_data[:4856], data_labels[:4856]),batch_size =64, epochs=32)
I noticed that the count dropped from 4489 to 151. I tried checking everything, but didn't get anywhere. Can someone please explain what happened?
Thanks in advance
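An editorial guess, not from the original post: most likely no data was dropped. Older Keras versions report fit progress in individual samples, while newer TensorFlow/Keras versions report it in batches (steps per epoch); with batch_size = 64, the second run's 151 steps correspond to roughly 151 × 64 ≈ 9664 training samples, not 151 samples.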
I am training my own model using TensorFlow. However, I ran into trouble when I changed my activation function from ReLU to SELU.
This is what happened: the learning curve drops suddenly, and I have no idea what's going on.
(image: my learning curve)
From what I have read, SELU can help prevent overfitting, so I tried to use it in my model. Are there any tips, or any conditions to be aware of, when using SELU?
This is my code:
This is the place where I changed my activation function:
-----
def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name):
    conv_layer = tf.layers.conv2d(x_tensor, conv_num_outputs, kernel_size=conv_ksize, strides=conv_strides,
                                  activation=tf.nn.selu, name=layer_name)
    conv_layer = tf.layers.max_pooling2d(conv_layer, pool_size=pool_ksize, strides=pool_strides)
    return conv_layer
-----
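An editorial aside (not from the original post): SELU's self-normalizing property is derived under specific assumptions, notably LeCun-normal weight initialization and alpha dropout in place of ordinary dropout. A sketch of what that could look like in the layer above; treat the exact choices as assumptions:

# Sketch: pair SELU with lecun_normal initialization
# (assumes a TF 1.x version where tf.keras.initializers.lecun_normal is available).
conv_layer = tf.layers.conv2d(
    x_tensor, conv_num_outputs,
    kernel_size=conv_ksize, strides=conv_strides,
    activation=tf.nn.selu,
    kernel_initializer=tf.keras.initializers.lecun_normal(),
    name=layer_name)
# ...and alpha dropout rather than plain dropout, e.g.:
# conv_layer = tf.contrib.nn.alpha_dropout(conv_layer, keep_prob)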
Graph
tf.reset_default_graph()
#### placeholder ####
input_img = tf.placeholder(dtype=tf.float32, shape=(None, img_size, img_size, 3))
y_true = tf.placeholder(dtype=tf.float32, shape=(None, num_class))
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
lr_in = tf.placeholder(dtype = tf.float32, name = 'learning_rate')
conv_ksize = (3,3)
conv_strides = (1,1)
pool_ksize = (2,2)
pool_strides = (2,2)
n_filters_1 = 32
n_filters_2 = 64
n_filters_3 = 128
n_filters_4 = 256
onebyone_ksize = (1,1)
#CNN
conv_1 = conv2d_maxpool(input_img, n_filters_1, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv1")
# conv_1 = tf.layers.conv2d(conv_1, conv_num_outputs, kernel_size=conv_ksize, strides=conv_strides, activation=tf.nn.relu)
# conv_1_norm = tf.layers.batch_normalization(conv_1, name = "batch_norm1")
# conv_1_dropout = tf.layers.dropout(conv_1_norm, rate = keep_prob, training = True, name = "dropout1")
conv_2 = conv2d_maxpool(conv_1, n_filters_2, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv2")
# conv_2_norm = tf.layers.batch_normalization(conv_2)
conv_3 = conv2d_maxpool(conv_2, n_filters_3, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv3")
# conv_3_norm = tf.layers.batch_normalization(conv_3, name = "batch_norm3")
# conv_3_dropout = tf.layers.dropout(conv_3_norm, rate = keep_prob, training = True, name = "dropout3")
conv_4 = conv2d_maxpool(conv_3, n_filters_4, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name = "conv4")
flatten = tf.layers.flatten(conv_4)
fc1 = tf.layers.dense(flatten, 256, activation = tf.nn.relu)
out = tf.layers.dense(fc1, 6, activation=None, name= "logits") #logit
predict = tf.nn.softmax(out)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = out, labels = y_true))
optimizer = tf.train.AdamOptimizer(lr).minimize(cost)
##accuracy
correct_pred = tf.equal(tf.argmax(out, 1), tf.argmax(y_true, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')
Training
#history/record
train_loss, train_acc = [], []
valid_loss, valid_acc = [], []
update_per_epoch = int(np.floor(X_train.shape[0] / batch_size))
## early stopping and learning rate config
es_patience = 10
es_n = 0
lr_patience = 3
lr_n = 0
save_model_path = './save'
saver = tf.train.Saver()
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# Initializing the variables
batch_gen = img_gen.flow(generator_input(X_train), y_train, batch_size = 32)
val_batch_gen = img_gen.flow(generator_input(X_valid), y_valid, batch_size = len(X_valid))
for i in range(epoch):
    epoch_loss = 0
    epoch_acc = 0
    for j in range(update_per_epoch):
        image, label = next(batch_gen)
        _, this_loss, this_acc = sess.run([optimizer, cost, accuracy], feed_dict={
            input_img: image,
            y_true: label,
            lr_in: lr,
            keep_prob: keep_probability
        })
        epoch_loss += this_loss
        epoch_acc += this_acc

    ## end of epoch
    epoch_loss /= update_per_epoch
    epoch_acc /= update_per_epoch
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)
    print('Epoch {:>2} Loss: {:>4.4f} Training Accuracy: {:.6f}'.format(i + 1, epoch_loss, epoch_acc))

    valid_image, valid_label = next(val_batch_gen)
    valid_this_loss, valid_this_acc = sess.run([cost, accuracy], feed_dict={
        input_img: valid_image,
        y_true: valid_label,
        lr_in: lr,
        keep_prob: 1.
    })
    valid_loss.append(valid_this_loss)
    valid_acc.append(valid_this_acc)
    print('Epoch {:>2} Loss: {:>4.4f} Validation Accuracy: {:.6f}'.format(i + 1, valid_this_loss, valid_this_acc))

    # early stop
    if valid_this_loss > np.min(valid_loss):
        es_n += 1
        lr_n += 1
    else:
        es_n = 0
        lr_n = 0
        saver.save(sess, os.path.join(os.getcwd(), 'bestsession.ckpt'))

    # early stop
    if es_n >= es_patience:
        print("-----------early stopping-------------")
        break

    # adaptive learning rate
    if lr_n >= lr_patience:
        lr *= lr_decay_rate
        lr_n = 0
        print("-----------adjust learning rate------------")

# Save Model
save_path = saver.save(sess, save_model_path)
print('-----model save ------')
----------- 18/09/07------------
I can always reproduce the same result.
And this is my code; I wrote it in Jupyter. Sorry, but I can't upload the training data:
https://drive.google.com/open?id=1uUE32KrNmWnhLbV8z-fyHSMu6zGCCG_e