I was trying to make a prediction from a loaded TensorFlow model, though I'm not sure whether I saved it correctly in the first place; specifically, I have doubts about the code inside the serving_input_fn() function (MAX_SEQ_LENGTH = 128):
def serving_input_fn():
    feature_spec = {
        "input_ids": tf.FixedLenFeature([None, MAX_SEQ_LENGTH], tf.int64),
        "input_mask": tf.FixedLenFeature([None, MAX_SEQ_LENGTH], tf.int64),
        "segment_ids": tf.FixedLenFeature([None, MAX_SEQ_LENGTH], tf.int64),
        "label_ids": tf.FixedLenFeature([None], tf.int64)
    }
    serialized_tf_example = tf.placeholder(dtype=tf.string, shape=[None], name='input_example_tensor')
    receiver_tensors = {'example': serialized_tf_example}
    features = tf.parse_example(serialized_tf_example, feature_spec)
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

estimator.export_saved_model('gs://bucket/trained_model', serving_input_receiver_fn=serving_input_fn)
When I try to predict from the loaded model:
from tensorflow.contrib import predictor
predict_fn = predictor.from_saved_model(LOAD_PATH)
input_features_test = convert_examples_to_features(test_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
predictions = predict_fn({'example': input_features_test[0]})
it returns this error:
ValueError: Cannot feed value of shape () for Tensor
'input_example_tensor:0', which has shape '(?,)'
How should I change the serving_input_fn() method?
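Note: as far as I understand, a signature exported through tf.parse_example takes a batch of serialized tf.train.Example strings under the 'example' key, and each per-example FixedLenFeature shape should drop the batch dimension ([MAX_SEQ_LENGTH] rather than [None, MAX_SEQ_LENGTH]). A rough sketch of a call that would match that kind of signature, assuming input_features_test holds bert.run_classifier InputFeatures objects:
def _int64_feature(values):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))

# one serialized tf.train.Example proto string per input example
serialized_examples = [
    tf.train.Example(features=tf.train.Features(feature={
        "input_ids": _int64_feature(f.input_ids),
        "input_mask": _int64_feature(f.input_mask),
        "segment_ids": _int64_feature(f.segment_ids),
        "label_ids": _int64_feature([f.label_id]),
    })).SerializeToString()
    for f in input_features_test
]
predictions = predict_fn({'example': serialized_examples})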
If you want to reproduce it: github_repo (you should download the variables from here and put them in the trained_model/1608370941/ folder).
This is the tutorial I followed to fine-tune a BERT model on a Google Cloud TPU.
I saved the model using the following serving_input_fn() function (found in this tutorial):
def serving_input_fn():
    label_ids = tf.placeholder(tf.int32, [None], name='label_ids')
    input_ids = tf.placeholder(tf.int32, [None, MAX_SEQ_LENGTH], name='input_ids')
    input_mask = tf.placeholder(tf.int32, [None, MAX_SEQ_LENGTH], name='input_mask')
    segment_ids = tf.placeholder(tf.int32, [None, MAX_SEQ_LENGTH], name='segment_ids')
    input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
        'label_ids': label_ids,
        'input_ids': input_ids,
        'input_mask': input_mask,
        'segment_ids': segment_ids,
    })()
    return input_fn
Then I loaded it using the code mentioned below:
from tensorflow.contrib import predictor
predict_fn = predictor.from_saved_model(LOAD_PATH_GCP)
I converted the input string to BERT model input features with the following method:
def convert_single_string_to_input_dict(example_string_prep, vocab_file_path, max_seq_length):
    # Initialize the BERT tokenizer
    tokenizer = tokenization.FullTokenizer(vocab_file_path, do_lower_case=True)
    token_a = tokenizer.tokenize(example_string_prep)

    tokens = []
    segment_ids = []
    tokens.append("[CLS]")
    segment_ids.append(0)
    for token in token_a:
        tokens.append(token)
        segment_ids.append(0)
    tokens.append('[SEP]')
    segment_ids.append(0)

    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_mask = [1] * len(input_ids)
    while len(input_ids) < max_seq_length:
        input_ids.append(0)
        input_mask.append(0)
        segment_ids.append(0)

    label_id = [0]
    padding = [0] * max_seq_length
    print(len(input_ids), len(input_mask), len(segment_ids), len(label_id))

    return {"input_ids": [input_ids, padding], "input_mask": [input_mask, padding], "segment_ids": [segment_ids, padding], "label_ids": label_id}
Finally, I made a prediction using the loaded model:
MAX_SEQ_LENGTH = 128
VOCAB_FILE_PATH = 'path/to/vocab'
example_features = convert_single_string_to_input_dict(example_string, VOCAB_FILE_PATH, MAX_SEQ_LENGTH)
prediction = predict_fn(example_features)['probabilities'][0]
prediction_dict = {'POS': round(prediction[1],4), 'NEG': round(prediction[0],4)}
pprint(f"prediction: {prediction_dict}")
Related
I'm fine-tuning Sentence-BERT for tasks like sentence cosine-similarity calculation in TensorFlow. I set up an encoder, let's say encoder1, using the code below:
from sentence_transformers import SentenceTransformer
sentences = ["This is an example sentence", "Each sentence is converted"]
model = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v2')
embeddings = model.encode(sentences)
This uses the sentence-transformers API. I also set up another encoder, call it encoder2, using the code below:
from transformers import AutoTokenizer, TFAutoModel
import tensorflow as tf
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L12-v2', from_pt=True)
model_tf = TFAutoModel.from_pretrained('sentence-transformers/all-MiniLM-L12-v2', from_pt=True)
encoded_input = tokenizer(sentences, padding=True, truncation=True, max_length=128, return_tensors='tf')
outputs = model_tf(**encoded_input)
def mean_pooling(model_output, input_mask):
    # seq_output shape = [batch_size, max_seq_len, hidden_size]
    # input_mask shape = [batch_size, max_seq_len]
    seq_output = model_output[0]
    # expand input_mask to the hidden dimension
    input_mask_expanded = tf.cast(tf.broadcast_to(tf.expand_dims(input_mask, -1), seq_output.shape), tf.float32)
    pooled = tf.reduce_sum(seq_output * input_mask_expanded, 1) / tf.reduce_sum(input_mask_expanded, 1)
    # shape = [batch_size, hidden_size]
    return pooled
sentence_embeddings = mean_pooling(outputs, encoded_input['attention_mask'])
pooled_output, _ = tf.linalg.normalize(sentence_embeddings, 2, axis=1)
This loads a pre-trained model from Hugging Face, which I have tested, and I'm sure it produces the same results (pooled_output and embeddings) as encoder1.
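A quick numerical check of that claim (a small sketch, assuming the embeddings and pooled_output computed above are still in scope):
import numpy as np

tf_embeddings = pooled_output.numpy()
st_embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
# cosine similarity between matching rows should be close to 1.0 for both sentences
print(np.sum(tf_embeddings * st_embeddings, axis=1))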
However, the weird thing is that when I load this encoder2 into my tf.keras model and try to run a classifier to see whether two sentences are close, it gives different values with the same input, the same trainable weights, and the same model. Does the network randomly initialize everything after I load the model?
Here's my encoder code:
class ApplicationCLS(tf.keras.layers.Layer):
    def __init__(self, bert_encoder_path, batch_size):
        super().__init__()
        self.bert_encoder = TFAutoModel.from_pretrained(bert_encoder_path, from_pt=True)
        self.classifier = CLSlayer(256, 1)
        self.loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
        self.metric_fn = tf.keras.metrics.BinaryAccuracy(name="accuracy")
        self.auc_fn = tf.keras.metrics.AUC()
        self.batch_size = batch_size

    def mean_pooling(self, model_output, input_mask):
        seq_output = model_output[0]
        shape = [self.batch_size, seq_output.shape[1], seq_output.shape[2]]
        input_mask_expanded = tf.cast(tf.broadcast_to(tf.expand_dims(input_mask, -1), shape), tf.float32)
        pooled = tf.reduce_sum(seq_output * input_mask_expanded, 1) / tf.reduce_sum(input_mask_expanded, 1)
        return pooled

    def call(self, inputs, labels, training=True):
        sent1_inputs = inputs["sent1_inputs"]
        sent2_inputs = inputs["sent2_inputs"]
        # inputs: {"input_ids": input_ids, "input_mask": input_mask, "type_ids": type_id}
        sent1_outputs = self.bert_encoder(**sent1_inputs)
        tf.print("output:", sent1_outputs[0][0])
        sent1_embeddings = self.mean_pooling(sent1_outputs, sent1_inputs['attention_mask'])
        sent1_pooled_output, _ = tf.linalg.normalize(sent1_embeddings, 2, axis=1)

        sent2_outputs = self.bert_encoder(**sent2_inputs)
        sent2_embeddings = self.mean_pooling(sent2_outputs, sent2_inputs['attention_mask'])
        sent2_pooled_output, _ = tf.linalg.normalize(sent2_embeddings, 2, axis=1)

        # concatenate the two sentence embeddings
        interaction = tf.concat([sent1_pooled_output, sent2_pooled_output], 1)
        # classification
        logits = self.classifier(interaction)
        loss = self.loss_fn(labels, logits)
        self.add_loss(loss)
        acc = self.metric_fn(labels, logits)
        auc = self.auc_fn(labels, logits)
        self.add_metric(loss, name="loss")
        self.add_metric(acc, name="acc")
        self.add_metric(auc, name="auc")
        return tf.nn.softmax(logits, name="prediction")
The sent1_inputs are the same, but the printed outputs are different. What happened?
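One way to narrow this down is to check whether the encoder weights inside the layer still match a freshly loaded copy; a rough sketch, assuming app is an already-constructed ApplicationCLS instance (the name is just for illustration) and model_tf is the standalone encoder loaded above:
import numpy as np

reference_weights = model_tf.get_weights()       # standalone encoder loaded earlier
layer_weights = app.bert_encoder.get_weights()   # app: hypothetical ApplicationCLS instance
same = all(np.allclose(a, b) for a, b in zip(reference_weights, layer_weights))
print("encoder weights identical:", same)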
I was trying to use the Recall metric with the TFDebertaForSequenceClassification model for binary classification with labels 0 and 1, but I got this error:
ValueError: Shapes (32, 2) and (32, 1) are incompatible
Does anyone know how to solve it?
This is how I processed the data:
with tf.device('/cpu:0'):
    train_data = tf.data.Dataset.from_tensor_slices((train_df["body"].values, train_df["label"].values))
    valid_data = tf.data.Dataset.from_tensor_slices((valid_df.body.values, valid_df.label.values))

def map_example_to_dict(input_ids, attention_masks, token_type_ids, label):
    X = {
        "input_ids": input_ids,
        "token_type_ids": token_type_ids,
        "attention_mask": attention_masks,
    }
    Y = label
    return X, Y

def encode_examples(df, limit=-1):
    # prepare lists, so that we can build up the final TensorFlow dataset from slices
    input_ids_list = []
    token_type_ids_list = []
    attention_mask_list = []
    labels = []
    for data in df.to_numpy():
        bert_input = tokenizer(data[2], add_special_tokens=True,
                               max_length=MAX_SEQ_LEN,
                               padding='max_length',
                               return_token_type_ids=True,
                               truncation=True)
        input_ids_list.append(bert_input['input_ids'])
        token_type_ids_list.append(bert_input['token_type_ids'])
        attention_mask_list.append(bert_input['attention_mask'])
        labels.append([data[3]])
    return tf.data.Dataset.from_tensor_slices((input_ids_list, attention_mask_list, token_type_ids_list, labels)).map(map_example_to_dict)

encoded_train_input = encode_examples(train_df).shuffle(1000).batch(32, drop_remainder=True)
encoded_valid_input = encode_examples(valid_df).shuffle(1000).batch(32, drop_remainder=True)
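A quick way to see what actually reaches the loss is to print the dataset's element spec (a small check, assuming the datasets built above); because the labels are appended as [data[3]], they come out with shape (32, 1) after batch(32), while the model produces (32, num_labels) logits:
print(encoded_train_input.element_spec)
# the label TensorSpec shows shape (32, 1); the model's logits have shape (32, num_labels)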
and this is how I set up the model:
model = TFDebertaForSequenceClassification.from_pretrained('kamalkraj/deberta-base')
lr = 2e-6 #1e-6 #2e-5 #3e-5
epochs = 1
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.Recall()])
history = model.fit(encoded_train_input, validation_data=encoded_valid_input, epochs=epochs, verbose=1)
Try passing the number of classes to the model.
configure = DebertaConfig.from_pretrained('kamalkraj/deberta-base', gradient_checkpointing=True, num_labels=3)
model = TFDebertaForSequenceClassification.from_pretrained('kamalkraj/deberta-base', config=configure)
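If num_labels is greater than one, the loss also has to match the (batch_size, num_labels) logits; a minimal sketch that pairs the configured model with a sparse categorical loss on plain integer labels (this swaps out the binary loss from the question, so treat it as an assumption about the intended setup):
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-6),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])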
I am trying to load a custom TFRecord file into my Keras model. I attempted to follow this tutorial: https://medium.com/#moritzkrger/speeding-up-keras-with-tfrecord-datasets-5464f9836c36, but adapted it for my use case.
My goal is to have the functions work similarly to Keras's ImageDataGenerator. I cannot use that class because I need specific metadata from the images that the generator does not grab. I'm not including that metadata here because I just need the basic network to function first.
I also want to be able to apply this to a transfer learning application.
I keep getting this error: TypeError: Could not build a TypeSpec for None with type NoneType
I am using TensorFlow 2.2.
def _parse_function(serialized):
    features = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64),
        'shapex': tf.io.FixedLenFeature([], tf.int64),
        'shapey': tf.io.FixedLenFeature([], tf.int64),
    }
    parsed_example = tf.io.parse_single_example(serialized=serialized, features=features)
    shapex = tf.cast(parsed_example['shapex'], tf.int32)
    shapey = tf.cast(parsed_example['shapey'], tf.int32)
    image_shape = tf.stack([shapex, shapey, 3])
    image_raw = parsed_example['image']
    # Decode the raw bytes so it becomes a tensor with a type.
    image = tf.io.decode_raw(image_raw, tf.uint8)
    image = tf.reshape(image, image_shape)
    # Get labels
    label = tf.cast(parsed_example['label'], tf.float32)
    return image, label

def imgs_inputs(type, perform_shuffle=False):
    records_dir = '/path/to/tfrecord/'
    record_paths = [os.path.join(records_dir, record_name) for record_name in os.listdir(records_dir)]
    full_dataset = tf.data.TFRecordDataset(filenames=record_paths)
    full_dataset = full_dataset.map(_parse_function, num_parallel_calls=16)
    dataset_length = (len(list(full_dataset)))  # Gets length of dataset
    iterator = tf.compat.v1.data.make_one_shot_iterator(databatch)
    image, label = iterator.get_next()
    # labels saved as values e.g. [1,2,3], and are now converted to one-hot encoding
    label = to_categorical(label)
    return image, label

image, label = imgs_inputs(type='Train', perform_shuffle=True)
#Combine it with keras
# base_model = MobileNet(weights='imagenet', include_top=False, input_shape=(200,200,3), dropout=.3)
model_input = Input(shape=[200,200,3])
#Build your network
model_output = Flatten(input_shape=(200, 200, 3))(model_input)
model_output = Dense(19, activation='relu')(model_output)
#Create your model
train_model = Model(inputs=model_input, outputs=model_output)
#Compile your model
optimizer = Adam(learning_rate=.001)
train_model.compile(optimizer=optimizer,loss='mean_squared_error',metrics=['accuracy'],target_tensors=[label])
#Train the model
train_model.fit(epochs=10,steps_per_epoch=2)
image returns an array of shape (100, 200, 200, 3), which is a batch of 100 images.
label returns an array of shape (100, 19), which is a batch of 100 labels (there are 19 labels).
The issue was related to shapex and shapey, but I don't know exactly why.
I set shapex = 200 and shapey = 200. Then I rewrote the model to include the transfer learning:
base_model = MobileNet(weights='imagenet', include_top=False, input_shape=(200, 200, 3), dropout=.3)
x = base_model.output
types = Dense(19, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=types)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])
history = model.fit(get_batches(), steps_per_epoch=1000, epochs=10)
I found everything I needed in this Google Colab: https://colab.research.google.com/github/GoogleCloudPlatform/training-data-analyst/blob/master/courses/fast-and-lean-data-science/04_Keras_Flowers_transfer_learning_solution.ipynb#scrollTo=XLJNVGwHUDy1
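The key pattern from that notebook is feeding the tf.data pipeline straight to model.fit instead of pulling tensors out through an iterator; a minimal sketch, assuming the _parse_function and record_paths defined earlier and a model whose output shape matches the labels:
full_dataset = tf.data.TFRecordDataset(filenames=record_paths)
full_dataset = (full_dataset
                .map(_parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)
                .shuffle(1000)
                .batch(32)
                .prefetch(tf.data.experimental.AUTOTUNE))

history = model.fit(full_dataset, epochs=10)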
I am trying to save a fine-tuned BERT model. I have run the code correctly; it works fine, and in the IPython console I am able to call getPrediction and have it return the result.
I have my weight files saved (the highest being model.ckpt-333.data-00000-of-00001).
I have no idea how I would go about saving the model to be reusable.
I am using bert-tensorflow.
import json
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime
from sklearn.model_selection import train_test_split
import os
print("tensorflow version : ", tf.__version__)
print("tensorflow_hub version : ", hub.__version__)
#Importing BERT modules
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization
#set output directory of the model
OUTPUT_DIR = 'model'

# Whether or not to clear/delete the directory and create a new one
DO_DELETE = False

if DO_DELETE:
    try:
        tf.gfile.DeleteRecursively(OUTPUT_DIR)
    except:
        pass

tf.io.gfile.makedirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))
### Load the data
data = pd.read_csv("data/bbc-text.csv")
data.columns = ['category', 'text']
print('*****Data Loaded: {} *****'.format(data.head()))
#check to see if any null values are present.
print('*****Empty Data: {} *****'.format(data[data.isnull().any(axis=1)]))
#encode category variable into numeric
data.category = pd.Categorical(data.category)
data['code'] = data.category.cat.codes
from sklearn.model_selection import train_test_split
train, test = train_test_split(data, test_size=0.2, random_state=200)
## 2 -- Data Visualisation
print(data.code.unique())
import matplotlib.pyplot as plt
train['code'].value_counts().plot(kind = 'bar')
DATA_COLUMN = 'text'
LABEL_COLUMN = 'code'
label_list = [0, 1, 2, 3, 4]
plt.show()
## 2 -- Data Preprocessing
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(
    guid=None,
    text_a=x[DATA_COLUMN],
    text_b=None,
    label=x[LABEL_COLUMN]), axis=1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(
    guid=None,
    text_a=x[DATA_COLUMN],
    text_b=None,
    label=x[LABEL_COLUMN]), axis=1)
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"
def create_tokenizer_from_hub_module():
    """Get the vocab file and casing info from the Hub module."""
    with tf.Graph().as_default():
        bert_module = hub.Module(BERT_MODEL_HUB)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
        with tf.compat.v1.Session() as sess:
            vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                                  tokenization_info["do_lower_case"]])
    return bert.tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and validation features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
#Example on first observation in the training set
print("Example of train[0] as a training set")
print("Sentence : ", train_InputExamples.iloc[0].text_a)
print("-"*30)
print("Tokens : ", tokenizer.tokenize(train_InputExamples.iloc[0].text_a))
print("-"*30)
print("Input IDs : ", train_features[0].input_ids)
print("-"*30)
print("Input Masks : ", train_features[0].input_mask)
print("-"*30)
print("Segment IDs : ", train_features[0].segment_ids)
## 3. Creating a Multiclass Classifier
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
    bert_module = hub.Module(
        BERT_MODEL_HUB,
        trainable=True)
    bert_inputs = dict(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids)
    bert_outputs = bert_module(
        inputs=bert_inputs,
        signature="tokens",
        as_dict=True)

    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_outputs" for token-level output.
    output_layer = bert_outputs["pooled_output"]
    hidden_size = output_layer.shape[-1].value

    # Create our own layer to tune for politeness data.
    output_weights = tf.compat.v1.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.compat.v1.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.compat.v1.variable_scope("loss"):
        # Dropout helps prevent overfitting
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        # Convert labels into one-hot encoding
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
        # If we're predicting, we want predicted labels and the probabilities.
        if is_predicting:
            return (predicted_labels, log_probs)

        # If we're training/evaluating, compute loss between predicted and actual label
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, predicted_labels, log_probs)
#A function that adapts our model to work for training, evaluation, and prediction.
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
    """Returns `model_fn` closure for TPUEstimator."""
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

        # TRAIN and EVAL
        if not is_predicting:
            (loss, predicted_labels, log_probs) = create_model(
                is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
            train_op = bert.optimization.create_optimizer(
                loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

            # Calculate evaluation metrics.
            def metric_fn(label_ids, predicted_labels):
                accuracy = tf.compat.v1.metrics.accuracy(label_ids, predicted_labels)
                true_pos = tf.compat.v1.metrics.true_positives(
                    label_ids, predicted_labels)
                true_neg = tf.compat.v1.metrics.true_negatives(
                    label_ids, predicted_labels)
                false_pos = tf.compat.v1.metrics.false_positives(
                    label_ids, predicted_labels)
                false_neg = tf.compat.v1.metrics.false_negatives(
                    label_ids, predicted_labels)
                return {
                    "eval_accuracy": accuracy,
                    "true_positives": true_pos,
                    "true_negatives": true_neg,
                    "false_positives": false_pos,
                    "false_negatives": false_neg
                }

            eval_metrics = metric_fn(label_ids, predicted_labels)

            if mode == tf.estimator.ModeKeys.TRAIN:
                return tf.estimator.EstimatorSpec(mode=mode,
                                                  loss=loss,
                                                  train_op=train_op)
            else:
                return tf.estimator.EstimatorSpec(mode=mode,
                                                  loss=loss,
                                                  eval_metric_ops=eval_metrics)
        else:
            (predicted_labels, log_probs) = create_model(
                is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
            predictions = {
                'probabilities': log_probs,
                'labels': predicted_labels
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Return the actual model function in the closure
    return model_fn
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where the learning rate is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 300
SAVE_SUMMARY_STEPS = 100
# Compute train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)
# Specify output directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)
#Initializing the model and the estimator
model_fn = model_fn_builder(
    num_labels=len(label_list),
    learning_rate=LEARNING_RATE,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    config=run_config,
    params={"batch_size": BATCH_SIZE})

# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

# Create an input function for validating. drop_remainder = True for using TPUs.
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)
# #Training the model
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)
#Evaluating the model with Validation set
accuracy = estimator.evaluate(input_fn=test_input_fn, steps=None)
# A method to get predictions
def getPrediction(in_sentences):
    # A list to map the actual labels to the predictions
    labels = ["business", "entertainment", "politics", "sports", "tech"]
    # Transforming the test data into BERT accepted form
    input_examples = [run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0) for x in in_sentences]
    # Creating input features for Test data
    input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    # Predicting the classes
    predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH,
                                                       is_training=False, drop_remainder=False)
    predictions = estimator.predict(predict_input_fn)
    return [(sentence, prediction['probabilities'], prediction['labels'], labels[prediction['labels']]) for
            sentence, prediction in zip(in_sentences, predictions)]

pred_sentences = list(test['text'])
predictions = getPrediction(pred_sentences)

enc_labels = []
act_labels = []
for i in range(len(predictions)):
    enc_labels.append(predictions[i][2])
    act_labels.append(predictions[i][3])

pd.DataFrame(enc_labels, columns=['category']).to_excel('data/submission_bert.xlsx', index=False)
## Random tester
#Classifying random sentences
tests = getPrediction(['Mr.Modi is the Indian Prime Minister',
'Gaming machines are powered by efficient micro processores and GPUs',
'That HBO TV series is really good',
'A trillion dollar economy '
])
Since the question asks how to save the model, here is how it works:
import torch
torch.save(model, 'path/to/model')
saved_model = torch.load('path/to/model')
I think you can just rename your model.ckpt-333.data-00000-of-00001 to bert_model.ckpt and then use it in the same way you would use a non-finetuned model. For example, run
python run_classifier.py \
--task_name=MRPC \
--do_predict=true \
--data_dir=$GLUE_DIR/MRPC \
--vocab_file=$BERT_BASE_DIR/vocab.txt \
--bert_config_file=$BERT_BASE_DIR/bert_config.json \
--init_checkpoint=$TRAINED_CLASSIFIER
with --init_checkpoint pointing to your model's dir, or run bert-as-service
bert-serving-start -model_dir $TRAINED_CLASSIFIER
with the right -model_dir.
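Note that a checkpoint is actually three files (.data-00000-of-00001, .index and .meta), so all of them need the new prefix; a small sketch of the copy, assuming the files sit in a model/ output directory:
import glob
import shutil

# copy model.ckpt-333.data-00000-of-00001, .index and .meta under the bert_model.ckpt prefix
for path in glob.glob('model/model.ckpt-333.*'):
    suffix = path.split('model.ckpt-333')[-1]      # e.g. '.data-00000-of-00001'
    shutil.copy(path, 'model/bert_model.ckpt' + suffix)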
You can use this method:
model = MyModel(num_classes).to(device)
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=1e-2)
output_model = './models/nameOfYourModel.pth'
# save
def save(model, optimizer):
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }, output_model)

save(model, optimizer)

# load
checkpoint = torch.load(output_model, map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
Source: https://github.com/huggingface/transformers/issues/7849#issuecomment-718726121
I have tried the code below for reusing the saved BERT model.
def serving_input_receiver_fn():
    feature_spec = {
        "input_ids": tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
        "input_mask": tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
        "segment_ids": tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
        "label_ids": tf.FixedLenFeature([], tf.int64)
    }
    serialized_tf_example = tf.placeholder(dtype=tf.string,
                                           shape=[None],
                                           name='input_example_tensor')
    print(serialized_tf_example, "serialized_tf_example")
    print(serialized_tf_example.shape, "Shape")
    receiver_tensors = {'example': serialized_tf_example}
    print(receiver_tensors, "receiver_tensors")
    features = tf.parse_example(serialized_tf_example, feature_spec)
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

export_path = './BERTmodel/Data/'
But I am receiving the error below: Cannot feed value of shape () for Tensor 'input_example_tensor:0', which has shape '(?,)'
This is the code I tried for prediction. Can someone advise me on this?
pred_sentences = ["The site is great", "I think it's not good"]

def getPrediction(in_sentences):
    labels = ["Negative", "Positive", "Neutral"]
    input_examples = [run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0) for x in in_sentences]
    input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
    return predict_input_fn
from tensorflow.contrib import predictor

with tf.Session() as sess:
    predict_fn = predictor.from_saved_model('model_path')
    predictions = predict_fn({"example": getPrediction(pred_sentences)})
    print(predictions)
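A note on the flow here: the serving function above parses serialized tf.train.Example protos, so the predictor expects those strings under 'example' rather than the input_fn returned by getPrediction, and the SavedModel has to be exported first. A rough sketch under those assumptions (export_path as defined above; the serialization mirrors the sketch in the first question):
# export once, using the serving_input_receiver_fn defined above
export_dir = estimator.export_saved_model(export_path, serving_input_receiver_fn=serving_input_receiver_fn)

def _int64_feature(values):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))

input_examples = [run_classifier.InputExample(guid="", text_a=x, text_b=None, label=0) for x in pred_sentences]
input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
serialized_examples = [
    tf.train.Example(features=tf.train.Features(feature={
        "input_ids": _int64_feature(f.input_ids),
        "input_mask": _int64_feature(f.input_mask),
        "segment_ids": _int64_feature(f.segment_ids),
        "label_ids": _int64_feature([f.label_id]),
    })).SerializeToString()
    for f in input_features
]

# export_saved_model returns the export directory (as bytes in TF 1.x)
predict_fn = predictor.from_saved_model(export_dir.decode() if isinstance(export_dir, bytes) else export_dir)
print(predict_fn({"example": serialized_examples}))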