First, I'm not a developer by trade (my developer is unavailable for health reasons), but I have some experience with Python/spaCy development. I need some guidance with this problem:
I have an .h5 Keras model that was written to translate a dead language from its Latin-alphabet written form into Arabic "letters" using neural machine translation.
Here is the code for creating and training the Keras model (dataset.txt contains tab-delimited sentence pairs, one side in the Latin alphabet and the other in Arabic):
import string
import re
from unicodedata import normalize
from numpy import array
from pickle import load
from pickle import dump
from numpy.random import rand
from numpy.random import shuffle
from numpy import argmax
import tensorflow as tf
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Embedding
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
# load doc into memory
def load_doc(filename):
# open the file as read only
file = open(filename, mode='rt', encoding='utf-8')
# read all text
text = file.read()
# close the file
file.close()
return text
# split a loaded document into sentences
def to_pairs(doc):
lines = doc.strip().split('\n')
pairs = [line.split('\t') for line in lines]
return pairs
# clean a list of lines
def clean_pairs(lines):
cleaned = list()
for pair in lines:
clean_pair = list()
for line in pair:
clean_pair.append(' '.join(line))
cleaned.append(clean_pair)
return array(cleaned)
# save a list of clean sentences to file
def save_clean_data(sentences, filename):
dump(sentences, open(filename, 'wb'))
print('Saved: %s' % filename)
# load dataset
filename = 'dataset.txt'
doc = load_doc(filename)
# split into latin-bok pairs
pairs = to_pairs(doc)
# clean sentences
clean_pairs = clean_pairs(pairs)
# save clean pairs to file
save_clean_data(clean_pairs, 'latin-bok.pkl')
# spot check
for i in range(100):
print('[%s] => [%s]' % (clean_pairs[i,0], clean_pairs[i,1]))
# load a clean dataset
def load_clean_sentences(filename):
return load(open(filename, 'rb'))
# save a list of clean sentences to file
def save_clean_data(sentences, filename):
dump(sentences, open(filename, 'wb'))
print('Saved: %s' % filename)
# load dataset
raw_dataset = load_clean_sentences('latin-bok.pkl')
# reduce dataset size
n_sentences = 10000
dataset = raw_dataset[:, :]
# random shuffle
shuffle(dataset)
# split into train/test
train, test = dataset[:70000], dataset[70000:]
# save
save_clean_data(dataset, 'latin-bok-both.pkl')
save_clean_data(train, 'latin-bok-train.pkl')
save_clean_data(test, 'latin-bok-test.pkl')
# load a clean dataset
def load_clean_sentences(filename):
return load(open(filename, 'rb'))
# fit a tokenizer
def create_tokenizer(lines):
tokenizer = Tokenizer()
tokenizer.fit_on_texts(lines)
return tokenizer
# max sentence length
def max_length(lines):
return max(len(line.split()) for line in lines)
# encode and pad sequences
def encode_sequences(tokenizer, length, lines):
# integer encode sequences
X = tokenizer.texts_to_sequences(lines)
# pad sequences with 0 values
X = pad_sequences(X, maxlen=length, padding='post')
return X
# one hot encode target sequence
def encode_output(sequences, vocab_size):
ylist = list()
for sequence in sequences:
encoded = to_categorical(sequence, num_classes=vocab_size)
ylist.append(encoded)
y = array(ylist)
y = y.reshape(sequences.shape[0], sequences.shape[1], vocab_size)
return y
# define NMT model
def define_model(src_vocab, tar_vocab, src_timesteps, tar_timesteps, n_units):
model = Sequential()
model.add(Embedding(src_vocab, n_units, input_length=src_timesteps, mask_zero=True))
model.add(LSTM(n_units))
model.add(RepeatVector(tar_timesteps))
model.add(LSTM(n_units, return_sequences=True))
model.add(TimeDistributed(Dense(tar_vocab, activation='softmax')))
return model
# load datasets
dataset = load_clean_sentences('latin-bok-both.pkl')
train = load_clean_sentences('latin-bok-train.pkl')
test = load_clean_sentences('latin-bok-test.pkl')
# prepare latin tokenizer
latin_tokenizer = create_tokenizer(dataset[:, 0])
latin_vocab_size = len(latin_tokenizer.word_index) + 1
latin_length = max_length(dataset[:, 0])
print('latin Vocabulary Size: %d' % latin_vocab_size)
print('latin Max Length: %d' % (latin_length))
# prepare bok tokenizer
bok_tokenizer = create_tokenizer(dataset[:, 1])
bok_vocab_size = len(bok_tokenizer.word_index) + 1
bok_length = max_length(dataset[:, 1])
print('bok Vocabulary Size: %d' % bok_vocab_size)
print('bok Max Length: %d' % (bok_length))
# prepare training data
trainX = encode_sequences(bok_tokenizer, bok_length, train[:, 1])
trainY = encode_sequences(latin_tokenizer, latin_length, train[:, 0])
trainY = encode_output(trainY, latin_vocab_size)
# prepare validation data
testX = encode_sequences(bok_tokenizer, bok_length, test[:, 1])
testY = encode_sequences(latin_tokenizer, latin_length, test[:, 0])
testY = encode_output(testY, latin_vocab_size)
# define model
model = define_model(bok_vocab_size, latin_vocab_size, bok_length, latin_length, 256)
model.compile(optimizer='adam', loss='categorical_crossentropy')
# summarize defined model
print(model.summary())
# fit model
filename = 'translator.h5'
checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
model.fit(trainX, trainY, epochs=30, batch_size=64, validation_data=(testX, testY), callbacks=[checkpoint], verbose=2)
# load a clean dataset
def load_clean_sentences(filename):
return load(open(filename, 'rb',encoding='utf-8'))
# fit a tokenizer
def create_tokenizer(lines):
tokenizer = Tokenizer()
tokenizer.fit_on_texts(lines)
return tokenizer
# max sentence length
def max_length(lines):
return max(len(line.split()) for line in lines)
# encode and pad sequences
def encode_sequences(tokenizer, length, lines):
# integer encode sequences
X = tokenizer.texts_to_sequences(lines)
# pad sequences with 0 values
X = pad_sequences(X, maxlen=length, padding='post')
return X
# map an integer to a word
def word_for_id(integer, tokenizer):
for word, index in tokenizer.word_index.items():
if index == integer:
return word
return None
# generate target given source sequence
def predict_sequence(model, tokenizer, source):
prediction = model.predict(source, verbose=0)[0]
integers = [argmax(vector) for vector in prediction]
target = list()
for i in integers:
word = word_for_id(i, tokenizer)
if word is None:
break
target.append(word)
return ' '.join(target)
# evaluate the skill of the model
def evaluate_model(model, tokenizer, sources, raw_dataset):
actual, predicted = list(), list()
for i, source in enumerate(sources):
# translate encoded source text
source = source.reshape((1, source.shape[0]))
        translation = predict_sequence(model, tokenizer, source)
raw_target, raw_src = raw_dataset[i]
if i < 10:
print('src=[%s], target=[%s], predicted=[%s]' % (raw_src, raw_target, translation))
actual.append([raw_target.split()])
predicted.append(translation.split())
    # calculate BLEU score
    from nltk.translate.bleu_score import corpus_bleu
    print('BLEU-1: %f' % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
    print('BLEU-4: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))
# load datasets
dataset = load_clean_sentences('latin-bok-both.pkl')
train = load_clean_sentences('latin-bok-train.pkl')
test = load_clean_sentences('latin-bok-test.pkl')
# prepare latin tokenizer
latin_tokenizer = create_tokenizer(dataset[:, 0])
latin_vocab_size = len(latin_tokenizer.word_index) + 1
latin_length = max_length(dataset[:, 0])
# prepare bok tokenizer
bok_tokenizer = create_tokenizer(dataset[:, 1])
bok_vocab_size = len(bok_tokenizer.word_index) + 1
bok_length = max_length(dataset[:, 1])
# prepare data
trainX = encode_sequences(bok_tokenizer, bok_length, train[:, 1])
testX = encode_sequences(bok_tokenizer, bok_length, test[:, 1])
# load model
model = load_model('translator2.h5')
# test on some training sequences
print('train')
evaluate_model(model, latin_tokenizer, trainX, train)
# test on some test sequences
print('test')
evaluate_model(model, latin_tokenizer, testX, test)
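For reference, here is a minimal sketch (not part of the original script) of how the same helpers can translate a single raw sentence. It assumes the tokenizers, lengths, helper functions and the loaded model defined above are in scope, and the helper name translate_sentence is made up; the space-joining mirrors the character-level preprocessing done in clean_pairs.
def translate_sentence(model, sentence):
    # mirror the clean_pairs preprocessing: characters joined by spaces
    cleaned = ' '.join(sentence)
    # encoded already has shape (1, bok_length), which predict_sequence expects
    encoded = encode_sequences(bok_tokenizer, bok_length, [cleaned])
    return predict_sequence(model, latin_tokenizer, encoded)

print(translate_sentence(model, 'some source sentence'))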
The model works fine and is accurate. Now I want to use it for this task: take all the tokens of a Doc, "translate" (or rather "predict") them, and store the translations in the Doc object. As I couldn't do that myself, I hired a freelance developer, but I'm getting desperate because nothing works correctly. The developer wrote me this code, but none of it works:
code.py
from typing import List
from thinc.api import TensorFlowWrapper
from keras.models import load_model
from thinc.api import Model
from spacy.tokens.doc import Doc
import spacy
from spacy.lookups import Lookups
import os
import json
from spacy.pipeline import TrainablePipe
from spacy.language import Language
from itertools import islice
from typing import Optional
from collections.abc import Callable
from collections.abc import Iterable, Iterator
from spacy.training import Example
@spacy.registry.architectures("LatinBok.v1")
def create_tensorflow_model() -> Model[List[Doc], List[str]]:
model = load_model('translator.h5')
wrapped_model = TensorFlowWrapper(model)
return wrapped_model
@spacy.registry.misc("mon_lookups_loader")
def load_lookups(data_path):
lookups = Lookups()
# "lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"
with open(os.path.join('lemma/mon_lookup_lemma.json'), 'r',encoding="utf-8") as lemma_lookup:
lookups.add_table('lemma_lookup', json.load(lemma_lookup))
return lookups
class TrainableComponent(TrainablePipe):
def __init__(self, vocab, model, name="translator"):
self.model = model
self.vocab = vocab
self.name = name
def predict(self, docs):
return self.model.predict(docs)
def set_annotations(self, docs, scores):
c = 0
get_instances = self.model.attrs["get_instances"]
for doc in docs:
for (e1, e2) in get_instances(doc):
offset = (e1.start, e2.start)
if offset not in doc._.trans:
doc._.trans[offset] = {}
for j,token in enumerate(doc):
doc._.trans[offset][token] = scores[c, j]
c += 1
def initialize(
self,
get_examples: Callable[[], Iterable[Example]],
*,
nlp: Language = None,
labels: Optional[List[str]] = None,
):
if labels is not None:
for label in labels:
self.add_label(label)
else:
for example in get_examples():
relations = example.reference._.trans
for indices, label_dict in relations.items():
for label in label_dict.keys():
self.add_label(label)
subbatch = list(islice(get_examples(), 10))
doc_sample = [eg.reference for eg in subbatch]
label_sample = self._examples_to_truth(subbatch)
self.model.initialize(X=doc_sample, Y=label_sample)
@Language.factory("translator")
def make_component(nlp, name, model):
return TrainableComponent(nlp.vocab, model, name=name)
He also rewrote my config.cfg file into:
[paths]
train = null
dev = null
vectors = null
init_tok2vec = null
[system]
gpu_allocator = null
seed = 0
[nlp]
lang = "bok"
pipeline = ["tok2vec","tagger","morphologizer","lemmatizer","translator"]
batch_size = 1000
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
tokenizer = {"#tokenizers":"spacy.Tokenizer.v1"}
[components]
[components.lemmatizer]
factory = "lemmatizer"
mode = "lookup"
model = null
overwrite = false
scorer = {"#scorers":"spacy.lemmatizer_scorer.v1"}
[components.morphologizer]
factory = "morphologizer"
extend = false
overwrite = true
scorer = {"#scorers":"spacy.morphologizer_scorer.v1"}
[components.morphologizer.model]
#architectures = "spacy.Tagger.v1"
nO = null
[components.morphologizer.model.tok2vec]
#architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
upstream = "*"
[components.tagger]
factory = "tagger"
neg_prefix = "!"
overwrite = false
scorer = {"#scorers":"spacy.tagger_scorer.v1"}
[components.tagger.model]
#architectures = "spacy.Tagger.v1"
nO = null
[components.tagger.model.tok2vec]
#architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
upstream = "*"
[components.tok2vec]
factory = "tok2vec"
[components.tok2vec.model]
#architectures = "spacy.Tok2Vec.v2"
[components.tok2vec.model.embed]
#architectures = "spacy.MultiHashEmbed.v2"
width = ${components.tok2vec.model.encode.width}
attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
rows = [5000,2500,2500,2500]
include_static_vectors = true
[components.tok2vec.model.encode]
#architectures = "spacy.MaxoutWindowEncoder.v2"
width = 256
depth = 8
window_size = 1
maxout_pieces = 3
[components.translator]
factory = "translator"
[components.translator.model]
#architectures = "LatinBok.v1"
[corpora]
[corpora.dev]
#readers = "spacy.Corpus.v1"
path = ${paths.dev}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null
[corpora.train]
#readers = "spacy.Corpus.v1"
path = ${paths.train}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null
[training]
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"
seed = ${system.seed}
gpu_allocator = ${system.gpu_allocator}
dropout = 0.1
accumulate_gradient = 1
patience = 1600
max_epochs = 0
max_steps = 20000
eval_frequency = 200
frozen_components = []
annotating_components = []
before_to_disk = null
[training.batcher]
#batchers = "spacy.batch_by_words.v1"
discard_oversize = false
tolerance = 0.2
get_length = null
[training.batcher.size]
#schedules = "compounding.v1"
start = 100
stop = 1000
compound = 1.001
t = 0.0
[training.logger]
#loggers = "spacy.ConsoleLogger.v1"
progress_bar = false
[training.optimizer]
#optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = false
eps = 0.00000001
learn_rate = 0.001
[training.score_weights]
tag_acc = 0.33
pos_acc = 0.17
morph_acc = 0.17
morph_per_feat = null
lemma_acc = 0.33
[pretraining]
[initialize]
vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null
[initialize.components]
[initialize.tokenizer]
[initialize.components.lemmatizer]
[initialize.components.lemmatizer.lookups]
#misc = "bok_lookups_loader"
data_path = 'lemma'
[initialize.components.translator]
He told me to retrain the model using Boktrain.spacy and Bokdev.spacy (converted from the CoNLL-U files I used to train the original model) and a JSON file containing the lemmas.
He then told me to run this on my command line to put everything together:
python -m spacy train config.cfg --output Bok --paths.train Boktrain.spacy --paths.dev Bokdev.spacy --code code.py
Which I did, but I got this error:
[2022-05-13 15:01:37,487] [INFO] Set up nlp object from config
[2022-05-13 15:01:37,753] [INFO] Pipeline: ['tok2vec', 'tagger', 'morphologizer', 'lemmatizer', 'translator']
[2022-05-13 15:01:45,619] [INFO] Created vocabulary
[2022-05-13 15:01:45,630] [INFO] Finished initializing nlp object
Traceback (most recent call last):
File "C:\Users\User\anaconda3\envs\BokModel\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Users\User\anaconda3\envs\BokModel\lib\runpy.py", line 86, in _run_code
exec(code, run_globals)
File "C:\Users\User\anaconda3\envs\BokModel\lib\site-packages\spacy\__main__.py", line 4, in <module>
setup_cli()
File "C:\Users\User\anaconda3\envs\BokModel\lib\site-packages\spacy\cli\_util.py", line 71, in setup_cli
command(prog_name=COMMAND)
File "C:\Users\User\anaconda3\envs\BokModel\lib\site-packages\click\core.py", line 1130, in __call__
return self.main(*args, **kwargs)
File "C:\Users\User\anaconda3\envs\BokModel\lib\site-packages\click\core.py", line 1055, in main
rv = self.invoke(ctx)
File "C:\Users\User\anaconda3\envs\BokModel\lib\site-packages\click\core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "C:\Users\User\anaconda3\envs\BokModel\lib\site-packages\click\core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "C:\Users\User\anaconda3\envs\BokModel\lib\site-packages\click\core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "C:\Users\User\anaconda3\envs\BokModel\lib\site-packages\typer\main.py", line 500, in wrapper
return callback(**use_params) # type: ignore
File "C:\Users\User\anaconda3\envs\BokModel\lib\site-packages\spacy\cli\train.py", line 45, in train_cli
train(config_path, output_path, use_gpu=use_gpu, overrides=overrides)
File "C:\Users\User\anaconda3\envs\BokModel\lib\site-packages\spacy\cli\train.py", line 72, in train
nlp = init_nlp(config, use_gpu=use_gpu)
File "C:\Users\User\anaconda3\envs\BokModel\lib\site-packages\spacy\training\initialize.py", line 84, in init_nlp
nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer)
File "C:\Users\User\anaconda3\envs\BokModel\lib\site-packages\spacy\language.py", line 1309, in initialize
proc.initialize(get_examples, nlp=self, **p_settings)
File "C:\Users\User\Downloads\BokModel\LatinToArabic\test.py", line 65, in initialize
relations = example.reference._.trans
File "C:\Users\User\anaconda3\envs\BokModel\lib\site-packages\spacy\tokens\underscore.py", line 47, in __getattr__
raise AttributeError(Errors.E046.format(name=name))
AttributeError: [E046] Can't retrieve unregistered extension attribute 'trans'. Did you forget to call the `set_extension` method?
I'm desperately trying to just get a spaCy Doc object with the translations in it, so that I can use it in my spaCy pipeline or through a custom spaCy extension. I have a basic spaCy/Python understanding, but I'm not understanding anything here. I tried running the code section by section, and it seems that only the lemma part is correct; everything else seems to be incorrect. If anyone has a solution to this, as simple as it may seem to you, I'm interested.
Your error just indicates that that particular extension attribute isn't registered. You can register it using this - put it at the top of your code.py, after the imports (an extension needs a default, getter or method; since the component code treats doc._.trans as a dict, a dict default works):
Doc.set_extension("trans", default={})
I don't know if you have other problems or not, but that will fix your current error.
If you don't understand something, I would recommend you search the spaCy docs for related topics - there's a lot of documented examples.
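If the goal is only to attach translations from the already-trained Keras model to a Doc, a non-trainable custom component may be simpler than a TrainablePipe. Below is a minimal sketch, not a drop-in solution: it assumes the source/target tokenizers from the training script (bok_tokenizer, latin_tokenizer, bok_length) and the word_for_id helper have been pickled and reloaded in this module, mirrors the space-joined-character preprocessing used in clean_pairs, and uses placeholder names (KerasTranslator, the factory name, the model path).
import spacy
from spacy.language import Language
from spacy.tokens import Doc
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
from numpy import argmax

# register the extension once, at import time, with a default
if not Doc.has_extension("trans"):
    Doc.set_extension("trans", default={})

class KerasTranslator:
    def __init__(self, model):
        self.model = model

    def __call__(self, doc):
        translations = {}
        for token in doc:
            # encode the token the same way the training data was cleaned:
            # characters joined by spaces, then tokenized and padded
            seq = bok_tokenizer.texts_to_sequences([' '.join(token.text)])
            seq = pad_sequences(seq, maxlen=bok_length, padding='post')
            pred = self.model.predict(seq, verbose=0)[0]
            words = []
            for vector in pred:
                word = word_for_id(argmax(vector), latin_tokenizer)
                if word is None:
                    break
                words.append(word)
            translations[token.i] = ' '.join(words)
        doc._.trans = translations
        return doc

@Language.factory("keras_translator")
def create_keras_translator(nlp, name):
    return KerasTranslator(load_model("translator.h5"))

# usage: add the component to an existing pipeline and read doc._.trans
nlp = spacy.load("Bok/model-best")  # placeholder path to your trained pipeline
nlp.add_pipe("keras_translator", last=True)
doc = nlp("a sentence in the source script")
print(doc._.trans)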
Related
I have a PyTorch multiclass text classifier based on an XLM-R architecture. Due to IP reasons I can't share the architecture code; I have tried to include as much detail as I can, so please point out if more information is needed. It outputs 28 classes, from 'class0' to 'class27', with probability scores that sum to 1.
I am trying to use the shap package to explain the results. I have wrapped my model in a Hugging Face custom pipeline object, and I get the following output for one input text:
pipe = CustomPipeline(model = model, tokenizer = base_tokenizer)
output = pipe(list_of_inputs) # list_of_inputs = ['this is test input']
Output:
[[{"label": "class0","score": 0.01500235591083765},{"label": "class1","score": 0.001698049483820796},{"label": "class2","score": 0.0019644589629024267},{"label": "class3","score": 0.0004418794414959848},{"label": "class4","score": 5.9095666074426845e-05},{"label": "class5","score": 0.0007908751722425222},{"label": "class6","score": 0.002379569923505187},{"label": "class7","score": 0.0035733324475586414},{"label": "class8","score": 0.0014360857894644141},{"label": "class9","score": 0.0007365105557255447},{"label": "class10","score": 0.0014471099711954594},{"label": "class11","score": 0.0011013210751116276},{"label": "class12","score": 0.0010048456024378538},{"label": "class13","score": 0.000885132874827832},{"label": "class14","score": 0.0022015925496816635},{"label": "class15","score": 0.0013197452062740922},{"label": "class16","score": 0.0037292027845978737},{"label": "class17","score": 0.004212632775306702},{"label": "class18","score": 0.9481304287910461},{"label": "class19","score": 0.001469381619244814},{"label": "class20","score": 0.0009713817853480577},{"label": "class21","score": 0.0018773127812892199},{"label": "class22","score": 0.0009251375449821353},{"label": "class23","score": 0.0007248060428537428},{"label": "class24","score": 0.00031718137324787676},{"label": "class25","score": 0.0011144360760226846},{"label": "class26","score": 0.0002294857840752229},{"label": "class27","score": 0.00025681318948045373}]]
The output is in the same format as in the notebook referenced by the shap package.
Now, when I try to use the shap Explainer:
pipe = CustomPipeline(model = model, tokenizer = base_tokenizer)
explainer = shap.Explainer(pipe)
shap_values = explainer(list_of_inputs)
shap.plots.text(shap_values)
Error:
File "C:\pipeline.py", line 104, in udtm_xai
shap_values = explainer(query_data)
File "C:\Users\Miniconda3\envs\dtlr_udtm\lib\site-packages\shap\explainers\_partition.py", line 136, in __call__
return super().__call__(
File "C:\Users\Miniconda3\envs\dtlr_udtm\lib\site-packages\shap\explainers\_explainer.py", line 266, in __call__
row_result = self.explain_row(
File "C:\Users\Miniconda3\envs\dtlr_udtm\lib\site-packages\shap\explainers\_partition.py", line 161, in explain_row
self._curr_base_value = fm(m00.reshape(1, -1), zero_index=0)[0] # the zero index param tells the masked model what the baseline is
File "C:\Users\Miniconda3\envs\dtlr_udtm\lib\site-packages\shap\utils\_masked_model.py", line 67, in __call__
return self._full_masking_call(masks, batch_size=batch_size)
File "C:\Users\Miniconda3\envs\dtlr_udtm\lib\site-packages\shap\utils\_masked_model.py", line 144, in _full_masking_call
outputs = self.model(*joined_masked_inputs)
File "C:\Users\Miniconda3\envs\dtlr_udtm\lib\site-packages\shap\models\_transformers_pipeline.py", line 35, in __call__
output[i, self.label2id[obj["label"]]] = sp.special.logit(obj["score"]) if self.rescale_to_logits else obj["score"]
KeyError: 'class0'
The code is unable to find 'class0'. In the postprocess function of the pipeline class, I read a file containing the label mappings, take the softmax scores returned by the _forward function, and build a list of dictionaries in the final output format:
import json
import pandas as pd
import torch
from transformers import Pipeline

class CustomPipeline(Pipeline):
    def _sanitize_parameters(self, **kwargs):
        self.mapping_json = json.loads(open("mapping_file.json", "r", encoding="utf-8").read().strip())
        return {}, {}, {}

    def preprocess(self, inputs):
        inputs_df = pd.DataFrame([inputs], columns=["Query"])
        # getInferenceDataloader is a custom helper defined elsewhere
        inference_dataloader = getInferenceDataloader(inputs_df, self.tokenizer, batch_size=16)
        return inference_dataloader

    def softmax_with_temp(self, input, t):
        ex = torch.exp(input / t)
        sum = torch.sum(ex, 0)
        return ex / sum

    def _forward(self, model_inputs):
        if torch.cuda.is_available():
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')
        final_pred_labels = []
        final_scores = []
        batch_count = 0
        self.model.eval()
        for batch in model_inputs:
            batch_count += 1
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_input_task = torch.full((b_input_ids.shape[0],), -1, dtype=torch.int32).to(device)
            with torch.no_grad():
                result = self.model((b_input_ids, b_input_mask, b_input_task))
            logits = result
            logits = logits.detach().cpu().numpy()
            pred_softmax_t = self.softmax_with_temp(torch.from_numpy(logits[0]), 2).numpy()
        return pred_softmax_t

    def postprocess(self, model_outputs):
        output_list = [{"label": "class0", "score": float(model_outputs[0])}]
        index = 1
        for label in self.mapping_json.keys():  # self.mapping_json contains label names read from a file
            output_list.append({"label": label, "score": float(model_outputs[index])})  # model_outputs is a list of 28 floating-point scores
            index += 1
        return output_list
Am I missing the definition of some label-related variable, and is that why I am getting the 'class0' KeyError?
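A hedged note rather than a definitive answer: the traceback ends in shap/models/_transformers_pipeline.py, whose wrapper appears to build its label2id mapping from the wrapped model's config (id2label) rather than from the labels your postprocess returns, so a config that does not know 'class0'..'class27' would produce exactly this KeyError. A sketch of the kind of alignment to try, reusing the names above and assuming the underlying model carries a Hugging Face-style config object:
# make the model config's label maps match the labels emitted by postprocess
id2label = {i: "class" + str(i) for i in range(28)}
model.config.id2label = id2label
model.config.label2id = {label: i for i, label in id2label.items()}

pipe = CustomPipeline(model=model, tokenizer=base_tokenizer)
explainer = shap.Explainer(pipe)
shap_values = explainer(list_of_inputs)
shap.plots.text(shap_values)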
I have written the line "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" but I am still getting the following error. I have provided all the code I have written so far; please help me with this.
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat1 in method wrapper_addmm)
Train.py File
import json
from nltk_utils import tokenize, stem, bag_of_words
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from model import NeuralNet
with open('intents.json', 'r') as f:
intents = json.load(f)
all_words = []
tags = []
xy = []
for intent in intents['intents']:
tag = intent['tag']
tags.append(tag)
for pattern in intent['patterns']:
w = tokenize(pattern)
all_words.extend(w)
xy.append((w, tag))
ignore_words = ['?', '!', '.', ',']
all_words = [stem(w) for w in all_words if w not in ignore_words]
all_words = sorted(set(all_words))
tags = sorted(set(tags))
x_train = []
y_train = []
for (pattern_sentence, tag) in xy:
bag = bag_of_words(pattern_sentence, all_words)
x_train.append(bag)
label = tags.index(tag)
y_train.append(label) # CrossEntropyLoss
x_train = np.array(x_train)
y_train = np.array(y_train)
class ChatDataset(Dataset):
def __init__(self):
self.n_samples = len(x_train)
self.x_data = x_train
self.y_data = y_train
# dataset[idx]
def __getitem__(self, index):
return self.x_data[index], self.y_data[index]
def __len__(self):
return self.n_samples
batch_size = 8
hidden_size = 8
output_size = len(tags)
input_size = len(x_train[0])
learning_rate = 0.001
num_epochs = 1000
print(input_size, len(all_words))
print(output_size, tags)
dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
for(words, labels) in train_loader:
words = words.to(device)
labels = labels.type(torch.LongTensor) # <----- Here (Casting)
labels = labels.to(device)
# forward
outputs = model(words)
loss = criterion(outputs, labels)
# backward and optimizer step
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (epoch + 1) % 100 == 0:
print(f'epoch {epoch + 1}/{num_epochs}, loss={loss.item():.4f}')
print(f'final loss: {loss.item():.4f}')
data = {
"model_state" : model.state_dict(),
"input_size" : input_size,
"output_size" : output_size,
"hidden_size" : hidden_size,
"all_words" : all_words,
"tags" : tags,
}
FILE = "data.pth"
torch.save(data, FILE)
print(f'training complete , file saved to {FILE}')
chat.py file
import random
import json
import torch
from model import NeuralNet
from nltk_utils import bag_of_words, tokenize
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cuda')
with open('intents.json', 'r') as f:
intents = json.load(f)
FILE = 'data.pth'
data = torch.load(FILE)
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data["all_words"]
tags = data["tags"]
model_state = data["model_state"]
model = NeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
model.eval()
bot_name = 'Sam'
print("Let's chat: type 'quit' to exit")
while True:
sentence = input('You: ')
if sentence == "Quit":
break
sentence = tokenize(sentence)
X = bag_of_words(sentence, all_words)
X = X.reshape(1, X.shape[0])
X = torch.from_numpy(X)
output = model(X)
_, predicted = torch.max(output, dim=1)
tag = tags[predicted.item()]
probs = torch.softmax(output, dim=1)
prob = probs[0][predicted.item()]
if prob.item() > 0.75:
for intent in intents["intents"]:
if tag == intent["tag"]:
print(f"{bot_name}: {random.choice(intent['responses'])}")
else:
print(f"{bot_name}: I do not understand...:(")
After running chat.py, the runtime error is raised while the chatbot is trying to send its response.
I should get a response from the chatbot, but instead I get the following error:
Error Image
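A hedged note on the error above: in chat.py the model is moved to the GPU, but the bag-of-words tensor built inside the chat loop stays on the CPU, which matches the cuda:0 vs cpu mismatch in the message. A minimal sketch of the likely change, using the same names as chat.py:
sentence = tokenize(sentence)
X = bag_of_words(sentence, all_words)
X = X.reshape(1, X.shape[0])
X = torch.from_numpy(X).to(device)  # move the input to the same device as the model
output = model(X)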
I'm trying to train a GPT-2 model with a custom dataset, but it fails with the error below.
ValueError: Unexpected result of `train_function` (Empty logs). Please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.
I thought the model and dataset were correctly defined and processed, following this article.
But the error shows up when model.fit is executed.
Can someone tell me how to resolve the error, or the proper way to train the model?
from transformers import TFGPT2LMHeadModel, GPT2Tokenizer
import tensorflow as tf
# Define the model
model = TFGPT2LMHeadModel.from_pretrained('gpt2', from_pt=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
model.compile(optimizer=optimizer, loss=[loss, *[None] * model.config.n_layer], metrics=[metric], run_eagerly=True)
model.summary()
# Obtain the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.add_special_tokens({
"eos_token": "</s>",
"bos_token": "<s>",
"unk_token": "<unk>",
"pad_token": "<pad>",
"mask_token": "<mask>"
})
# Get single string
paths = ['data.txt'] # each file only contains some sentences.
single_string = ''
for filename in paths:
with open(filename, "r", encoding='utf-8') as f:
x = f.read()
single_string += x + tokenizer.eos_token
string_tokenized = tokenizer.encode(single_string)
print(string_tokenized)
# creating the TensorFlow dataset
examples = []
block_size = 100
BATCH_SIZE = 12
BUFFER_SIZE = 1000
for i in range(0, len(string_tokenized) - block_size + 1, block_size):
examples.append(string_tokenized[i:i + block_size])
inputs, labels = [], []
for ex in examples:
inputs.append(ex[:-1])
labels.append(ex[1:])
dataset = tf.data.Dataset.from_tensor_slices((inputs, labels))
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)
print(dataset)
# train the model
num_epoch = 10
history = model.fit(dataset, epochs=num_epoch) # <- shows the error
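A hedged observation rather than a confirmed fix: with block_size = 100, BATCH_SIZE = 12 and drop_remainder=True, a data.txt that only contains a few sentences can leave the batched dataset completely empty, and fitting on an empty dataset is one common way to get an "Empty logs" ValueError. A quick sanity check before model.fit, using the same names as above:
# check that tokenization and batching actually produced training batches
print('tokens:', len(string_tokenized))
print('examples of length', block_size, ':', len(examples))
num_batches = sum(1 for _ in dataset)
print('batches:', num_batches)
if num_batches == 0:
    raise ValueError('dataset is empty - add more text or lower BATCH_SIZE / block_size')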
I am trying to produce a model that will generate a caption for an image, using ResNet as the encoder, a Transformer as the decoder, and COCO as the dataset.
After training my model for 10 epochs, it failed to produce anything other than the word <pad>, which implies that the only result after going through the model was tokens of 0, which correspond to <pad>.
Using the debugger, it seems that the problem occurs at the argmax, where the output just becomes zero rather than anything else, but I don't know how to fix it. Is it an issue with my model, or with the way it is trained?
I based my model on this GitHub repository, if it helps.
The script to download the COCO data is here:
Download.sh
mkdir data
wget http://msvocds.blob.core.windows.net/annotations-1-0-3/captions_train-val2014.zip -P ./data/
wget http://images.cocodataset.org/zips/train2014.zip -P ./data/
wget http://images.cocodataset.org/zips/val2014.zip -P ./data/
unzip ./data/captions_train-val2014.zip -d ./data/
rm ./data/captions_train-val2014.zip
unzip ./data/train2014.zip -d ./data/
rm ./data/train2014.zip
unzip ./data/val2014.zip -d ./data/
rm ./data/val2014.zip
Any help is much appreciated.
Here is my code:
model.py
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math, copy, time
import torchvision.models as models
from torch.nn import TransformerDecoderLayer, TransformerDecoder
from torch.nn.utils.rnn import pack_padded_sequence
from torch.autograd import Variable
class EncoderCNN(nn.Module):
def __init__(self, embed_size):
super(EncoderCNN, self).__init__()
resnet = models.resnet152(pretrained=True)
self.resnet = nn.Sequential(*list(resnet.children())[:-2])
self.conv1 = nn.Conv2d(2048, embed_size, 1)
self.embed_size = embed_size
self.fine_tune()
def forward(self, images):
features = self.resnet(images)
batch_size, _,_,_ = features.shape
features = self.conv1(features)
features = features.view(batch_size, self.embed_size, -1)
features = features.permute(2, 0, 1)
return features
def fine_tune(self, fine_tune=True):
for p in self.resnet.parameters():
p.requires_grad = False
# If fine-tuning, only fine-tune convolutional blocks 2 through 4
for c in list(self.resnet.children())[5:]:
for p in c.parameters():
p.requires_grad = fine_tune
class PositionEncoder(nn.Module):
def __init__(self, d_model, dropout, max_len=5000):
super(PositionEncoder, self).__init__()
self.dropout = nn.Dropout(p=dropout)
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0).transpose(0, 1)
self.register_buffer('pe', pe)
def forward(self, x):
x = x + self.pe[:x.size(0), :]
return self.dropout(x)
class Embedder(nn.Module):
def __init__(self, vocab_size, d_model):
super().__init__()
self.embed = nn.Embedding(vocab_size, d_model)
def forward(self, x):
return self.embed(x)
class Transformer(nn.Module):
def __init__(self, vocab_size, d_model, h, num_hidden, N, device, dropout_dec=0.1, dropout_pos=0.1):
super(Transformer, self).__init__()
decoder_layers = TransformerDecoderLayer(d_model, h, num_hidden, dropout_dec)
self.source_mask = None
self.device = device
self.d_model = d_model
        self.pos_decoder = PositionEncoder(d_model, dropout_pos)
self.decoder = TransformerDecoder(decoder_layers, N)
self.embed = Embedder(vocab_size, d_model)
self.linear = nn.Linear(d_model, vocab_size)
self.init_weights()
def forward(self, source, mem):
source = source.permute(1,0)
if self.source_mask is None or self.source_mask.size(0) != len(source):
self.source_mask = nn.Transformer.generate_square_subsequent_mask(self=self, sz=len(source)).to(self.device)
source = self.embed(source)
source = source*math.sqrt(self.d_model)
source = self.pos_decoder(source)
output = self.decoder(source, mem, self.source_mask)
output = self.linear(output)
return output
def init_weights(self):
initrange = 0.1
self.linear.bias.data.zero_()
self.linear.weight.data.uniform_(-initrange, initrange)
def pred(self, memory, pred_len):
batch_size = memory.size(1)
src = torch.ones((pred_len, batch_size), dtype=int) * 2
if self.source_mask is None or self.source_mask.size(0) != len(src):
self.source_mask = nn.Transformer.generate_square_subsequent_mask(self=self, sz=len(src)).to(self.device)
output = torch.ones((pred_len, batch_size), dtype=int)
src, output = src.cuda(), output.cuda()
for i in range(pred_len):
src_emb = self.embed(src) # src_len * batch size * embed size
src_emb = src_emb*math.sqrt(self.d_model)
src_emb = self.pos_decoder(src_emb)
out = self.decoder(src_emb, memory, self.source_mask)
out = out[i]
out = self.linear(out) # batch_size * vocab_size
out = out.argmax(dim=1)
if i < pred_len-1:
src[i+1] = out
output[i] = out
return output
Data_Loader.py
import torch
import torchvision.transforms as transforms
import torch.utils.data as data
import os
import pickle
import numpy as np
import nltk
from PIL import Image
from build_vocab import Vocabulary
from pycocotools.coco import COCO
class CocoDataset(data.Dataset):
"""COCO Custom Dataset compatible with torch.utils.data.DataLoader."""
def __init__(self, root, json, vocab, transform=None):
"""Set the path for images, captions and vocabulary wrapper.
Args:
root: image directory.
json: coco annotation file path.
vocab: vocabulary wrapper.
transform: image transformer.
"""
self.root = root
self.coco = COCO(json)
self.ids = list(self.coco.anns.keys())
self.vocab = vocab
self.transform = transform
def __getitem__(self, index):
"""Returns one data pair (image and caption)."""
coco = self.coco
vocab = self.vocab
ann_id = self.ids[index]
caption = coco.anns[ann_id]['caption']
img_id = coco.anns[ann_id]['image_id']
path = coco.loadImgs(img_id)[0]['file_name']
image = Image.open(os.path.join(self.root, path)).convert('RGB')
if self.transform is not None:
image = self.transform(image)
# Convert caption (string) to word ids.
tokens = nltk.tokenize.word_tokenize(str(caption).lower())
caption = []
caption.append(vocab('<start>'))
caption.extend([vocab(token) for token in tokens])
caption.append(vocab('<end>'))
target = torch.Tensor(caption)
return image, target
def __len__(self):
return len(self.ids)
def collate_fn(data):
"""Creates mini-batch tensors from the list of tuples (image, caption).
We should build custom collate_fn rather than using default collate_fn,
because merging caption (including padding) is not supported in default.
Args:
data: list of tuple (image, caption).
- image: torch tensor of shape (3, 256, 256).
- caption: torch tensor of shape (?); variable length.
Returns:
images: torch tensor of shape (batch_size, 3, 256, 256).
targets: torch tensor of shape (batch_size, padded_length).
lengths: list; valid length for each padded caption.
"""
# Sort a data list by caption length (descending order).
data.sort(key=lambda x: len(x[1]), reverse=True)
images, captions = zip(*data)
# Merge images (from tuple of 3D tensor to 4D tensor).
images = torch.stack(images, 0)
# Merge captions (from tuple of 1D tensor to 2D tensor).
lengths = [len(cap) for cap in captions]
targets = torch.zeros(len(captions), max(lengths)).long()
for i, cap in enumerate(captions):
end = lengths[i]
targets[i, :end] = cap[:end]
return images, targets, lengths
def get_loader(root, json, vocab, transform, batch_size, shuffle, num_workers):
"""Returns torch.utils.data.DataLoader for custom coco dataset."""
# COCO caption dataset
coco = CocoDataset(root=root,
json=json,
vocab=vocab,
transform=transform)
# Data loader for COCO dataset
# This will return (images, captions, lengths) for each iteration.
# images: a tensor of shape (batch_size, 3, 224, 224).
# captions: a tensor of shape (batch_size, padded_length).
# lengths: a list indicating valid length for each caption. length is (batch_size).
data_loader = torch.utils.data.DataLoader(dataset=coco,
batch_size=batch_size,
shuffle=shuffle,
num_workers=num_workers,
collate_fn=collate_fn)
return data_loader
Build_vocab.py
import nltk
import pickle
import argparse
from collections import Counter
from pycocotools.coco import COCO
class Vocabulary(object):
def __init__(self):
self.word2idx = {}
self.idx2word = {}
self.idx = 0
def add_word(self, word):
if not word in self.word2idx:
self.word2idx[word] = self.idx
self.idx2word[self.idx] = word
self.idx += 1
def __call__(self, word):
if not word in self.word2idx:
return self.word2idx['<unk>']
return self.word2idx[word]
def __len__(self):
return len(self.word2idx)
def build_vocab(json, threshold):
coco = COCO(json)
counter = Counter()
ids = coco.anns.keys()
for i, id in enumerate(ids):
caption = str(coco.anns[id]['caption'])
tokens = nltk.tokenize.word_tokenize(caption.lower())
counter.update(tokens)
if (i+1) % 1000 == 0:
print("[{}/{}] Tokenized the captions.".format(i+1, len(ids)))
# If the word frequency is less than 'threshold', then the word is discarded.
words = [word for word, cnt in counter.items() if cnt >= threshold]
# Create a vocab wrapper and add some special tokens.
vocab = Vocabulary()
vocab.add_word('<pad>')
vocab.add_word('<start>')
vocab.add_word('<end>')
vocab.add_word('<unk>')
# Add the words to the vocabulary.
for i, word in enumerate(words):
vocab.add_word(word)
return vocab
def main(args):
vocab = build_vocab(json=args.caption_path, threshold=args.threshold)
vocab_path = args.vocab_path
with open(vocab_path, 'wb') as f:
pickle.dump(vocab, f)
print("Total vocabulary size: {}".format(len(vocab)))
print("Saved the vocabulary wrapper to '{}'".format(vocab_path))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--caption_path', type=str,
default='./data/annotations/captions_train2014.json',
help='path for train annotation file')
parser.add_argument('--vocab_path', type=str, default='./data/vocab.pkl',
help='path for saving vocabulary wrapper')
parser.add_argument('--threshold', type=int, default=4,
help='minimum word count threshold')
args = parser.parse_args()
main(args)
train.py
import argparse
import torch
import torch.nn as nn
import numpy as np
import os
import pickle
import math
from tqdm import tqdm
from data_loader import get_loader
from build_vocab import Vocabulary
from model import EncoderCNN, Transformer as Decoder  # the decoder class is named Transformer in model.py
from torch.nn.utils.rnn import pack_padded_sequence
from torchvision import transforms
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def main(args):
batch_size = 64
embed_size = 512
num_heads = 8
num_layers = 6
num_workers = 2
num_epoch = 5
lr = 1e-3
load = False
# Create model directory
if not os.path.exists('models/'):
os.makedirs('models/')
# Image preprocessing, normalization for the pretrained resnet
transform = transforms.Compose([
transforms.RandomCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406),
(0.229, 0.224, 0.225))])
# Load vocabulary wrapper
with open('data/vocab.pkl', 'rb') as f:
vocab = pickle.load(f)
# Build data loader
data_loader = get_loader('data/resized2014', 'data/annotations/captions_train2014.json', vocab,
transform, batch_size,
shuffle=True, num_workers=num_workers)
encoder = EncoderCNN(embed_size).to(device)
encoder.fine_tune(False)
decoder = Decoder(len(vocab), embed_size, num_heads, embed_size, num_layers).to(device)
if(load):
encoder.load_state_dict(torch.load(os.path.join('models/', 'encoder-{}-{}.ckpt'.format(5, 5000))))
decoder.load_state_dict(torch.load(os.path.join('models/', 'decoder-{}-{}.ckpt'.format(5, 5000))))
print("Load Successful")
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
encoder_optim = torch.optim.Adam(encoder.parameters(), lr=lr)
decoder_optim = torch.optim.Adam(decoder.parameters(), lr=lr)
# Train the models
for epoch in range(num_epoch):
encoder.train()
decoder.train()
for i, (images, captions, lengths) in tqdm(enumerate(data_loader), total=len(data_loader), leave=False):
# Set mini-batch dataset
images = images.to(device)
captions = captions.to(device)
# Forward, backward and optimize
features = encoder(images)
cap_input = captions[:, :-1]
cap_target = captions[:, 1:]
outputs = decoder(cap_input, features)
outputs = outputs.permute(1,0,2)
outputs_shape = outputs.reshape(-1, len(vocab))
loss = criterion(outputs_shape, cap_target.reshape(-1))
decoder.zero_grad()
encoder.zero_grad()
loss.backward()
encoder_optim.step()
decoder_optim.step()
# Save the model checkpoints
if (i+1) % args.save_step == 0:
torch.save(decoder.state_dict(), os.path.join(
'models/', 'decoder-{}-{}.ckpt'.format(epoch+1, i+1)))
torch.save(encoder.state_dict(), os.path.join(
'models/', 'encoder-{}-{}.ckpt'.format(epoch+1, i+1)))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--log_step', type=int , default=10, help='step size for prining log info')
parser.add_argument('--save_step', type=int , default=1000, help='step size for saving trained models')
args = parser.parse_args()
print(args)
main(args)
sample.py
import torch
import matplotlib.pyplot as plt
import numpy as np
import argparse
import pickle
import os
from torchvision import transforms
from build_vocab import Vocabulary
from data_loader import get_loader
from model import EncoderCNN, Transformer as Decoder  # the decoder class is named Transformer in model.py
from PIL import Image
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#
def token_sentence(decoder_out, itos):
tokens = decoder_out
tokens = tokens.transpose(1, 0)
tokens = tokens.cpu().numpy()
results = []
for instance in tokens:
result = ' '.join([itos[x] for x in instance])
        results.append(''.join(result.partition('<end>')[0]))  # Cut before '<end>', the end token defined in build_vocab.py
return results
def load_image(image_path, transform=None):
image = Image.open(image_path).convert('RGB')
image = image.resize([224, 224], Image.LANCZOS)
if transform is not None:
image = transform(image).unsqueeze(0)
return image
def main(args):
batch_size = 64
embed_size = 512
num_heads = 8
num_layers = 6
num_workers = 2
# Image preprocessing
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.485, 0.456, 0.406),
(0.229, 0.224, 0.225))])
# Load vocabulary wrapper
with open(args.vocab_path, 'rb') as f:
vocab = pickle.load(f)
data_loader = get_loader('data/resized2014', 'data/annotations/captions_train2014.json', vocab,
transform, batch_size,
shuffle=True, num_workers=num_workers)
# Build models
encoder = EncoderCNN(embed_size).to(device)
encoder.fine_tune(False)
decoder = Decoder(len(vocab), embed_size, num_heads, embed_size, num_layers).to(device)
# Load trained models
encoder.load_state_dict(torch.load(os.path.join('models/', 'encoder-{}-{}.ckpt'.format(1, 4000))))
decoder.load_state_dict(torch.load(os.path.join('models/', 'decoder-{}-{}.ckpt'.format(1, 4000))))
encoder.eval()
decoder.eval()
itos = vocab.idx2word
pred_len = 100
result_collection = []
# Decode with greedy
# with torch.no_grad():
# for i, (images, captions, lengths) in enumerate(data_loader):
# images = images.to(device)
# features = encoder(images)
# output = decoder.generator(features, pred_len)
# result_caption = token_sentence(output, itos)
# result_collection.extend(result_caption)
# Decode with greedy
with torch.no_grad():
for batch_index, (inputs, captions, caplens) in enumerate(data_loader):
inputs, captions = inputs.cuda(), captions.cuda()
enc_out = encoder(inputs)
captions_input = captions[:, :-1]
captions_target = captions[:, 1:]
output = decoder.pred(enc_out, pred_len)
result_caption = token_sentence(output, itos)
result_collection.extend(result_caption)
print("Prediction-greedy:", result_collection[1])
print("Prediction-greedy:", result_collection[2])
print("Prediction-greedy:", result_collection[3])
print("Prediction-greedy:", result_collection[4])
print("Prediction-greedy:", result_collection[5])
print("Prediction-greedy:", result_collection[6])
print("Prediction-greedy:", result_collection[7])
print("Prediction-greedy:", result_collection[8])
print("Prediction-greedy:", result_collection[9])
print("Prediction-greedy:", result_collection[10])
print("Prediction-greedy:", result_collection[11])
# # Prepare an image
# image = load_image(args.image, transform)
# image_tensor = image.to(device)
# # Generate an caption from the image
# feature = encoder(image_tensor)
# sampled_ids = decoder.generator(feature, pred_len)
# sampled_ids = sampled_ids[0].cpu().numpy() # (1, max_seq_length) -> (max_seq_length)
# # Convert word_ids to words
# sampled_caption = []
# for word_id in sampled_ids:
# word = vocab.idx2word[word_id]
# sampled_caption.append(word)
# if word == '<end>':
# break
# sentence = ' '.join(sampled_caption)
# # Print out the image and the generated caption
# print (sentence)
# image = Image.open(args.image)
# plt.imshow(np.asarray(image))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--image', type=str, required=False, help='input image for generating caption')
parser.add_argument('--vocab_path', type=str, default='data/vocab.pkl', help='path for vocabulary wrapper')
args = parser.parse_args()
main(args)
resize.py
import argparse
import os
from PIL import Image
def resize_image(image, size):
"""Resize an image to the given size."""
return image.resize(size, Image.ANTIALIAS)
def resize_images(image_dir, output_dir, size):
"""Resize the images in 'image_dir' and save into 'output_dir'."""
if not os.path.exists(output_dir):
os.makedirs(output_dir)
images = os.listdir(image_dir)
num_images = len(images)
for i, image in enumerate(images):
with open(os.path.join(image_dir, image), 'r+b') as f:
with Image.open(f) as img:
img = resize_image(img, size)
img.save(os.path.join(output_dir, image), img.format)
if (i+1) % 100 == 0:
print ("[{}/{}] Resized the images and saved into '{}'."
.format(i+1, num_images, output_dir))
def main(args):
image_dir = args.image_dir
output_dir = args.output_dir
image_size = [args.image_size, args.image_size]
resize_images(image_dir, output_dir, image_size)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--image_dir', type=str, default='./data/train2014/',
help='directory for train images')
parser.add_argument('--output_dir', type=str, default='./data/resized2014/',
help='directory for saving resized images')
parser.add_argument('--image_size', type=int, default=256,
help='size for image after processing')
args = parser.parse_args()
main(args)
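A hedged note on the all-<pad> captions: collate_fn zero-pads every caption and train.py computes nn.CrossEntropyLoss over every position, so the extremely frequent <pad> target (index 0 in build_vocab.py) can dominate the loss and push the argmax toward 0. One common remedy, sketched with the same names as train.py, is to mask the pad index out of the loss (a lower learning rate than 1e-3 is also commonly used for transformer decoders):
# ignore the <pad> index when computing the loss
criterion = nn.CrossEntropyLoss(ignore_index=vocab('<pad>'))

# training step, unchanged apart from the criterion
outputs = decoder(cap_input, features)
outputs = outputs.permute(1, 0, 2)
loss = criterion(outputs.reshape(-1, len(vocab)), cap_target.reshape(-1))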
Environment: Win10, Python 3.6.8, Tensorflow-gpu 1.13.1
I have a model based on tf.estimator.DNNRegressor which I've trained locally. Now I'm having an issue getting the predict_input_fn function to work with real-time data. I have it working by reading a CSV file with a single entry, but I would like to pass in a numpy array or some other form of in-memory data and have the model make a prediction.
Here's a code snippet:
tf.enable_eager_execution()
model = tf.estimator.DNNRegressor(
hidden_units=[128,128],
feature_columns=feature_columns,
model_dir=model_dir)
def predict_input_fn():
test_array = np.array([-0.057,-0.0569]) # shortened, normally 56 floats
defaults = [tf.float32] * 56
filenames = ['./prediction.csv']
dataset_old = tf.data.experimental.CsvDataset( # this works
filenames=filenames,
record_defaults=defaults)
dataset = tf.data.Dataset.from_tensor_slices(test_array) # this doesn't :(
dataset = dataset.batch(1)
features = tf.data.experimental.get_single_element(dataset)
return dict(zip(feature_names, features)), None
predictor = model.predict(input_fn=predict_input_fn)
print(next(predictor))
I'm getting the following error even though eager execution is enabled:
TypeError: Tensor objects are only iterable when eager execution is enabled. To iterate over this tensor use tf.map_fn.
The data format I'm feeding in for training is 56 float32 variables as features and 1 float32 variable as the label. As mentioned, it works perfectly when using the tf.data.experimental.CsvDataset() function, but for the life of me I can't figure out how to feed in a basic array.
I've tried putting the array in a container array, using the standard make_one_shot_iterator() function on the dataset, and using tf.constant() to build a tensor before passing it to from_tensor_slices(). I have gone through the docs and every example I can find, but still can't get it working.
Struggling a bit; any help would be greatly appreciated.
Full code below:
import tensorflow as tf
# import tensorflowjs as tfjs
import json
import numpy as np
tf.enable_eager_execution()
model_dir = './dnn3/'
feature_columns = []
feature_names = []
feature_spec = {}
for i in range(56):
feature_columns.append(tf.feature_column.numeric_column(key="jkey"+str(i)))
feature_names.append("jkey"+str(i))
feature_spec["jkey"+str(i)] = tf.FixedLenFeature([1],tf.float32)
def serving_input_receiver_fn():
serialized_tf_example = tf.placeholder(dtype=tf.string,shape=[None],name='input_tensors')
receiver_tensors = {'inputs': serialized_tf_example}
features = tf.parse_example(serialized_tf_example, feature_spec)
return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
def train_input_fn():
defaults = [tf.float32] * 57
filenames = ['./training.csv'] # can add to this array
dataset = tf.data.experimental.CsvDataset(
filenames=filenames,
record_defaults=defaults)
dataset = dataset.shuffle(1000000)
dataset = dataset.repeat(1000) # epochs
dataset = dataset.batch(128)
iter = dataset.make_one_shot_iterator()
next = iter.get_next()
features = next[:-1]
label = next[-1]
return dict(zip(feature_names, features)), label
def eval_input_fn():
defaults = [tf.float32] * 57
filenames = ['./evaluation.csv']
dataset = tf.data.experimental.CsvDataset(
filenames=filenames,
record_defaults=defaults)
dataset = dataset.shuffle(10000)
dataset = dataset.repeat(100)
dataset = dataset.batch(100)
iter = dataset.make_one_shot_iterator()
next = iter.get_next()
features = next[:-1]
label = next[-1]
return dict(zip(feature_names, features)), label
def predict_input_fn():
test_array = [[-0.057,-0.0569,-0.0569,-0.0759,-0.0568,-0.0379,-0.0379,-0.0567,-0.0946,0.0378,0.0566,0.0755,0,0,0,0,-0.0189,0,0,0,0,0.0379,0.0378,0.0568,0,0.0378,0,0.0758,0.0379,0.0379,0.057,0.0758,0.0379,0.0381,-0.152,0.2089,-0.0952,0.0568,-0.2485,.4667,-0.0803,-0.0775,-0.0832,-0.054,-0.0989,0.0063,0.0037,-0.0342,0.0007,0.0281,0.0187,-0.0065,0.0423,0.078,-0.0285,-0.0093],[-0.057,-0.0569,-0.0569,-0.0759,-0.0568,-0.0379,-0.0379,-0.0567,-0.0946,0.0378,0.0566,0.0755,0,0,0,0,-0.0189,0,0,0,0,0.0379,0.0378,0.0568,0,0.0378,0,0.0758,0.0379,0.0379,0.057,0.0758,0.0379,0.0381,-0.152,0.2089,-0.0952,0.0568,-0.2485,.4667,-0.0803,-0.0775,-0.0832,-0.054,-0.0989,0.0063,0.0037,-0.0342,0.0007,0.0281,0.0187,-0.0065,0.0423,0.078,-0.0285,-0.0093]]
defaults = [tf.float32] * 56
filenames = ['./prediction.csv']
dataset2 = tf.data.experimental.CsvDataset(
filenames=filenames,
record_defaults=defaults)
test_tensor = tf.constant(test_array)
#test_dict = dict(zip(feature_names, test_array))
dataset = tf.data.Dataset.from_tensor_slices(test_tensor)
dataset = dataset.batch(1)
features = tf.data.experimental.get_single_element(dataset)
return dict(zip(feature_names, features)), None
model = tf.estimator.DNNRegressor(
hidden_units=[128,128],
feature_columns=feature_columns,
model_dir=model_dir)
def train():
for i in range(5):
model.train(input_fn=train_input_fn, steps=10000)
eval_result = model.evaluate(input_fn=eval_input_fn)
print(eval_result)
def train_eval():
train_spec = tf.estimator.TrainSpec(train_input_fn,max_steps=20000)
eval_spec = tf.estimator.EvalSpec(eval_input_fn)
tf.estimator.train_and_evaluate(model,train_spec,eval_spec)
def save():
save_dir = model.export_savedmodel(export_dir_base=model_dir+'saved', serving_input_receiver_fn=serving_input_receiver_fn)
print('Saved to ',save_dir)
def predict():
predictor = model.predict(input_fn=predict_input_fn)
print(next(predictor))
predict()
# tfjs.converters.save_keras_model(model, './tfjs/')
And full error response:
WARNING:tensorflow:From C:\Users\jimba\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
Traceback (most recent call last):
File "train.py", line 93, in <module>
predict()
File "train.py", line 91, in predict
print(next(predictor))
File "C:\Users\jimba\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 609, in predict
input_fn, model_fn_lib.ModeKeys.PREDICT)
File "C:\Users\jimba\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 967, in _get_features_from_input_fn
result = self._call_input_fn(input_fn, mode)
File "C:\Users\jimba\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1079, in _call_input_fn
return input_fn(**kwargs)
File "train.py", line 67, in predict_input_fn
return dict(zip(feature_names, features)), None
File "C:\Users\jimba\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 442, in __iter__
"Tensor objects are only iterable when eager execution is "
TypeError: Tensor objects are only iterable when eager execution is enabled. To iterate over this tensor use tf.map_fn
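A hedged note on predict_input_fn above: tf.enable_eager_execution() at the top of the script does not make the Estimator run the input_fn eagerly (it is always traced into a graph), so dict(zip(feature_names, features)) ends up iterating a symbolic Tensor, which is exactly what the error reports. Building the per-feature dictionary from plain numpy before creating the dataset avoids iterating any Tensor; a sketch under that assumption, reusing feature_names and model from the full code:
import numpy as np
import tensorflow as tf

def predict_input_fn():
    # shape (n_rows, 56): one prediction row of 56 floats (shortened here)
    test_array = np.array([[-0.057, -0.0569]])
    # map each feature name to its numpy column, then slice the dict
    features = {name: test_array[:, i] for i, name in enumerate(feature_names)}
    dataset = tf.data.Dataset.from_tensor_slices(features)
    return dataset.batch(1)  # an input_fn may return a tf.data.Dataset of feature dicts

predictor = model.predict(input_fn=predict_input_fn)
print(next(predictor))
Alternatively, tf.estimator.inputs.numpy_input_fn(x=features, shuffle=False) in TF 1.13 is another way to feed an in-memory array without building the dataset by hand.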