Tensorflow Cannot get value inside Tensorflow graph function - python

I'm trying to implement a neural network model with TensorFlow for text classification. I created a custom layer in order to calculate a weighted average and to learn weights but I keep getting this weird error that tells "Cannot get value inside Tensorflow graph function". Here's what I've coded so far (For a small sample texts just to test).
This is my custom layer:
from tensorflow.keras import layers
import tensorflow as tf
import numpy as np
class WeightedAverageLayer(layers.Layer):
def __init__(self, vocab_size, embedding_dimension, doc_length):
super(WeightedAverageLayer, self).__init__()
self.embedding_dimension = embedding_dimension
self.vocab_size = vocab_size
self.doc_length = doc_length
#Initialize the embedding layer
self.embedding_layer = tf.keras.layers.Embedding(vocab_size, embedding_dimension, input_length=doc_length)
#Initialize the words importance
b_init = tf.zeros_initializer()
self.b = self.add_weight(shape=(vocab_size,), initializer='random_normal', trainable=True)
def calculate_average(self, doc):
doc_words = self.embedding_layer(doc)
word_embeddings = self.embedding_layer.get_weights()[0]
s = 0
avg = 0
z = np.zeros((self.embedding_dimension))
for i in range(self.embedding_dimension):
for j in range(len(doc)):
val = doc[j]
vw = word_embeddings[val]
a = self.b[val]
s = s + vw[i] * a
avg = s / len(doc)
z[i] = avg
s = 0
avg = 0
return z
def call(self, docs):
result = []
for doc in docs:
vect = self.calculate_average(doc)
result.append(vect)
result_np = np.array(result)
return tf.stack(result_np)
#return list(map(self.calculate_average, docs))
and this is the main file where i test my layer in a model:
vocab_size = 20
embedding_dim = 10
max_length = 4
# define documents
docs = ['Well done!', 'Good work', 'Great effort', 'nice work', 'Excellent!',
'Weak', 'Poor effort!', 'not good', 'poor work', 'Could have done better.', 'You are amazing']
# define class labels
labels = np.array([[1,0],[1,0],[1,0],[1,0],[1,0],[0,2],[0,2],[0,2],[0,2],[0,2],[1,0]])
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(docs)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(docs)
pad = pad_sequences(sequences, maxlen=max_length, padding="post")
average_layer = WeightedAverageLayer(vocab_size, embedding_dim, max_length )
output_layer = tf.keras.layers.Dense(2, activation='softmax')
input_docs = Input(shape=(max_length))
weighted_average = average_layer(input_docs)
output = output_layer(weighted_average)
model = Model(input_docs, output)
Please note that I tested the layer alone and it worked fine.

Related

Keras - ValueError: Could not interpret loss function identifier

I am trying to build the autoencoder structure detailed in this IEEE article. The autoencoder uses a separable loss function where it is required that I create a custom loss function for the "cluster loss" term of the separable loss function as a function of the average output of the encoder. I create my own layer called RffConnected that calculates the cluster loss and utilizes the add_loss method. Otherwise, this RffConnected layer should act as just a normal deep layer.
Here are my relevant code snippets:
import matplotlib.pyplot as plot
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import math
from matplotlib.figure import Figure
import tensorflow as tf
import keras
from keras import layers
import random
import time
from os import listdir
#loads data from a text file
def loadData(basePath, samplesPerFile, sampleRate):
real = []
imag = []
fileOrder = []
for file in listdir(basePath):
if((file != "READ_ME") and ((file != "READ_ME.txt"))):
fid = open(basePath + "\\" + file, "r")
fileOrder.append(file)
t = 0
sampleEvery = samplesPerFile / sampleRate
temp1 = []
temp2 = []
times = []
for line in fid.readlines():
times.append(t)
samples = line.split("\t")
temp1.append(float(samples[0]))
temp2.append(float(samples[1]))
t = t + sampleEvery
real.append(temp1)
imag.append(temp2)
fid.close()
real = np.array(real)
imag = np.array(imag)
return real, imag, times, fileOrder
#####################################################################################################
#Breaks up and randomizes data
def breakUpData(real, imag, times, numPartitions, basePath):
if(len(real) % numPartitions != 0):
raise ValueError("Error: The length of the dataset must be divisible by the number of partitions.")
newReal = []
newImag = []
newTimes = []
fileOrder = listdir(basePath)
dataFiles = []
interval = int(len(real[0]) / numPartitions)
for i in range(0, interval):
newTimes.append(times[i])
for i in range(0, len(real)):
tempI = []
tempQ = []
for j in range(0, len(real[0])):
tempI.append(real[i, j])
tempQ.append(imag[i, j])
if((j + 1) % interval == 0):
newReal.append(tempI)
newImag.append(tempQ)
#fileName = fileOrder[i][0: fileOrder[i].find("_") + 3]
dataFiles.append(fileOrder[i])
tempI = []
tempQ = []
#randomizes the broken up dataset and the file list
for i in range(0, len(newReal)):
r = random.randint(0, len(newReal) - 1)
tempReal = newReal[i]
tempImag = newImag[i]
newReal[i] = newReal[r]
newImag[i] = newImag[r]
newReal[r] = tempReal
newImag[r] = tempImag
tempFile = dataFiles[i]
dataFiles[i] = dataFiles[r]
dataFiles[r] = tempFile
#return np.array(newReal), np.array(newImag), newTimes, dataFiles
return newReal, newImag, newTimes, dataFiles
#####################################################################################################
#custom loss layer for the RffAe-S that calculates the clustering loss term
class RffConnected(layers.Layer):
def __init__(self, output_dim, batchSize, beta, alpha):
super(RffConnected, self).__init__()
# self.total = tf.Variable(initial_value=tf.zeros((input_dim,)), trainable=False)
#array = np.zeros(output_dim)
self.iters = 0.0
self.beta = beta
self.alpha = alpha
self.batchSize = batchSize
self.output_dim = output_dim
self.sum = tf.zeros(output_dim, tf.float64)
self.moving_average = tf.zeros(output_dim, tf.float64)
self.clusterloss = tf.zeros(output_dim, tf.float64)
self.sum = tf.cast(self.sum, tf.float32)
self.moving_average = tf.cast(self.moving_average, tf.float32)
self.clusterloss = tf.cast(self.clusterloss, tf.float32)
# self.sum = keras.Input(shape=(self.output_dim,))
# self.moving_average = keras.Input(shape=(self.output_dim,))
# self.clusterloss = keras.Input(shape=(self.output_dim,))
def build(self, input_shape):
self.kernel = self.add_weight(name = 'kernel', \
shape = (int(input_shape[-1]), self.output_dim), \
initializer = 'normal', trainable = True)
#self.kernel = tf.cast(self.kernel, tf.float64)
super(RffConnected, self).build(int(input_shape[-1]))
def call(self, inputs):
#keeps track of training epochs
self.iters = self.iters + 1
#inputs = tf.cast(inputs, tf.float64)
#where this custom layer acts as a normal layer- the loss then uses this
#calc = keras.backend.dot(inputs, self.kernel)
calc = tf.matmul(inputs, self.kernel)
#cumulative sum of deep encoded features
#self.sum = state_ops.assign(self.sum, tf.reshape(tf.math.add(self.sum, calc), tf.shape(self.sum)))
#self.sum = tf.ops.state_ops.assign(self.sum, tf.math.add(self.sum, calc))
#self.sum.assign_add(calc)
self.sum = tf.math.add(self.sum, calc)
#calculate the moving average and loss if we have already trained one batch
if(self.iters >= self.batchSize):
self.moving_average = tf.math.divide(self.sum, self.iters)
self.clusterloss = tf.math.exp(\
tf.math.multiply(-1 * self.beta, tf.math.reduce_sum(tf.math.square(tf.math.subtract(inputs, self.moving_average)))))
#self.add_loss(tf.math.multiply(self.clusterloss, self.alpha))
self.add_loss(self.clusterloss.numpy() * self.alpha)
return calc
#####################################################################################################
def customloss(y_true, y_pred):
loss = tf.square(y_true - y_pred)
print(loss)
return loss
#####################################################################################################
realTraining = np.array(real[0:2200])
realTesting = np.array(real[2200:-1])
imagTraining = np.array(imag[0:2200])
imagTesting = np.array(imag[2200:-1])
numInputs = len(realTraining[0])
i_sig = keras.Input(shape=(numInputs,))
q_sig = keras.Input(shape=(numInputs,))
iRff = tf.keras.layers.experimental.RandomFourierFeatures(numInputs, \
kernel_initializer='gaussian', scale=9.0)(i_sig)
rff1 = keras.Model(inputs=i_sig, outputs=iRff)
qRff = tf.keras.layers.experimental.RandomFourierFeatures(numInputs, \
kernel_initializer='gaussian', scale=9.0)(q_sig)
rff2 = keras.Model(inputs=q_sig, outputs=qRff)
combined = layers.Concatenate()([iRff, qRff])
combineRff = tf.keras.layers.experimental.RandomFourierFeatures(4 * numInputs, \
kernel_initializer='gaussian', scale=10.0)(combined)
preprocess = keras.Model(inputs=[iRff, qRff], outputs=combineRff)
#print(realTraining[0:5])
preprocessedTraining = preprocess.predict([realTraining, imagTraining])
preprocessedTesting = preprocess.predict([realTesting, imagTesting])
################## Entering Encoder ######################
encoderIn = keras.Input(shape=(4*numInputs,))
#connected1 = layers.Dense(100, activation="sigmoid")(encoderIn)
clusterLossLayer = RffConnected(100, 30, 1.00, 100.00)(encoderIn)
#clusterLossLayer = myRffConnected(256)(connected1)
encoder = keras.Model(inputs=encoderIn, outputs=clusterLossLayer)
################## Entering Decoder ######################
connected2 = layers.Dense(125, activation="sigmoid")(clusterLossLayer)
relu1 = layers.ReLU()(connected2)
dropout = layers.Dropout(0.2)(relu1)
reshape1 = layers.Reshape((25, 5, 1))(dropout)
bn1 = layers.BatchNormalization()(reshape1)
trans1 = layers.Conv2DTranspose(1, (4, 2))(bn1)
ups1 = layers.UpSampling2D(size=(2, 1))(trans1)
relu2 = layers.ReLU()(ups1)
bn2 = layers.BatchNormalization()(relu2)
trans2 = layers.Conv2DTranspose(1, (4, 2))(bn2)
ups2 = layers.UpSampling2D(size=(2, 1))(trans2)
relu3 = layers.ReLU()(ups2)
bn3 = layers.BatchNormalization()(relu3)
trans3 = layers.Conv2DTranspose(1, (5, 2))(bn3)
ups3 = layers.UpSampling2D(size=(2, 1))(trans3)
relu4 = layers.ReLU()(ups3)
bn4 = layers.BatchNormalization()(relu4)
trans4 = layers.Conv2DTranspose(1, (7, 1))(bn4)
reshape2 = layers.Reshape((4*numInputs, 1, 1))(trans4)
autoencoder = keras.Model(inputs=encoderIn, outputs=reshape2)
encoded_input = keras.Input(shape=(None, 100))
decoder_layer = autoencoder.layers[-1]
#autoencoder.summary()
autoencoder.compile(optimizer='adam', loss=[autoencoder.losses[-1], customloss], metrics=['accuracy', 'accuracy'])
autoencoder.fit(preprocessedTraining, preprocessedTraining, epochs=100, batch_size=20, shuffle=True, validation_data=(preprocessedTesting, preprocessedTesting))
It seems like it runs for two training epochs then it gives me an error. I end up getting this error when I run it:
ValueError: Could not interpret loss function identifier: Tensor("rff_connected_137/Const:0", shape=(100,), dtype=float32)
I've already spent a considerable amount of time debugging this thing, although if you spot any more errors I would appreciate a heads-up. Thank you in advance.
According to the documentation of the keras Keras Model Training-Loss, the 'loss' attribute can take the value of float tensor (except for the sparse loss functions returning integer arrays) with a specific shape.
If it is necessary to combine two loss functions, it would be better to perform mathematical calculations within your custom loss function to return an output of float tensor. This reference might be a help Keras CustomLoss definition.

Modifying T5 for sequence labelling

I am trying to modify the T5-model as a sequence labelling task (to do NER).
I create my model class by taking the last hidden states of the T5-model and add a linear layer with 3 out-features (for simple IOB-tags).
Here is my model class:
class Seq2SeqTokenCLS(nn.Module):
def __init__(self):
super(Seq2SeqTokenCLS, self).__init__()
self.num_labels = 3
self.base_model = T5ForConditionalGeneration.from_pretrained('t5-small')
# average of n last hidden layers
self.layers = 3
# change beam search or greedy search here
# Suggested parameters from the T5 paper: num_beams = 4 and length penalty alpha = 0.6
self.base_model.config.num_beams = 1 # <-- change to 1 for greedy decoding
self.base_model.config.length_penalty = 0.6 # <-- comment this out for greedy decoding
self.dropout = nn.Dropout(0.5)
self.dense = nn.Linear(in_features=512 * self.layers, out_features=self.num_labels)
def forward(self, input_ids, attn_mask, labels):
hidden_states = self.base_model(
input_ids,
attention_mask=attn_mask,
output_hidden_states=True
)
hidden_states = torch.cat([hidden_states['decoder_hidden_states'][-(n+1)] for n in range(self.layers)], dim=2)
logits = self.dense(self.dropout(hidden_states))
loss = None
loss_fct = nn.CrossEntropyLoss(weight=class_weights)
# Only keep active parts of the loss
if attn_mask is not None:
active_loss = attn_mask.view(-1) == 1
active_logits = logits.view(-1, self.num_labels)
active_labels = torch.where(
active_loss, labels.view(-1), torch.tensor(loss_fct.ignore_index).type_as(labels)
)
loss = loss_fct(active_logits, active_labels)
else:
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
return {'logits':logits,
'loss':loss}
However, I am confused about how should do inference in this approach. Should I use the .generate function as when T5 has a standard LM head? If that is the case, then I don't know how to inherit the function into my new model class...
Or can I use a normal evaluation loop?
E.g. something like this?:
predictions = []
all_labels = []
with torch.no_grad():
for batch in tqdm(test_loader):
input_ids = batch['input_ids'].to(device)
attention_mask = batch['attention_mask'].to(device)
labels = batch['labels'].to(device)
outputs = model(input_ids=input_ids,
attn_mask=attention_mask
)
for sample, lab in zip(outputs['logits'],labels):
preds = torch.argmax(sample, dim=1)
predictions.append(preds)
all_labels.append(lab)
I would still like to experiment with beam search...

Transformer PyTorch Error- ValueError: too many values to unpack (expected 2)

I am having issues getting my model to run. I am not sure which model to use in the translate_sentence function, I have tried model.transformer, model.encoder_de, etc. It is based off of the Transformer class and the forward() function I believe but I am getting a type error. These are the directions:
As in the forward(self, src, tgt) function of the
TransformerModel class, you need to create the appropriate
mask and encode the source sentence (just once).
You also need to create the appropriate mask and encode the
output sentence for sequential predictions. Unlike the source,
for every iteration, you need to re-encode the previous output and
pass both the source sentence and previous output into the
Transformer.
from torch.nn import Transformer
class TransformerModel(nn.Module):
def __init__(self, ntoken_in, ntoken_out, ninp, nhead, npf_dim, nlayers, src_pad_idx, trg_pad_idx, dropout=0.5):
super(TransformerModel, self).__init__()
# --------------- param -----------------
# ntoken_in: the idx of the input word after tokenization
# ntoken_out: the idx of the input word w.r.t. the tokenization
# ninp: the number of expected features in the encoder/decoder inputs
# nhead: the number of multiAttention heads
# npf_dim: the dimension of the feedforward layer
# src_pad_idx: the token for padding in source language
# trg_pad_idx: the token for padding in target language
# ----------------------------------------
self.model_type = 'Transformer'
self.pos_encoder = PositionalEncoding(ninp, dropout)
self.transformer = Transformer(d_model=ninp, nhead=nhead, num_encoder_layers=nlayers, num_decoder_layers=nlayers,
dim_feedforward=npf_dim, dropout=dropout, activation='relu')
self.encoder_en = nn.Embedding(ntoken_in, ninp) # tok_embedding for input
self.encoder_de = nn.Embedding(ntoken_out, ninp) # tok_embedding for output
self.ninp = ninp
self.decoder = nn.Linear(ninp, ntoken_out)
self.src_pad_idx = src_pad_idx
self.tgt_pad_idx = trg_pad_idx
self.init_weights()
def _generate_src_key_mask(self, src):
# for key_padding_mask in transformer
# the positions with the value of True will be ignored while the position
# with the value of False will be unchanged. We mask all padding words.
# The output dim is b*s
src_mask = (src == self.src_pad_idx)
return src_mask.T
def _generate_tgt_mask(self, tgt, sz):
# Beside key_padding_mask in transformer, the output or teacher input
# should be masked sequentially to prevent the model get any information
# from the future words it is going to predict
tgt_key_mask = tgt == self.tgt_pad_idx
# We provide FloatTensor attn_mask. It will be added to the attention weight.
mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
attn_mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)).to(tgt.device)
return attn_mask, tgt_key_mask.T
def init_weights(self):
initrange = 0.1
self.encoder_en.weight.data.uniform_(-initrange, initrange)
self.encoder_de.weight.data.uniform_(-initrange, initrange)
self.decoder.bias.data.zero_()
self.decoder.weight.data.uniform_(-initrange, initrange)
def forward(self, src, tgt):
# src
src_key_mask = self._generate_src_key_mask(src)
src = self.encoder_en(src) * math.sqrt(self.ninp) # use a learned encoder put stoi index to a feature space s*b --> s*b*e
src = self.pos_encoder(src) # add the pos feature toward feature space
# tgt
tgt_mask, tgt_key_mask = self._generate_tgt_mask(tgt, tgt.size(0))
tgt = self.encoder_de(tgt) * math.sqrt(self.ninp)
tgt = self.pos_encoder(tgt)
output = self.transformer(src, tgt, tgt_mask=tgt_mask,
src_key_padding_mask = src_key_mask,
tgt_key_padding_mask = tgt_key_mask)
output = self.decoder(output)
return output
class PositionalEncoding(nn.Module):
# The positional encoding as described in the paper
# https://arxiv.org/pdf/1706.03762.pdf
def __init__(self, d_model, dropout=0.1, max_len=5000):
super(PositionalEncoding, self).__init__()
self.dropout = nn.Dropout(p=dropout)
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0).transpose(0, 1)
self.register_buffer('pe', pe)
def forward(self, x):
x = x + self.pe[:x.size(0), :]
return self.dropout(x)
# Here we intialize our model
INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)
print(INPUT_DIM, OUTPUT_DIM)
HID_DIM = 256
N_LAYERS = 3
N_HEADS = 8
N_PF_DIM = 512
DROPOUT = 0.1
SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
model =TransformerModel(ntoken_in = INPUT_DIM, ntoken_out=OUTPUT_DIM, ninp=HID_DIM,
nhead=N_HEADS, npf_dim=N_PF_DIM, nlayers=N_LAYERS,
src_pad_idx=SRC_PAD_IDX, trg_pad_idx=TRG_PAD_IDX, dropout=DROPOUT).to(device)
def count_parameters(model: nn.Module):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'The model has {count_parameters(model):,} trainable parameters')
def initialize_weights(m):
if hasattr(m, 'weight') and m.weight.dim() > 1:
nn.init.xavier_uniform_(m.weight.data)
model.apply(initialize_weights)
----------
def translate_sentence(sentence, src_field, trg_field, model, device, max_len = 50):
model.eval()
if isinstance(sentence, str):
nlp = spacy.load('de')
tokens = [token.text.lower() for token in nlp(sentence)]
else:
tokens = [token.lower() for token in sentence]
#tokens = [src_field.init_token] + tokens + [src_field.eos_token]
src_indexes = [src_field.vocab.stoi[token] for token in tokens]
src_tensor = torch.LongTensor(src_indexes).unsqueeze(1).to(device)
with torch.no_grad():
#model.?
hidden, cell = model.encoder_en(src_tensor)
# create a list to hold the output sentence, initialized with an <sos> token
trg_indexes = [trg_field.vocab.stoi[trg_field.init_token]]
for i in range(max_len):
trg_tensor = torch.LongTensor(trg_indexes).unsqueeze(1).to(device)
with torch.no_grad():
#model.?
output, hidden, cell = model.encoder_de(trg_tensor, hidden, cell)
pred_token = output.argmax(1).item()
trg_indexes.append(pred_token)
if pred_token == trg_field.vocab.stoi[trg_field.eos_token]:
break
trg_tokens = [trg_field.vocab.itos[i] for i in trg_indexes]
return trg_tokens[1:]
----------
#getting error here
example_idx = 18
src = vars(train_data.examples[example_idx])['src']
trg = vars(train_data.examples[example_idx])['trg']
print(f'src = {src}')
print(f'trg = {trg}')
translation = translate_sentence(src, TRG, SRC, model, device)
print(f'predicted trg = {translation}')

TFBertForTokenClassification scoring only O labels on a NER task

I'm using TFBertForTokenClassification to perform a NER task on the annotated corpus fo NER:
https://www.kaggle.com/abhinavwalia95/entity-annotated-corpus.
The problem is that the O-Labels are the majority of all labels, then the accuracy is quite high as the model correctly predicts most of them.
So, when I try to predict the labels of a simple sentence, the network predict only the O Label for each token of the sentence, however in several tutorials in which it is used Pytorch (I am using Tensorflow), the predictions are good.
Probably there is a problem in my code, but I cannot figure out where is it.
The code is the following:
# Import libraries
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
import math
import numpy as np
from transformers import (
TF2_WEIGHTS_NAME,
BertConfig,
BertTokenizer,
TFBertForTokenClassification,
create_optimizer)
# Config
MAX_LEN= 128
TRAIN_BATCH_SIZE = 32
VALID_BTCH_SIZE = 8
EPOCHS = 10
BERT_MODEL = 'bert-base-uncased'
MODEL_PATH = "model.bin"
TRAINING_FILE = "../input/entity-annotated-corpus/ner_dataset.csv"
TOKENIZER = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case=True)
# Create the padded input, attention masks, token type and labels
def get_train_data(text, tags):
tokenized_text = []
target_tags = []
for index, token in enumerate(text):
encoded_token = TOKENIZER.encode(
token,
add_special_tokens = False
)
encoded_token_len = len(encoded_token)
tokenized_text.extend(encoded_token)
target_tags.extend([tags[index]] * encoded_token_len)
#truncation
tokenized_text = tokenized_text[: MAX_LEN - 2]
target_tags = target_tags[: MAX_LEN - 2]
#[101] = [CLS] , [102] = [SEP]
tokenized_text = [101] + tokenized_text + [102]
target_tags = [0] + target_tags + [0]
attention_mask = [1] * len(tokenized_text)
token_type_ids = [0] * len(tokenized_text)
#padding
padding_len = int(MAX_LEN - len(tokenized_text))
tokenized_text = tokenized_text + ([0] * padding_len)
target_tags = target_tags + ([0] * padding_len)
attention_mask = attention_mask + ([0] * padding_len)
token_type_ids = token_type_ids + ([0] * padding_len)
return (tokenized_text, target_tags, attention_mask, token_type_ids)
# Extract sentences from dataset
class RetrieveSentence(object):
def __init__(self, data):
self.n_sent = 1
self.data = data
self.empty = False
function = lambda s: [(w, p, t) for w, p, t in zip(s["Word"].values.tolist(),
s["POS"].values.tolist(),
s["Tag"].values.tolist())]
self.grouped = self.data.groupby("Sentence #").apply(function)
self.sentences = [s for s in self.grouped]
def retrieve(self):
try:
s = self.grouped["Sentence: {}".format(self.n_sent)]
self.n_sent += 1
return s
except:
return None
# Load dataset and create one hot encoding for labels
df_data = pd.read_csv(TRAINING_FILE,sep=",",encoding="latin1").fillna(method='ffill')
Sentences = RetrieveSentence(df_data)
sentences_list = [" ".join([s[0] for s in sent]) for sent in Sentences.sentences]
labels = [ [s[2] for s in sent] for sent in Sentences.sentences]
tags_2_val = list(set(df_data["Tag"]))
tag_2_idx = {t: i for i, t in enumerate(tags_2_val)}
id_labels = [[tag_2_idx.get(l) for l in lab] for lab in labels]
sentences_list = [sent.split() for sent in sentences_list]
# I removed the sentence n 41770 because it gave index problems
del labels[41770]
del sentences_list[41770]
del id_labels[41770]
encoded_text = []
encoded_labels = []
attention_masks = []
token_type_ids = []
for i in range(len(sentences_list)):
text, labels, att_mask, tok_type = get_train_data(text = sentences_list[i], tags = id_labels[i])
encoded_text.append(text)
encoded_labels.append(labels)
attention_masks.append(att_mask)
token_type_ids.append(tok_type)
# Convert from list to np array
encoded_text = np.array(encoded_text)
encoded_labels = np.array(encoded_labels)
attention_masks = np.array(attention_masks)
token_type_ids = np.array(token_type_ids)
# Train Test split
X_train, X_valid, Y_train, Y_valid = train_test_split(encoded_text, encoded_labels, random_state=20, test_size=0.1)
Mask_train, Mask_valid, Token_ids_train, Token_ids_valid = train_test_split(attention_masks,token_type_ids ,random_state=20, test_size=0.1)
# Aggregate the train and test set, then shuffle and batch the train set
def example_to_features(input_ids,attention_masks,token_type_ids,y):
return {"input_ids": input_ids,
"attention_mask": attention_masks,
"token_type_ids": token_type_ids},y
train_ds = tf.data.Dataset.from_tensor_slices((X_train,Mask_train,Token_ids_train,Y_train)).map(example_to_features).shuffle(1000).batch(32)
test_ds=tf.data.Dataset.from_tensor_slices((X_valid,Mask_valid,Token_ids_valid,Y_valid)).map(example_to_features).batch(1)
# Load TFBertForTokenClassification with default config
config = BertConfig.from_pretrained(BERT_MODEL,num_labels=len(tags_2_val))
model = TFBertForTokenClassification.from_pretrained(BERT_MODEL, from_pt=bool(".bin" in BERT_MODEL), config=config)
# Add softmax layer, compute loss, optimizer and fit
model.layers[-1].activation = tf.keras.activations.softmax
model.summary()
optimizer = tf.keras.optimizers.Adam()
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
history = model.fit(train_ds, epochs=3, validation_data=test_ds)
# Prediction. Spoiler: the label predicted are O-Label
sentence = "Hi , my name is Bob and I live in England"
inputs = TOKENIZER(sentence, return_tensors="tf")
input_ids = inputs["input_ids"]
inputs["labels"] = tf.reshape(tf.constant([1] * tf.size(input_ids).numpy()), (-1, tf.size(input_ids))) # Batch size 1
output = model(inputs)
The code is executed on a Kaggle notebook.
The transformer library version is 3.4.0
Many thanks in advance.

Tensorflow evaluate: Aborted (core dumped)

tl;dr: I input a word to my model, and am supposed to get a list of similar words and their associated measures of similarity back. I get an error: Aborted (core dumped).
My goal is to determine which words are similar to an input word, based on their feature vectors. I have model already trained. I load it and call two functions:
def main(argv=None):
model = NVDM(args)
sess_saver = tf.train.Saver()
sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)
loaded = load_for_similar(sess, sess_saver) #my function
wm = word_match(sess, loaded[0], loaded[1], "bottle", loaded[2], loaded[3], topN=5)
My problem is that I can't print out the words which are similar and the associated similarity measure. I tried (in main):
sess.run(wm)
wm[0].eval(session=sess)
print(wm)
All of which gave me the error:
F tensorflow/core/kernels/strided_slice_op.cc:316] Check failed: tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape)
Aborted (core dumped)
This tells me I'm not running the session properly. What am I doing wrong?
Details on the functions, just in case:
The function 'load_for_similar' restores the weights and bias of the decoder in my model (a variational autoencoder), and normalizes them. It also reverses the order of the keys and values in my vocabulary dictionary for later use:
def load_for_similar(sess, saver_obj):
saver_obj.restore(sess, "./CA_checkpoints/saved_model.ckpt")
vocab_file = '/path/to/vocab.pkl'
t1 = loader_object(vocab_file)
v1 = t1.get_vocab()
v1_rev = {k:v for v, k in v1.iteritems()}
decoder_mat = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[0]
decoder_bias = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[1]
return (find_norm(decoder_mat), find_norm(decoder_bias), v1, v1_rev)
To find similar words, I pass the normalized weight matrix and bias in to an new function, along with the feature vector of my word (vec):
def find_similar(sess, Weights, vec, bias):
dists = tf.add(tf.reduce_sum(tf.mul(Weights, vec)), bias)
best = argsort(sess, dists, reverse=True)
dist_sort = tf.nn.top_k(dists, k=dists.get_shape().as_list()[0], sorted=True).values
return dist_sort, best
Finally, I want to match the words that are closest to my supplied word, "bottle":
def word_match(sess, norm_mat , norm_bias, word_ , vocab, vocab_inverse , topN = 10):
idx = vocab[word_]
similarity_meas , indexes = find_similar(sess, norm_mat , norm_mat[idx], norm_bias)
words = tf.gather(vocab_inverse.keys(), indexes[:topN])
return (words, similarity_meas[:topN])
EDIT: in response to mrry's comment, here is the model (I hope this is what you wanted?). This code depends on utils.py, a separate utilities file. I will include that as well. Please note that this code is heavily based on Yishu Miao's and Sarath Nair's.
class NVDM(object):
""" Neural Variational Document Model -- BOW VAE.
"""
def __init__(self,
vocab_size=15000, #was 2000
n_hidden=500,
n_topic=50,
n_sample=1,
learning_rate=1e-5,
batch_size=100, #was 64
non_linearity=tf.nn.tanh):
self.vocab_size = vocab_size
self.n_hidden = n_hidden
self.n_topic = n_topic
self.n_sample = n_sample
self.non_linearity = non_linearity
self.learning_rate = learning_rate/batch_size #CA
self.batch_size = batch_size
self.x = tf.placeholder(tf.float32, [None, vocab_size], name='input')
self.mask = tf.placeholder(tf.float32, [None], name='mask') # mask paddings
# encoder
with tf.variable_scope('encoder'):
self.enc_vec = utils.mlp(self.x, [self.n_hidden, self.n_hidden])
self.mean = utils.linear(self.enc_vec, self.n_topic, scope='mean')
self.logsigm = utils.linear(self.enc_vec,
self.n_topic,
bias_start_zero=True,
matrix_start_zero=False,
scope='logsigm')
self.kld = -0.5 * tf.reduce_sum(1 - tf.square(self.mean) + 2 * self.logsigm - tf.exp(2 * self.logsigm), 1)
self.kld = self.mask*self.kld # mask paddings
with tf.variable_scope('decoder'):
if self.n_sample ==1: # single sample
p1 = tf.cast(tf.reduce_sum(self.mask), tf.int32) #needed for random normal generation
eps = tf.random_normal((p1, self.n_topic), 0, 1)
doc_vec = tf.mul(tf.exp(self.logsigm), eps) + self.mean
logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
self.recons_loss = -tf.reduce_sum(tf.mul(logits, self.x), 1)
# multiple samples
else:
eps = tf.random_normal((self.n_sample*batch_size, self.n_topic), 0, 1)
eps_list = tf.split(0, self.n_sample, eps)
recons_loss_list = []
for i in xrange(self.n_sample):
if i > 0: tf.get_variable_scope().reuse_variables()
curr_eps = eps_list[i]
doc_vec = tf.mul(tf.exp(self.logsigm), curr_eps) + self.mean
logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
recons_loss_list.append(-tf.reduce_sum(tf.mul(logits, self.x), 1))
self.recons_loss = tf.add_n(recons_loss_list) / self.n_sample
self.objective = self.recons_loss + self.kld
optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
fullvars = tf.trainable_variables()
enc_vars = utils.variable_parser(fullvars, 'encoder')
dec_vars = utils.variable_parser(fullvars, 'decoder')
enc_grads = tf.gradients(self.objective, enc_vars)
dec_grads = tf.gradients(self.objective, dec_vars)
self.optim_enc = optimizer.apply_gradients(zip(enc_grads, enc_vars))
self.optim_dec = optimizer.apply_gradients(zip(dec_grads, dec_vars))
def minibatch_bow(it1, Instance1, n_samples, batch_size, used_ints = set()):
available = set(np.arange(n_samples)) - used_ints #
if len(available) < batch_size:
indices = np.array(list(available))
else:
indices = np.random.choice(tuple(available), batch_size, replace=False)
used = used_ints
mb = itemgetter(*indices)(it1)
batch_xs = Instance1._bag_of_words(mb, vocab_size=15000)
batch_flattened = np.ravel(batch_xs)
index_positions = np.where(batch_flattened > 0)[0]
return (batch_xs, index_positions, set(indices)) #batch_xs[0] is the bag of words; batch_xs[1] is the 0/1 word used/not;
def train(sess, model, train_file, vocab_file, saver_obj, training_epochs, alternate_epochs, batch_size):
Instance1 = testchunk_Nov23.testLoader(train_file, vocab_file)
data_set = Instance1.get_batch(batch_size) #get all minibatches of size 100
n_samples = Instance1.num_reviews()
train_batches = list(data_set) #this is an itertools.chain object
it1_train = list(itertools.chain(*train_batches)) #length is 732,356. This is all the reviews.atch_size
if len(it1_train) % batch_size != 0:
total_batch = int(len(it1_train)/batch_size) + 1
else:
total_batch = int(len(it1_train)/batch_size)
trainfilesave = "train_ELBO_and_perplexity_Dec1.txt"
#Training
train_time = time.time()
for epoch in range(training_epochs):
for switch in xrange(0, 2):
if switch == 0:
optim = model.optim_dec
print_mode = 'updating decoder'
else:
optim = model.optim_enc
print_mode = 'updating encoder'
with open(trainfilesave, 'w') as f:
for i in xrange(alternate_epochs):
loss_sum = 0.0
kld_sum = 0.0
word_count = 0
used_indices = set()
for idx_batch in range(total_batch): #train_batches:
mb = minibatch_bow(it1_train, Instance1, n_samples, batch_size, used_ints=used_indices)
print('minibatch', idx_batch)
used_indices.update(mb[2])
num_mb = np.ones(mb[0][0].shape[0])
input_feed = {model.x.name: mb[0][0], model.mask: num_mb}
_, (loss, kld) = sess.run((optim,[model.objective, model.kld]) , input_feed)
loss_sum += np.sum(loss)
And the utils.py file:
def linear(inputs,
output_size,
no_bias=False,
bias_start_zero=False,
matrix_start_zero=False,
scope=None):
"""Define a linear connection."""
with tf.variable_scope(scope or 'Linear'):
if matrix_start_zero:
matrix_initializer = tf.constant_initializer(0)
else:
matrix_initializer = None
if bias_start_zero:
bias_initializer = tf.constant_initializer(0)
else:
bias_initializer = None
input_size = inputs.get_shape()[1].value
matrix = tf.get_variable('Matrix', [input_size, output_size],
initializer=matrix_initializer)
bias_term = tf.get_variable('Bias', [output_size],
initializer=bias_initializer)
output = tf.matmul(inputs, matrix)
if not no_bias:
output = output + bias_term
return output
def mlp(inputs,
mlp_hidden=[],
mlp_nonlinearity=tf.nn.tanh,
scope=None):
"""Define an MLP."""
with tf.variable_scope(scope or 'Linear'):
mlp_layer = len(mlp_hidden)
res = inputs
for l in xrange(mlp_layer):
res = mlp_nonlinearity(linear(res, mlp_hidden[l], scope='l'+str(l)))
return res

Categories