Keras BNN AttributeError: 'function' object has no attribute 'values' - python

I am trying to use a neural network on normalized histograms labeled 0, 1, 2, 3 or 4. Each histogram has 31 classes whose values lie between 0.00000 and 1.00000. The hope is that the NN can predict the labels of my test examples from the class values alone. The data also includes statistical information such as mean, cp, cpk and so on.
import numpy as np
import tensorflow as tf
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_probability as tfp

def get_train_and_test_splits(train_size, batch_size=1):
    dataset = pd.read_csv("test.csv")
    train_dataset = (
        dataset.take(train_size).shuffle(buffer_size=train_size).batch(batch_size)
    )
    test_dataset = dataset.skip(train_size).batch(batch_size)
    return train_dataset, test_dataset
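# Note (an assumption on my part, not from the original post): a pandas DataFrame
# has no .shuffle(buffer_size=...), .batch() or .skip() methods, so a tf.data
# pipeline is one way to build the batched splits this function is meant to return.
# "label" is a hypothetical name for the target column.
def get_train_and_test_splits_tfdata(train_size, batch_size=1):
    df = pd.read_csv("test.csv")
    targets = df.pop("label")  # hypothetical target column
    dataset = tf.data.Dataset.from_tensor_slices((dict(df), targets))
    train_dataset = (
        dataset.take(train_size).shuffle(buffer_size=train_size).batch(batch_size)
    )
    test_dataset = dataset.skip(train_size).batch(batch_size)
    return train_dataset, test_dataset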
hidden_units = [8, 8]
learning_rate = 0.001

def run_experiment(model, loss, train_dataset, test_dataset):
    model.compile(
        optimizer=keras.optimizers.RMSprop(learning_rate=learning_rate),
        loss=loss,
        metrics=[keras.metrics.RootMeanSquaredError()],
    )
    print("Start training model...")
    model.fit(train_dataset, epochs=10, validation_data=test_dataset)
    print("Model training finished.")
    _, rmse = model.evaluate(train_dataset, verbose=0)
    print(f"Train RMSE: {round(rmse, 3)}")
    print("Evaluating model performance...")
    _, rmse = model.evaluate(test_dataset, verbose=0)
    print(f"Test RMSE: {round(rmse, 3)}")
FEATURE_NAMES = ["Code"] + ["Class {}".format(i) for i in range(1, 32)]  # "Class 1" ... "Class 31"
def create_model_inputs():
    inputs = {}
    for feature_name in FEATURE_NAMES:
        inputs[feature_name] = layers.Input(
            name=feature_name, shape=(1,), dtype=tf.float32
        )
    return inputs

def prior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    prior_model = keras.Sequential(
        [
            tfp.layers.DistributionLambda(
                lambda t: tfp.distributions.MultivariateNormalDiag(
                    loc=tf.zeros(n), scale_diag=tf.ones(n)
                )
            )
        ]
    )
    return prior_model

def posterior(kernel_size, bias_size, dtype=None):
    n = kernel_size + bias_size
    posterior_model = keras.Sequential(
        [
            tfp.layers.VariableLayer(
                tfp.layers.MultivariateNormalTriL.params_size(n), dtype=dtype
            ),
            tfp.layers.MultivariateNormalTriL(n),
        ]
    )
    return posterior_model

def create_probalistic_bnn_model(train_size):
    inputs = create_model_inputs()
    features = keras.layers.concatenate(list(inputs.values()))
    features = layers.BatchNormalization()(features)
    for units in hidden_units:
        features = tfp.layers.DenseVariational(
            units=units,
            make_prior_fn=prior,
            make_posterior_fn=posterior,
            kl_weight=1 / train_size,
            activation="sigmoid",
        )(features)
    distribution_params = layers.Dense(units=2)(features)
    outputs = tfp.layers.IndependentNormal(1)(distribution_params)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model
I get an error in the following section (now SOLVED, see below):
def negative_loglikelihood(targets, estimated_distribution):
    return -estimated_distribution.log_prob(targets)

num_epochs = 10
prob_bnn_model = create_probalistic_bnn_model(train_size)
run_experiment(prob_bnn_model, negative_loglikelihood, train_dataset, test_dataset)
It says:
AttributeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_2040\2813959440.py in <module>
3
4 num_epochs = 10
----> 5 prob_bnn_model = create_probalistic_bnn_model(train_size)
6 run_experiment(prob_bnn_model, negative_loglikelihood, train_dataset, test_dataset)
~\AppData\Local\Temp\ipykernel_2040\1042147322.py in create_probalistic_bnn_model(train_size)
1 def create_probalistic_bnn_model(train_size):
2 inputs = create_model_inputs()
----> 3 features = keras.layers.concatenate(list(input.values()))
4 features = layers.BatchNormalization()(features)
5
AttributeError: 'function' object has no attribute 'values'
Does that mean the program could not read any values out of my DataFrame?
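For reference, the traceback points at list(input.values()) rather than list(inputs.values()): input (without the trailing s) is Python's built-in function, which has no .values() method, so the DataFrame never comes into play here. The dict returned by create_model_inputs() is what has the values:

inputs = create_model_inputs()                               # dict of Keras Input layers
features = keras.layers.concatenate(list(inputs.values()))   # works: a dict has .values()
# keras.layers.concatenate(list(input.values()))             # AttributeError: input is the built-in function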
Final Part:
prediction_distribution = prob_bnn_model(examples)
prediction_mean = prediction_distribution.mean().numpy().tolist()
prediction_stdv = prediction_distribution.stddev().numpy()
upper = (prediction_mean + (1.96 * prediction_stdv)).tolist()
lower = (prediction_mean - (1.96 * prediction_stdv)).tolist()
prediction_stdv = prediction_stdv.tolist()

for idx in range(sample):
    print(
        f"Prediction mean: {round(prediction_mean[idx][0], 2)}, "
        f"stddev: {round(prediction_stdv[idx][0], 2)}, "
        f"95% CI: [{round(upper[idx][0], 2)} - {round(lower[idx][0], 2)}]"
        f" - Actual: {target[idx]}"
    )
Is there a problem with reading the CSV file into a DataFrame? Do I have to read the data out of the CSV and create a DataFrame manually? But then why is there no error earlier? Thanks very much.
After fixing two misspellings I get this error:
NameError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_2040\1566603154.py in <module>
1 sample = 10
----> 2 examples, targets = list(test_dataset.unbatch().shuffle(batch_size * 10).batch(sample))[
3 0
4 ]
5 prediction_distribution = prob_bnn_model(examples)
NameError: name 'test_dataset' is not defined
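The NameError suggests that get_train_and_test_splits() was never actually called in this session, so train_dataset and test_dataset do not exist yet. A minimal sketch of the missing step (the train_size value here is an assumption):

train_size = 600  # assumed; use the size of your own training split
batch_size = 1
train_dataset, test_dataset = get_train_and_test_splits(train_size, batch_size)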

Related

AttributeError: 'MapDataset' object has no attribute 'create_tf_dataset_for_client' in tensorflow_federated TFF

I want to test a federated learning model with non-IID data using the API tff.simulation.datasets.build_single_label_dataset(), following these posts:
TensorFlow Federated: How to tune non-IIDness in federated dataset?
AttributeError: 'MapDataset' object has no attribute 'preprocess' in tensorflow_federated tff
AttributeError: 'MapDataset' object has no attribute 'client_ids' in tensorflow_federated TFF
But when I train the model, I got this error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-55-c558ef10b0d6> in <module>()
10
11 train(federated_averaging_process=federated_averaging, num_rounds=10,
---> 12 num_clients_per_round= 100, summary_writer=summary_writer)
1 frames
<ipython-input-54-c93c7edf73db> in <listcomp>(.0)
19 sampled_train_data = [
20 fed_emnist_train.create_tf_dataset_for_client(client)
---> 21 for client in sampled_clients
22 ]
23
AttributeError: 'MapDataset' object has no attribute 'create_tf_dataset_for_client'
Here is the code:
import functools
import random

import numpy as np
import tensorflow as tf
import tensorflow_federated as tff

emnist_train, emnist_test = tff.simulation.datasets.emnist.load_data(
    only_digits=False)

# for non-IID we use this API tff.simulation.datasets.build_single_label_dataset()
fed_emnist_train = tff.simulation.datasets.build_single_label_dataset(
    emnist_train.create_tf_dataset_from_all_clients(),
    label_key='label', desired_label=1)
print(fed_emnist_train.element_spec)

MAX_CLIENT_DATASET_SIZE = 418
CLIENT_EPOCHS_PER_ROUND = 1
CLIENT_BATCH_SIZE = 20
TEST_BATCH_SIZE = 500

def reshape_emnist_element(element):
    return (tf.expand_dims(element['pixels'], axis=-1), element['label'])

def preprocess_train_dataset(dataset):
    return (dataset
            .shuffle(buffer_size=MAX_CLIENT_DATASET_SIZE)
            .repeat(CLIENT_EPOCHS_PER_ROUND)
            .batch(CLIENT_BATCH_SIZE, drop_remainder=False)
            .map(reshape_emnist_element))

fed_emnist_train = preprocess_train_dataset(fed_emnist_train)

# for unbalanced dataset
NUM_CLIENTS = 100
client_datasets = [
    fed_emnist_train.take(random.randint(1, CLIENT_BATCH_SIZE))
    for _ in range(NUM_CLIENTS)
]
# defining a model
def create_original_fedavg_cnn_model(only_digits=False):
    data_format = 'channels_last'
    max_pool = functools.partial(
        tf.keras.layers.MaxPooling2D,
        pool_size=(2, 2),
        padding='same',
        data_format=data_format)
    conv2d = functools.partial(
        tf.keras.layers.Conv2D,
        kernel_size=5,
        padding='same',
        data_format=data_format,
        activation=tf.nn.relu)
    model = tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape=(28, 28, 1)),
        conv2d(filters=32),
        max_pool(),
        conv2d(filters=64),
        max_pool(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation=tf.nn.relu),
        tf.keras.layers.Dense(10 if only_digits else 62),
        tf.keras.layers.Softmax(),
    ])
    return model

input_spec = client_datasets[0].element_spec

def tff_model_fn():
    keras_model = create_original_fedavg_cnn_model()
    return tff.learning.from_keras_model(
        keras_model=keras_model,
        input_spec=input_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

# training the model
federated_averaging = tff.learning.build_federated_averaging_process(
    model_fn=tff_model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.02),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))

# utility function
def format_size(size):
    size = float(size)
    for unit in ['bit', 'Kibit', 'Mibit', 'Gibit']:
        if size < 1024.0:
            return "{size:3.2f}{unit}".format(size=size, unit=unit)
        size /= 1024.0
    return "{size:.2f}{unit}".format(size=size, unit='TiB')

def set_sizing_environment():
    sizing_factory = tff.framework.sizing_executor_factory()
    context = tff.framework.ExecutionContext(executor_fn=sizing_factory)
    tff.framework.set_default_context(context)
    return sizing_factory
# trains the federated averaging process and output metrics
import tensorflow_datasets as tfds

def train(federated_averaging_process, num_rounds, num_clients_per_round, summary_writer):
    # create an environment to get communication cost
    environment = set_sizing_environment()
    # initialize the FedAvg algorithm to get the initial server state
    state = federated_averaging_process.initialize()
    with summary_writer.as_default():
        for round_num in range(num_rounds):
            # sample the clients participating in this round
            sampled_clients = np.random.choice(
                emnist_train.client_ids,
                size=num_clients_per_round,
                replace=False)
            # create a list of `tf.Dataset` instances from the data of sampled clients
            sampled_train_data = [
                fed_emnist_train.create_tf_dataset_for_client(client)
                for client in sampled_clients
            ]
            state, metrics = federated_averaging_process.next(state, sampled_train_data)
            size_info = environment.get_size_info()
            broadcasted_bits = size_info.broadcast_bits[-1]
            aggregated_bits = size_info.aggregate_bits[-1]
            print('round {:2d}, metrics={}, broadcasted_bits={}, aggregated_bits={}'.format(
                round_num, metrics, format_size(broadcasted_bits), format_size(aggregated_bits)))
            # add metrics to Tensorboard
            for name, value in metrics['train'].items():
                tf.summary.scalar(name, value, step=round_num)
            tf.summary.scalar('cumulative_broadcasted_bits', broadcasted_bits, step=round_num)
            tf.summary.scalar('cumulative_aggregated_bits', aggregated_bits, step=round_num)
            summary_writer.flush()

# first, clean the log directory to avoid conflicts
try:
    tf.io.gfile.rmtree('/tmp/logs/scalars')
except tf.errors.OpError as e:
    pass

# set up the log directory and writer for Tensorboard.
logdir = "/tmp/logs/scalars/original/"
summary_writer = tf.summary.create_file_writer(logdir)

train(federated_averaging_process=federated_averaging, num_rounds=10,
      num_clients_per_round=100, summary_writer=summary_writer)
I don't know where my mistake is. Is the definition of input_spec the reason?
input_spec = client_datasets[0].element_spec
Or is my mistake here?
sampled_train_data = [
    fed_emnist_train.create_tf_dataset_for_client(client)
    for client in sampled_clients
]
Appreciate any help!
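A likely cause, going by the traceback: tff.simulation.datasets.build_single_label_dataset() returns a plain tf.data.Dataset (a MapDataset after preprocess_train_dataset), not a ClientData object, so it has no create_tf_dataset_for_client() method. One possible workaround, sketched here as an assumption rather than a verified fix, is to sample from the client_datasets list that is already built above instead of calling the per-client API:

# sketch: draw this round's client datasets from the list built with .take() above
sampled_indices = np.random.choice(
    len(client_datasets), size=num_clients_per_round, replace=False)
sampled_train_data = [client_datasets[idx] for idx in sampled_indices]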

TORCH ERROR: return forward_call(*input, **kwargs) AND in _forward_unimplemented raise NotImplementedError NotImplementedError

I am trying to build J.A.R.V.I.S. using torch, NLP and a neural network, but I am getting an error in the training file (train.py). Below I have put the code of train.py and the other files it depends on. Please help me out!
ERROR
Training the model...
Traceback (most recent call last):
File "c:\Users\Utkarsh\OneDrive\Documents\Programs\Python\AI\train.py", line 80, in <module>
outputs = model(words)
File "C:\Users\Utkarsh\AppData\Local\Programs\Python\Python38\lib\site-packages\torch\nn\modules\module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\Utkarsh\AppData\Local\Programs\Python\Python38\lib\site-packages\torch\nn\modules\module.py", line 201, in _forward_unimplemented
raise NotImplementedError
NotImplementedError
train.py
import numpy as np
import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from neural_network import bag_of_words, tokenize, stem
from brain import NeuralNet

with open('intents.json', 'r') as f:
    intents = json.load(f)

all_words = []
tags = []
xy = []
for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        w = tokenize(pattern)
        all_words.extend(w)
        xy.append((w, tag))

ignore_words = [',', '?', '/', '.', '!']
all_words = [stem(w) for w in all_words if w not in ignore_words]
all_words = sorted(set(all_words))
tags = sorted(set(tags))

x_train = []
y_train = []
for (pattern_sentence, tag) in xy:
    bag = bag_of_words(pattern_sentence, all_words)
    x_train.append(bag)
    label = tags.index(tag)
    y_train.append(label)

x_train = np.array(x_train)
y_train = np.array(y_train)

num_epochs = 1000
batch_size = 8
learning_rate = 0.001
input_size = len(x_train[0])
hidden_size = 8
output_size = len(tags)

print("Training the model...")

class chatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(x_train)
        self.x_data = x_train
        self.y_data = y_train

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples

dataset = chatDataset()
train_loader = DataLoader(
    dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet(input_size, hidden_size, output_size).to(device=device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(dtype=torch.long).to(device)
        outputs = model(words)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print(f'Final loss: {loss.item():.4f}')

data = {
    "model_state": model.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags
}

FILE = "TrainData.pth"
torch.save(data, FILE)
print(f"Training Complete, File Saved to {FILE}")
other classes and files
Brain.py
import torch.nn as nn

class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forword(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        out = self.relu(out)
        out = self.l3(out)
        return out
neural_network.py
import numpy as np
import nltk
from nltk.stem.porter import PorterStemmer

Stemmer = PorterStemmer()

def tokenize(sentence):
    return nltk.word_tokenize(sentence)

def stem(word):
    return Stemmer.stem(word.lower())

def bag_of_words(tokenized_sentence, words):
    sentence_word = [stem(word) for word in tokenized_sentence]
    bag = np.zeros(len(words), dtype=np.float32)
    for idx, w in enumerate(words):
        if w in sentence_word:
            bag[idx] = 1
    return bag
intents.json
{
    "intents": [
        {
            "tag": "greeting",
            "patterns": ["hello", "hi", "hey", "what's up", "karen", "hii", "wake up"],
            "responses": ["Hello sir!", "How are you sir", "hello sir, what can I do for you"]
        },
        {
            "tag": "bye",
            "patterns": ["bye", "Good bye", "see you later", "abort", "exit", "quit", "sleep"],
            "responses": ["bye sir!", "Good bye sir, see you later", "see you later, sir"]
        }
    ]
}
train.py is the file that throws the error; neural_network.py and Brain.py contain the classes and helpers used by train.py, and intents.json contains the dataset I want to train on with train.py.
You have a typo: instead of def forword(self, x) it should be def forward(self, x). Because forward is never overridden, nn.Module falls back to _forward_unimplemented, which raises the NotImplementedError you see.
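For completeness, a corrected Brain.py with only the method renamed (the fix described above, otherwise the class is unchanged):

import torch.nn as nn

class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):  # renamed from forword so nn.Module can dispatch to it
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        out = self.relu(out)
        out = self.l3(out)
        return out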

Using tensorflow and TFBertForNextSentencePrediction to further train bert on a specific corpus

I'm trying to train TFBertForNextSentencePrediction on my own corpus, not from scratch, but rather by taking the existing BERT model with only a next sentence prediction head and training it further on a specific corpus of text (pairs of sentences). Then I want to use the trained model to extract sentence embeddings from the last hidden state for other texts.
The problem I currently encounter is that after I train the Keras model I am not able to extract the hidden states of the last layer before the next sentence prediction head.
Below is the code. Here I only train it on a few sentences just to make sure the code works.
Any help will be greatly appreciated.
Thanks,
Ayala
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from datetime import datetime
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.callbacks import ModelCheckpoint
from transformers import BertTokenizer, PreTrainedTokenizer, BertConfig, TFBertForNextSentencePrediction
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score

PRETRAINED_MODEL = 'bert-base-uncased'

# set paths and file names
time_stamp = str(datetime.now().year) + "_" + str(datetime.now().month) + "_" + str(datetime.now().day) + "_" + \
             str(datetime.now().hour) + "_" + str(datetime.now().minute)
model_name = "pretrained_nsp_model"
model_dir_data = model_name + "_" + time_stamp
model_fn = model_dir_data + ".h5"

base_path = os.path.dirname(__file__)
input_path = os.path.join(base_path, "input_data")
output_path = os.path.join(base_path, "output_models")
model_path = os.path.join(output_path, model_dir_data)
if not os.path.exists(model_path):
    os.makedirs(model_path)

# set model checkpoint
checkpoint = ModelCheckpoint(os.path.join(model_path, model_fn), monitor="val_loss", verbose=1, save_best_only=True,
                             save_weights_only=True, mode="min")
# read data
max_length = 512

def get_tokenizer(pretrained_model_name):
    tokenizer = BertTokenizer.from_pretrained(pretrained_model_name)
    return tokenizer

def tokenize_nsp_data(A, B, max_length):
    data_inputs = tokenizer(A, B, add_special_tokens=True, max_length=max_length, truncation=True,
                            pad_to_max_length=True, return_attention_mask=True,
                            return_tensors="tf")
    return data_inputs

def get_data_features(data_inputs, max_length):
    data_features = {}
    for key in data_inputs:
        data_features[key] = sequence.pad_sequences(data_inputs[key], maxlen=max_length, truncating="post",
                                                    padding="post", value=0)
    return data_features

def get_transformer_model(transformer_model_name):
    # get transformer model
    config = BertConfig(output_attentions=True)
    config.output_hidden_states = True
    config.return_dict = True
    transformer_model = TFBertForNextSentencePrediction.from_pretrained(transformer_model_name, config=config)
    return transformer_model

def get_keras_model(transformer_model):
    # get keras model
    input_ids = tf.keras.layers.Input(shape=(max_length,), name='input_ids', dtype='int32')
    input_masks_ids = tf.keras.layers.Input(shape=(max_length,), name='attention_mask', dtype='int32')
    token_type_ids = tf.keras.layers.Input(shape=(max_length,), name='token_type_ids', dtype='int32')
    X = transformer_model({'input_ids': input_ids, 'attention_mask': input_masks_ids, 'token_type_ids': token_type_ids})[0]
    model = tf.keras.Model(inputs=[input_ids, input_masks_ids, token_type_ids], outputs=X)
    model.summary()
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  optimizer=tf.optimizers.Adam(learning_rate=0.00005), metrics=['accuracy'])
    return model

def get_metrices(true_values, pred_values):
    cm = confusion_matrix(true_values, pred_values)
    acc_score = accuracy_score(true_values, pred_values)
    f1 = f1_score(true_values, pred_values, average="binary")
    precision = precision_score(true_values, pred_values, average="binary")
    recall = recall_score(true_values, pred_values, average="binary")
    metrices = {'confusion_matrix': cm,
                'acc_score': acc_score,
                'f1': f1,
                'precision': precision,
                'recall': recall
                }
    for k, v in metrices.items():
        print(k, ':\n', v)
    return metrices
# get tokenizer
tokenizer = get_tokenizer(PRETRAINED_MODEL)

# train
prompt = ["Hello", "Hello", "Hello", "Hello"]
next_sentence = ["How are you?", "Pizza", "How are you?", "Pizza"]
train_labels = [0, 1, 0, 1]
train_labels = to_categorical(train_labels)
train_inputs = tokenize_nsp_data(prompt, next_sentence, max_length)
train_data_features = get_data_features(train_inputs, max_length)

# val
prompt = ["Hello", "Hello", "Hello", "Hello"]
next_sentence = ["How are you?", "Pizza", "How are you?", "Pizza"]
val_labels = [0, 1, 0, 1]
val_labels = to_categorical(val_labels)
val_inputs = tokenize_nsp_data(prompt, next_sentence, max_length)
val_data_features = get_data_features(val_inputs, max_length)

# get transformer model
transformer_model = get_transformer_model(PRETRAINED_MODEL)

# get keras model
model = get_keras_model(transformer_model)

callback_list = []
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, min_delta=0.005, verbose=1)
callback_list.append(early_stop)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, epsilon=0.001)
callback_list.append(reduce_lr)
callback_list.append(checkpoint)

history = model.fit([train_data_features['input_ids'], train_data_features['attention_mask'],
                     train_data_features['token_type_ids']], np.array(train_labels), batch_size=2, epochs=3,
                    validation_data=([val_data_features['input_ids'], val_data_features['attention_mask'],
                                      val_data_features['token_type_ids']], np.array(val_labels)), verbose=1,
                    callbacks=callback_list)

model.layers[3].save_pretrained(model_path)  # need to save this and make sure i can get the hidden states

## predict
# load model
transformer_model = get_transformer_model(model_path)
model = get_keras_model(transformer_model)
model.summary()
model.load_weights(os.path.join(model_path, model_fn))

# test
prompt = ["Hello", "Hello"]
next_sentence = ["How are you?", "Pizza"]
test_labels = [0, 1]
test_df = pd.DataFrame({'A': prompt, 'B': next_sentence, 'label': test_labels})
test_labels = to_categorical(val_labels)
test_inputs = tokenize_nsp_data(prompt, next_sentence, max_length)
test_data_features = get_data_features(test_inputs, max_length)

# predict
pred_test = model.predict([test_data_features['input_ids'], test_data_features['attention_mask'], test_data_features['token_type_ids']])
preds = tf.keras.activations.softmax(tf.convert_to_tensor(pred_test)).numpy()
true_test = test_df['label'].to_list()
pred_test = [1 if p[1] > 0.5 else 0 for p in preds]
test_df['pred_val'] = pred_test
metrices = get_metrices(true_test, pred_test)
I am also attaching a picture from debugging mode in which I try (with no success) to view the hidden state. The problem is that I am not able to see and save the transformer model I trained or view the embeddings of the last hidden state. I tried converting the KerasTensor to a numpy array, but without success.
The issue resides in your get_keras_model() function. There you specify that you are only interested in the first element of the output (i.e. the logits):
X = transformer_model({'input_ids': input_ids, 'attention_mask': input_masks_ids, 'token_type_ids': token_type_ids})[0]
Just make the index selection conditional, like this, to get the whole output of the model:
def get_keras_model(transformer_model, is_training=True):
    ###your other code
    X = transformer_model({'input_ids': input_ids, 'attention_mask': input_masks_ids, 'token_type_ids': token_type_ids})
    if is_training:
        X = X[0]
    ###your other code
    return model

# predict
###your other code
model = get_keras_model(transformer_model, is_training=False)
###your other code
print(pred_test.keys())
Output:
odict_keys(['logits', 'hidden_states', 'attentions'])
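With is_training=False the last hidden state can then be read out of that prediction dict; a short sketch of one way to turn it into sentence embeddings (the pooling choice here is just an example):

hidden_states = pred_test['hidden_states']   # embeddings output plus one entry per transformer layer
last_hidden = hidden_states[-1]              # shape: (batch, max_length, hidden_size)
cls_embeddings = last_hidden[:, 0, :]        # [CLS] vector per sentence pair
mean_embeddings = last_hidden.mean(axis=1)   # or mean-pool over the tokens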
P.S.: The BertTokenizer can truncate and add padding by itself (see the documentation).
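For example, the separate get_data_features() padding step could be dropped by letting the tokenizer truncate and pad directly (a sketch of that call):

data_inputs = tokenizer(A, B, add_special_tokens=True, max_length=max_length,
                        truncation=True, padding='max_length',
                        return_attention_mask=True, return_tensors='tf')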

How to group the histograms shown on tensorboard in Keras?

I am trying to come to grips with the graphs shown on tensorboard when using Keras and trying to tune hyperparameters using HPARAMS.
As an example take a NN with dense layers and batch normalization layers to classify the MNIST digits from 0 to 4.
The function producing the summaries shown on tensorboard is the following:
def run(run_dir, hparams):
    with tf.summary.create_file_writer(run_dir).as_default() as summ:
        hp.hparams(hparams)  # record the values used in this trial
        num_layers = 5
        m = train_test_model(hparams, run_dir, num_layers=num_layers)
        with tf.name_scope("init_lr_{}".format(hparams[HP_INITIAL_LEARNING_RATE])):
            for i, l in enumerate(m.layers[1:]):
                # print(l.get_config())
                if "batch_normalization" in l.get_config()['name']:
                    w, b, c, d = l.get_weights()
                    tf.summary.histogram(name='W%d' % i, data=w, step=1, description="weights of " + l.get_config()['name'])
                    tf.summary.histogram(name='B%d' % i, data=b, step=1, description="biases of " + l.get_config()['name'])
                else:
                    w, b = l.get_weights()
                    tf.summary.histogram(name='W%d' % i, data=w, step=1, description="weights of " + l.get_config()['name'])
                    tf.summary.histogram(name='B%d' % i, data=b, step=1, description="biases of " + l.get_config()['name'])
train_test_model is a function that trains different models, using hp.HParam objects for the hyperparameter variables. The problem is with the histograms. With this code the histogram graphs (of weights and biases) are named sequentially: for the first model I get W1 to W6 and B1 to B6, for the second W7 to W12, and so on. The model they belong to is shown at the top of the corresponding figure, but I would rather group the weights and biases by the model they belong to and, within each model, name the histograms W1 to W6 and B1 to B6.
An example of what I get is the following, where each model actually has only 5 Dense layers and 5 BatchNormalization layers (excluding the one after the Flatten layer):
To solve this I tried adding with tf.name_scope("init_lr_{}".format(hparams[HP_INITIAL_LEARNING_RATE])):, but this does not solve the problem: it keeps showing the previous graphs with the previous names and adds other graphs, as in the following picture, with a different layout:
How can I make TensorBoard group the histograms according to the NN model they belong to? Should I create a specific folder for each trained model? (A sketch of that idea follows the full code below.)
FULL CODE
#SET UP
import tensorflow as tf
from tensorflow import keras
from tensorboard.plugins.hparams import api as hp
import os
keras.backend.clear_session()
#LOAD DATA
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.mnist.load_data()
X_train_reduced = X_train_full[y_train_full<5]
y_train_reduced = y_train_full[y_train_full<5]
X_test_reduced = X_test[y_test<5]
y_test_reduced = y_test[y_test<5]
X_train = X_train_reduced[5000:]
y_train = y_train_reduced[5000:]
X_valid = X_train_reduced[:5000]
y_valid = y_train_reduced[:5000]
#set the hyperparameters to tune
keras.backend.clear_session()
HP_INITIAL_LEARNING_RATE = hp.HParam("initial_learning_rate", hp.Discrete([0.0001, 0.00012, 0.00015]))
HP_NUM_BATCH_SIZE = hp.HParam("batch_size", hp.Discrete([32]))
HP_NUM_EPOCHS = hp.HParam("epochs", hp.Discrete([180]))
HP_BETA_1 = hp.HParam("beta_1", hp.Discrete([0.95]))
HP_BETA_2 = hp.HParam("beta_2", hp.Discrete([0.9994]))
HP_DECAY_STEP = hp.HParam("decay_step", hp.Discrete([10000]))
HP_DECAY_RATE = hp.HParam("decay_rate", hp.Discrete([0.8]))
# function creating the summaries
def run(run_dir, hparams):
    with tf.summary.create_file_writer(run_dir).as_default() as summ:
        hp.hparams(hparams)  # record the values used in this trial
        num_layers = 5
        m = train_test_model(hparams, run_dir, num_layers=num_layers)
        with tf.name_scope("init_lr_{}".format(hparams[HP_INITIAL_LEARNING_RATE])):
            for i, l in enumerate(m.layers[1:]):
                # print(l.get_config())
                if "batch_normalization" in l.get_config()['name']:
                    w, b, c, d = l.get_weights()
                    tf.summary.histogram(name='W%d' % i, data=w, step=1, description="weights of " + l.get_config()['name'])
                    tf.summary.histogram(name='B%d' % i, data=b, step=1, description="biases of " + l.get_config()['name'])
                    # tf.summary.histogram(name='3rd type of weight in batch %d' % i, data=c, step=1, description="3rd argument of " + l.get_config()['name'])
                    # tf.summary.histogram(name='4th type of weight in batch %d' % i, data=d, step=1, description="4th argument of " + l.get_config()['name'])
                else:
                    w, b = l.get_weights()
                    tf.summary.histogram(name='W%d' % i, data=w, step=1, description="weights of " + l.get_config()['name'])
                    tf.summary.histogram(name='B%d' % i, data=b, step=1, description="biases of " + l.get_config()['name'])
# function creating different models according to the hyperparameters passed with hparams
def train_test_model(hparams, folder, num_layers=5):
    act_fun = "elu"
    initializer = "he_normal"
    NAME = "MNIST_0-4-earl_stp-nDens_{}-{}-{}-batch_norm-btch_sz_{}-epo_{}-Adam_opt-b1_{}-b2_{}-lr_exp_dec-in_lr_{}-lr_decRate_{}-lr_decStep_{}".format(
        num_layers, act_fun, initializer, hparams[HP_NUM_BATCH_SIZE], hparams[HP_NUM_EPOCHS], hparams[HP_BETA_1],
        hparams[HP_BETA_2], hparams[HP_INITIAL_LEARNING_RATE], hparams[HP_DECAY_RATE], hparams[HP_DECAY_STEP])

    model = keras.models.Sequential()
    model.add(keras.layers.Flatten(input_shape=[28, 28]))
    input_size = 28**2
    model.add(keras.layers.BatchNormalization())
    hidden_layer_neurons = 100
    for i in range(num_layers):
        model.add(keras.layers.Dense(hidden_layer_neurons, activation=act_fun, kernel_initializer="he_normal"))
        model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dense(5, activation='softmax'))

    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=hparams[HP_INITIAL_LEARNING_RATE],
        decay_steps=hparams[HP_DECAY_STEP],
        decay_rate=hparams[HP_DECAY_RATE])
    my_opt = keras.optimizers.Adam(learning_rate=lr_schedule, beta_1=hparams[HP_BETA_1], beta_2=hparams[HP_BETA_2])

    early_stopping_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
    # model_checkpoint_cb = keras.callbacks.ModelCheckpoint("GridSearchCV.h5", save_best_only=True)
    run_index = 1  # increment every time you train the model
    logdir = os.path.join(os.curdir, folder, NAME)
    tensorboard_cb = keras.callbacks.TensorBoard(logdir, histogram_freq=1)
    callbacks = [
        early_stopping_cb,
        tensorboard_cb,
        hp.KerasCallback(logdir, hparams),  # log hparams
    ]

    model.compile(loss='sparse_categorical_crossentropy', metrics=["accuracy"], optimizer=my_opt)
    model.fit(X_train, y_train, epochs=hparams[HP_NUM_EPOCHS], batch_size=hparams[HP_NUM_BATCH_SIZE],
              validation_data=(X_valid, y_valid), callbacks=callbacks)
    return model
session_num = 0
LOG_FOLDER = 'logs/batch_normalization/'

for num_batch in HP_NUM_BATCH_SIZE.domain.values:
    for epoch in HP_NUM_EPOCHS.domain.values:
        for beta_1 in HP_BETA_1.domain.values:
            for beta_2 in HP_BETA_2.domain.values:
                for initial_learning_rate in HP_INITIAL_LEARNING_RATE.domain.values:
                    for decay_rate in HP_DECAY_RATE.domain.values:
                        for decay_step in HP_DECAY_STEP.domain.values:
                            hparams = {
                                HP_NUM_BATCH_SIZE: num_batch,
                                HP_NUM_EPOCHS: epoch,
                                HP_BETA_1: beta_1,
                                HP_BETA_2: beta_2,
                                HP_INITIAL_LEARNING_RATE: initial_learning_rate,
                                HP_DECAY_RATE: decay_rate,
                                HP_DECAY_STEP: decay_step
                            }
                            # print('--- Starting trial: %s' % run_name)
                            print({h.name: hparams[h] for h in hparams})
                            run(LOG_FOLDER, hparams)
                            session_num += 1
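One way to get the grouping described in the question (an assumption on my part, not a verified answer): give every trained model its own summary sub-directory and reuse the same histogram names, so TensorBoard shows one run per model, each with its own W1... and B1... A sketch of run() rewritten that way:

def run(run_dir, hparams):
    num_layers = 5
    m = train_test_model(hparams, run_dir, num_layers=num_layers)
    # one file writer (and therefore one TensorBoard run) per trained model
    model_run_dir = os.path.join(run_dir, "init_lr_{}".format(hparams[HP_INITIAL_LEARNING_RATE]))
    with tf.summary.create_file_writer(model_run_dir).as_default():
        hp.hparams(hparams)
        for i, l in enumerate(m.layers[1:]):
            weights = l.get_weights()  # BatchNormalization layers return 4 arrays, Dense layers 2
            if len(weights) >= 2:
                tf.summary.histogram(name='W%d' % i, data=weights[0], step=1)
                tf.summary.histogram(name='B%d' % i, data=weights[1], step=1)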

Hyperopt on multiple subsets of a dataframe

I have the following dataframe:
import numpy as np
import pandas as pd

# create some data with a Names column
data = pd.DataFrame({'Names': ['Joe', 'John', 'Jasper', 'Jez'] * 4,
                     'Add': ['Lo', 'Po', 'Fa', 'It'] * 4,
                     'Ob1': np.random.rand(16),
                     'Ob2': np.random.rand(16)})
I want to run hyperopt on only a few combinations of 'Names' and 'Add'. To that end I split this dataframe into subsets using the following code:
# create a dataframe of all combinations
combinations=data[['Names','Add']].drop_duplicates()
comboList=list(zip(combinations['Names'],combinations['Add']))
comboList
[('Joe', 'Lo'), ('John', 'Po'), ('Jasper', 'Fa'), ('Jez', 'It')]
for i, combo in enumerate(comboList):
    print(combo)
    subset = data[(data['Names'] == combo[0]) & (data['Add'] == combo[1])]
    # run multiple functions on subset here
    print(subset.head())
Using the above logic I have built the functions:
import sys
import time

import numpy as np
from tqdm._tqdm_notebook import tqdm_notebook as tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, LSTM
from keras import optimizers
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
def trim_dataset(mat, batch_size):
    """
    trims dataset to a size that's divisible by BATCH_SIZE
    """
    no_of_rows_drop = mat.shape[0] % batch_size
    if no_of_rows_drop > 0:
        return mat[:-no_of_rows_drop]
    else:
        return mat

def build_timeseries(mat, y_col_index, time_steps):
    # total number of time-series samples would be len(mat) - TIME_STEPS
    dim_0 = mat.shape[0] - time_steps
    dim_1 = mat.shape[1]
    x = np.zeros((dim_0, time_steps, dim_1))
    y = np.zeros((x.shape[0],))
    for i in tqdm(range(dim_0)):
        x[i] = mat[i:time_steps + i]
        y[i] = mat[time_steps + i, y_col_index]
    # print("length of time-series i/o {} {}".format(x.shape, y.shape))
    return x, y

def data(df, batch_size, time_steps):
    global mat
    BATCH_SIZE = 10
    TIME_STEPS = 1
    x_train, x_test = train_test_split(df, train_size=883, shuffle=False)
    # scale the train and test dataset
    min_max_scaler = MinMaxScaler()
    x_train = min_max_scaler.fit_transform(x_train)
    x_test = min_max_scaler.transform(x_test)
    x_train_ts, y_train_ts = build_timeseries(x_train, 0, TIME_STEPS)
    x_test_ts, y_test_ts = build_timeseries(x_test, 0, TIME_STEPS)
    x_train_ts = trim_dataset(x_train_ts, BATCH_SIZE)
    y_train_ts = trim_dataset(y_train_ts, BATCH_SIZE)
    x_test_ts = trim_dataset(x_test_ts, BATCH_SIZE)
    y_test_ts = trim_dataset(y_test_ts, BATCH_SIZE)
    # str(np.isnan(x_test).any())))
    return x_train_ts, y_train_ts, x_test_ts, y_test_ts
stime = time.time()

def print_time(text, stime):
    seconds = (time.time() - stime)
    print(text + " " + str(seconds / 60) + " minutes : " + str(np.round(seconds % 60)) + " seconds")

def get_readable_ctime():
    return time.strftime("%d-%m-%Y %H_%M_%S")

from keras.callbacks import Callback

class LogMetrics(Callback):
    def __init__(self, search_params, param, comb_no):
        self.param = param
        self.self_params = search_params
        self.comb_no = comb_no

    def on_epoch_end(self, epoch, logs):
        for i, key in enumerate(self.self_params.keys()):
            logs[key] = self.param[key]
        logs["combination_number"] = self.comb_no
search_space = {
    'batch_size': hp.choice('bs', [10]),
    'time_steps': hp.choice('ts', [1]),
    'lstm1_nodes': hp.choice('units_lsmt1', [100, 150, 200, 250, 300]),
    'lstm1_dropouts': hp.uniform('dos_lstm1', 0, 1),
    'lstm_layers': hp.choice('num_layers_lstm', [
        {
            'layers': 'one',
        },
        {
            'layers': 'two',
            'lstm2_nodes': hp.choice('units_lstm2', [50, 100, 150]),
            'lstm2_dropouts': hp.uniform('dos_lstm2', 0, 1)
        }
    ]),
    'dense_layers': hp.choice('num_layers_dense', [
        {
            'layers': 'one'
        },
        {
            'layers': 'two',
            'dense2_nodes': hp.choice('units_dense', [10, 20, 30, 40])
        }
    ]),
    "lr": hp.uniform('lr', 0, 1),
    "epochs": hp.choice('epochs', [50, 100, 150, 200, 300, 500, 1000, 2000]),
    "optimizer": hp.choice('optmz', ["rms"])
}
def create_model_hypopt(params):
    print("Trying params:", params)
    batch_size = params["batch_size"]
    time_steps = params["time_steps"]
    start_time = time.clock()
    comboList = list(zip(Roaming_Map['roaming_partner_country'], Roaming_Map['opco']))
    for i, combo in enumerate(comboList):
        print("As above for the country & opco - ", i, combo)
        subset = data[(data['Names'] == combo[0]) & (data['add'] == combo[1])]
        subset = subset[["Ob1", "Ob2"]]
        x_train_ts, y_train_ts, x_test_ts, y_test_ts = data(subset, 10, 1)

        lstm_model = Sequential()
        lstm_model.add(LSTM(params["lstm1_nodes"], batch_input_shape=(batch_size, time_steps, x_train_ts.shape[2]), dropout=params["lstm1_dropouts"],
                            recurrent_dropout=params["lstm1_dropouts"], stateful=True, return_sequences=True,
                            kernel_initializer='random_uniform'))
        # ,return_sequences=True  # LSTM params => dropout=0.2, recurrent_dropout=0.2
        if params["lstm_layers"]["layers"] == "two":
            lstm_model.add(LSTM(params["lstm_layers"]["lstm2_nodes"], dropout=params["lstm_layers"]["lstm2_dropouts"]))
        else:
            lstm_model.add(Flatten())
        if params["dense_layers"]["layers"] == 'two':
            lstm_model.add(Dense(params["dense_layers"]["dense2_nodes"], activation='relu'))
        lstm_model.add(Dense(1, activation='sigmoid'))

        lr = params["lr"]
        epochs = params["epochs"]
        if params["optimizer"] == 'rms':
            optimizer = optimizers.RMSprop(lr=lr)
        else:
            optimizer = optimizers.SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=True)
        lstm_model.compile(loss='mean_squared_error', optimizer=optimizer)  # binary_crossentropy

        history = lstm_model.fit(x_train_ts, y_train_ts, epochs=epochs, verbose=2, batch_size=batch_size,
                                 validation_data=[x_test_ts, y_test_ts],
                                 callbacks=[LogMetrics(search_space, params, -1)])
        # for key in history.history.keys():
        #     print(key, "--", history.history[key])
        # get the highest validation accuracy of the training epochs
        val_error = np.amin(history.history['val_loss'])
        print('Best validation error of epoch number', epochs, 'for the combination:', combo, val_error)
    return {'loss': val_error, 'status': STATUS_OK, 'model': lstm_model}  # if accuracy use '-' sign

trials = Trials()
best = fmin(create_model_hypopt,
            space=search_space,
            algo=tpe.suggest,
            max_evals=10,
            trials=trials)

print_time("program completed in", stime)
best_trials = sorted(trials.results, key=lambda x: x['loss'], reverse=False)
best_trials.append(best_trials)
But this runs through all the combinations of Names & Add with the same set of hyperparameters and only then moves on to the next set of hyperparameters. Instead I want it to try all possible hyperparameters for one combination of Names & Add and then move on to the next combination.
I solved this problem by running the loop outside the create_model function:
all_losses = []
for i, combo in enumerate(comboList):
    print(combo)
    subset = data[(data['country'] == combo[0]) & (data['type'] == combo[1])]
    subset = subset[["Data"]]
    x_train_ts, y_train_ts, x_test_ts, y_test_ts = data(subset, 10, 1)

    trials = Trials()
    best = fmin(create_model_hypopt,
                space=search_space,
                algo=tpe.suggest,
                max_evals=1,
                trials=trials)

    all_losses.append(trials.losses())
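The posted fix does not show how the per-combination arrays reach the objective; a toy, runnable sketch of one way to bind them with functools.partial so that each fmin call explores the full search space for a single subset (the LSTM is replaced by a stand-in loss here):

import functools
import numpy as np
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

def objective(params, x_train, y_train):
    # stand-in "model": the error depends on the bound data and the sampled params
    val_error = float(np.mean((y_train - params["lr"] * x_train.mean()) ** 2))
    return {'loss': val_error, 'status': STATUS_OK}

toy_space = {"lr": hp.uniform("lr", 0, 1)}
results = {}
for combo, (x_train, y_train) in {("Joe", "Lo"): (np.random.rand(8), np.random.rand(8))}.items():
    trials = Trials()
    best = fmin(functools.partial(objective, x_train=x_train, y_train=y_train),
                space=toy_space, algo=tpe.suggest, max_evals=10, trials=trials)
    results[combo] = (best, min(trials.losses()))
print(results)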
