I have used transfer learning (ImageNet weights) and trained InceptionV3 to recognize two classes of images. The code looks like:
InceptionV3_model = InceptionV3(input_shape=(150,150,3),weights='imagenet', include_top=False)
for layer in InceptionV3_model.layers[:249]:
    layer.trainable = False
for layer in InceptionV3_model.layers[249:]:
    layer.trainable = True
InceptionV3_last_output = InceptionV3_model.output
InceptionV3_maxpooled_output = Flatten()(InceptionV3_last_output)
InceptionV3_x = Dense(1024, activation='relu')(InceptionV3_maxpooled_output)
InceptionV3_x = Dropout(0.5)(InceptionV3_x)
InceptionV3_x = Dense(2, activation='softmax')(InceptionV3_x)
InceptionV3_x_final_model = Model(inputs=InceptionV3_model.input,outputs=InceptionV3_x)
InceptionV3_x_final_model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy',metrics=['accuracy'])
number_of_epochs = inception_epoch
inception_filepath = 'inceptionv3_'+'-saved-model-{epoch:02d}-loss-{loss:.2f}.hdf5'
inception_checkpoint = tf.keras.callbacks.ModelCheckpoint(inception_filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
inception_early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)
inceptionv3_history = InceptionV3_x_final_model.fit(train_generator, epochs = number_of_epochs, validation_data = validation_generator,callbacks=[inception_checkpoint,inception_early_stopping],verbose=1)
do_history_stuff(inceptionv3_history, 'inceptionv3_model', True)
Then I get the predictions using:
def mode(my_list):
    ct = Counter(my_list)
    max_value = max(ct.values())
    return [key for key, value in ct.items() if value == max_value]
true_value = []
inception_pred = []
for folder in os.listdir(seg_test_folders):
    test_image_ids = os.listdir(os.path.join(seg_test_folders, folder))
    for image_id in test_image_ids[:int(len(test_image_ids))]:
        path = os.path.join(seg_test_folders, folder, image_id)
        true_value.append(validation_generator.class_indices[folder])
        img = cv2.resize(cv2.imread(path), (150, 150))
        #img = cv2.imread(path)
        img_normalized = img/255
        #Inception
        inception_image_prediction = np.argmax(inception_best_model.predict(np.array([img_normalized])))
        inception_pred.append(inception_image_prediction)
I am trying to use Grad-CAM to visualize a heatmap and see where the network focuses, but it doesn't work. I am trying to follow Chollet's guide, but I am a newbie and I don't know how to match it to my code. Can you please help me customize the Grad-CAM code?
I cannot find the penultimate (last convolutional) layer of my model, and I cannot generate a heatmap that matches one of my own images as predicted by my model. The code I am trying to use is https://github.com/Abhijit-2592/Keras-custom-callbacks/blob/master/how%20to%20use%20grad-cam%20in%20inceptionv3_copy.ipynb. This code uses the generic InceptionV3 and not my fine-tuned version. Can you please help match this code with mine?
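For reference, here is a minimal Grad-CAM sketch adapted to the model above, following Chollet's make_gradcam_heatmap recipe. It assumes TF2/Keras and that the last convolutional block of the fine-tuned InceptionV3 is the layer named 'mixed10' inside the final model (confirm with inception_best_model.summary()); treat it as a sketch rather than a verified drop-in solution.

import numpy as np
import tensorflow as tf

def make_gradcam_heatmap(img_array, model, last_conv_layer_name='mixed10'):
    # Model mapping the input image to the last conv feature map and the predictions
    grad_model = tf.keras.models.Model(
        model.input,
        [model.get_layer(last_conv_layer_name).output, model.output]
    )
    with tf.GradientTape() as tape:
        conv_output, preds = grad_model(img_array)
        class_channel = preds[:, tf.argmax(preds[0])]
    # Gradient of the winning class score w.r.t. the conv feature map
    grads = tape.gradient(class_channel, conv_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    # Weight each feature-map channel by its importance for the class
    heatmap = conv_output[0] @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)
    heatmap = tf.maximum(heatmap, 0) / (tf.reduce_max(heatmap) + 1e-8)
    return heatmap.numpy()

# Usage with one of the preprocessed images from the prediction loop above:
# heatmap = make_gradcam_heatmap(np.array([img_normalized]), inception_best_model)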
I am getting the error
tensorflow.python.framework.errors_impl.OperatorNotAllowedInGraphError: Using a symbolic `tf.Tensor` as a Python `bool` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
while I'm trying to fit a DDPG agent over a custom environment.
Here is the CustomEnv():
class CustomEnv(Env):
    def __init__(self):
        print("Test_3 : Init")
        """NOTE: Bool array element definition for Box action space needs to be determined !!!!"""
        self.action_space = Tuple((Box(low=4, high=20, shape=(1, 1)),
                                   Box(low=0, high=1, shape=(1, 1)),
                                   MultiBinary(1),
                                   MultiBinary(1),
                                   Box(low=4, high=20, shape=(1, 1)),
                                   Box(low=0, high=1, shape=(1, 1)),
                                   MultiBinary(1),
                                   MultiBinary(1),
                                   Box(low=0, high=100, shape=(1, 1)),
                                   Box(low=0, high=100, shape=(1, 1))))
        """Accuracy array"""
        self.observation_space = Box(low=np.asarray([0]), high=np.asarray([100]))
        """Initial Space"""
        self.state = return_Acc(directory=source_dir, input_array=self.action_space.sample())
        self.episode_length = 20
        print(f"Action Space sample = {self.action_space.sample()}")
        print("Test_3 : End Init")

    def step(self, action):
        print(f"Model Action Space Output = {action}")
        print("Test_2 : Step")
        accuracy_of_model = random.randint(0, 100)  # return_Acc(directory=source_dir, input_array=action)
        self.state = accuracy_of_model  # round(100*abs(accuracy_of_model))
        self.episode_length -= 1
        # Calculating the reward
        print(f"self.state = {self.state}, accuracy_of_model = {accuracy_of_model}")
        if (self.state > 60):
            reward = self.state
        else:
            reward = -(60 - self.state) * 10
        if self.episode_length <= 0:
            done = True
        else:
            done = False
        # Setting the placeholder for info
        info = {}
        # Returning the step information
        print("Test_2 : End Step")
        return self.state, reward, done, info

    def reset(self):
        print("Test_1 : Reset")
        self.state = 50
        print(f"Self state = {self.state}")
        self.episode_length = 20
        print("Test_1 : End Reset")
        return self.state
The return_Acc function runs a random decision forest model and returns its accuracy to the DDPG agent, which uses it to determine the next step's parameters. Finally, my DDPG model is given below:
states = env.observation_space.shape
actions = np.asarray(env.action_space.sample()).size
print(f"states = {states}, actions = {actions}")
def model_creation(states, actions):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(32, activation='relu', input_shape=states))
    model.add(tf.keras.layers.Dense(24, activation='relu'))
    model.add(tf.keras.layers.Dense(actions, activation='linear'))
    model.build()
    return model
model = model_creation(states, actions)
model.summary()
def build_agent(model, actions, critic):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    nafa = DDPGAgent(nb_actions=actions, actor=model, memory=memory, critic=critic, critic_action_input=action_input)
    #dqn = DQNAgent(model=model, memory=memory, policy=policy,
    #               nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return nafa
action_input = Input(shape=(actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
flattened_observation = Flatten()(observation_input)
x = Concatenate()([action_input, flattened_observation])
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())
dqn = build_agent(model, actions, critic)
dqn.compile(tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=200, visualize=False, verbose=1)
results = dqn.test(env, nb_episodes=500, visualize=False)
print(f"episode_reward = {np.mean(results.history['episode_reward'])}")
I tried most of the solutions that I found here, such as
tf.compat.v1.enable_eager_execution()
and combinations of it with other functions (such as enable_v2_behaviour()), but I couldn't make it work. If I don't run the RDF model inside the DDPG environment, no problem occurs. If it's possible, how can I connect the RDF model's accuracy output to self.state as an input?
keras-rl2 1.0.5
tensorflow-macos 2.10.0
And I'm using an M1-based Mac, if that matters.
To anyone interested in the solution: I came up with a slower but at least working approach. It's actually simpler than expected. Just run the model script from the terminal and have it write its output to a text file, then read that text file from the RL agent script; likewise, write the action-space values to a text file, which the model script can then read to create the observation.
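A minimal sketch of that handshake (the file names action.txt / accuracy.txt and the script name model_script.py are hypothetical placeholders, not from the original code):

import subprocess
import numpy as np

def return_Acc(directory, input_array):
    # Flatten the sampled Tuple action space and write it for the RF script to read
    values = [float(x) for part in input_array for x in np.ravel(part)]
    with open("action.txt", "w") as f:
        f.write(",".join(str(v) for v in values))
    # Run the random decision forest script in its own process, outside the TF graph
    subprocess.run(["python", "model_script.py", directory], check=True)
    # Read back the accuracy the script wrote out
    with open("accuracy.txt") as f:
        return float(f.read().strip())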
I am training a model whose output and ground truth should be binary. It's an Inception-based two-stream model: the Inception architecture is used as the encoder, and the decoder is a custom model consisting of conv layers, batch normalization, upsampling, and tanh as the non-linearity. I have tried ReLU as well, but still no result.
The model initializes at different values but does not update. My model's forward function is:
def forward(self, inp):
    # Preprocessing
    out = self.conv3d_1a_7x7(inp)
    skip1 = out
    out = self.maxPool3d_2a_3x3(out)
    out = self.dropout(out)
    out = self.conv3d_2b_1x1(out)
    out = self.conv3d_2c_3x3(out)
    out = self.maxPool3d_3a_3x3(out)
    out = self.dropout(out)
    out = self.mixed_3b(out)
    skip2 = out
    out = self.mixed_3c(out)
    out = self.maxPool3d_4a_3x3(out)
    out = self.dropout(out)
    out = self.mixed_4b(out)
    out = self.mixed_4c(out)
    out = self.dropout(out)
    out = self.mixed_4d(out)
    skip3 = out
    out = self.dropout(out)
    out = self.mixed_4e(out)
    out = self.mixed_4f(out)
    out = self.maxPool3d_5a_2x2(out)
    out = self.dropout(out)
    out = self.mixed_5b(out)
    out = self.mixed_5c(out)
    out = self.dropout(out)
    out = self.tconv6(out, skip1, skip2, skip3)
    out = self.sigmoid(out)
    print("Before permutation", out.shape)
    out = out.permute(0, 1, 3, 4, 2)
    out_logits = out
    return out, out_logits
My train function is:
misc,out_logits[stream] = models[stream](data[stream])
out_softmax = torch.nn.functional.softmax(out_logits[stream], 1).requires_grad_()
val, preds = torch.max(out_logits[stream].data, 1)
preds = preds.to(device, dtype=torch.float)
gt = torch.round(gt)
gt_avg = torch.mean(gt)
gt[gt>gt_avg] = 1
gt[gt<=gt_avg] = 0
out_logits[stream] = out_logits[stream].squeeze(1)
losses[stream] = criterion(preds.cpu(), gt.cpu()).requires_grad_()
if phase == 'train':
    optimizers[stream].zero_grad()
    losses[stream].backward(retain_graph=True)
    optimizers[stream].step()
running_losses[stream] += losses[stream].item() * data[stream].shape[0]
running_corrects[stream] += torch.sum(val.cpu() == gt_c.data.cpu()).item()
correct_t = torch.sum(preds==gt_c).item()
total_t = gt_c.shape[0]*gt_c.shape[1]*gt_c.shape[2]*gt_c.shape[3]
acc_epc = 100*correct_t/total_t
for scheduler in schedulers.values():
    scheduler.step()
My loss and accuracy are always constant, as shown here.
I have tried using different optimizers like SGD, Adam, and RMSprop. Furthermore, I have tried tuning the hyperparameters, but the model is not converging. What am I missing?
You send the wrong variable into the loss function if you are doing cross-entropy. Change preds to out_logits[stream], and there's no need to call .cpu() and .requires_grad_():
losses[stream] = criterion(out_logits[stream], gt)
Also, you performed argmax for preds. That operation is not differentiable, regardless of which loss function you use.
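Put together, a minimal sketch of the corrected fragment of the training loop (assuming criterion is a cross-entropy-style loss that accepts raw logits and the binarized ground truth gt):

# Feed raw logits to the loss so gradients can flow back through the network;
# argmax breaks the gradient path and is only useful for metrics.
misc, out_logits[stream] = models[stream](data[stream])
out_logits[stream] = out_logits[stream].squeeze(1)
losses[stream] = criterion(out_logits[stream], gt)

if phase == 'train':
    optimizers[stream].zero_grad()
    losses[stream].backward()
    optimizers[stream].step()

# Hard predictions for accuracy only, not for the loss
preds = torch.argmax(out_logits[stream], dim=1)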
I am trying to modify the T5 model for a sequence labelling task (to do NER).
I create my model class by taking the last hidden states of the T5 model and adding a linear layer with 3 out-features (for simple IOB tags).
Here is my model class:
class Seq2SeqTokenCLS(nn.Module):
    def __init__(self):
        super(Seq2SeqTokenCLS, self).__init__()
        self.num_labels = 3
        self.base_model = T5ForConditionalGeneration.from_pretrained('t5-small')
        # average of n last hidden layers
        self.layers = 3
        # change beam search or greedy search here
        # Suggested parameters from the T5 paper: num_beams = 4 and length penalty alpha = 0.6
        self.base_model.config.num_beams = 1  # <-- change to 1 for greedy decoding
        self.base_model.config.length_penalty = 0.6  # <-- comment this out for greedy decoding
        self.dropout = nn.Dropout(0.5)
        self.dense = nn.Linear(in_features=512 * self.layers, out_features=self.num_labels)

    def forward(self, input_ids, attn_mask, labels):
        hidden_states = self.base_model(
            input_ids,
            attention_mask=attn_mask,
            output_hidden_states=True
        )
        hidden_states = torch.cat([hidden_states['decoder_hidden_states'][-(n+1)] for n in range(self.layers)], dim=2)
        logits = self.dense(self.dropout(hidden_states))
        loss = None
        loss_fct = nn.CrossEntropyLoss(weight=class_weights)
        # Only keep active parts of the loss
        if attn_mask is not None:
            active_loss = attn_mask.view(-1) == 1
            active_logits = logits.view(-1, self.num_labels)
            active_labels = torch.where(
                active_loss, labels.view(-1), torch.tensor(loss_fct.ignore_index).type_as(labels)
            )
            loss = loss_fct(active_logits, active_labels)
        else:
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        return {'logits': logits,
                'loss': loss}
However, I am confused about how I should do inference with this approach. Should I use the .generate function, as when T5 has a standard LM head? If that is the case, I don't know how to inherit that function into my new model class...
Or can I use a normal evaluation loop?
E.g. something like this?:
predictions = []
all_labels = []
with torch.no_grad():
    for batch in tqdm(test_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids=input_ids,
                        attn_mask=attention_mask
                        )
        for sample, lab in zip(outputs['logits'], labels):
            preds = torch.argmax(sample, dim=1)
            predictions.append(preds)
            all_labels.append(lab)
I would still like to experiment with beam search...
I am trying to implement the NER example using BERT and PyTorch from the Hugging Face guide (https://huggingface.co/transformers/custom_datasets.html#ft-trainer). Reading in the data works fine, but when I want to start training, I get the error
Expected input batch_size (16) to match target batch_size (4000)
When I try to change the input batch_size, the target batch_size is always the input batch_size * 250. I would greatly appreciate it if someone could look over the following code and spot my mistake.
from pathlib import Path
import re
def read_data(file_path):
    file_path = Path(file_path)
    raw_text = file_path.read_text().strip()
    raw_docs = re.split(r'\n\t?\n', raw_text)
    token_docs = []
    tag_docs = []
    for doc in raw_docs:
        tokens = []
        tags = []
        for line in doc.split('\n'):
            token, tag = line.split('\t')
            tokens.append(token)
            tags.append(tag)
        token_docs.append(tokens)
        tag_docs.append(tags)
    return token_docs, tag_docs
train_texts, train_tags = read_data('data/train.tsv')
val_texts, val_tags = read_data('data/test.tsv')
unique_tags = set(tag for doc in train_tags for tag in doc)
tag2id = {tag: id for id, tag in enumerate(unique_tags)}
id2tag = {id: tag for tag, id in tag2id.items()}
from transformers import DistilBertTokenizerFast
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-cased')
train_encodings = tokenizer(train_texts, is_split_into_words=True, return_offsets_mapping=True, padding=True, truncation=True)
val_encodings = tokenizer(val_texts, is_split_into_words=True, return_offsets_mapping=True, padding=True, truncation=True)
import numpy as np
def encode_tags(tags, encodings):
    labels = [[tag2id[tag] for tag in doc] for doc in tags]
    encoded_labels = []
    for doc_labels, doc_offset in zip(labels, encodings.offset_mapping):
        # create an empty array of -100
        doc_enc_labels = np.ones(len(doc_offset), dtype=int) * -100
        arr_offset = np.array(doc_offset)
        # set labels whose first offset position is 0 and the second is not 0
        doc_enc_labels[(arr_offset[:, 0] == 0) & (arr_offset[:, 1] != 0)] = doc_labels
        encoded_labels.append(doc_enc_labels.tolist())
    return encoded_labels
train_labels = encode_tags(train_tags, train_encodings)
val_labels = encode_tags(val_tags, val_encodings)
import torch
class TestDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)
train_encodings.pop("offset_mapping") # we don't want to pass this to the model
val_encodings.pop("offset_mapping")
train_dataset = TestDataset(train_encodings, train_labels)
val_dataset = TestDataset(val_encodings, val_labels)
from transformers import DistilBertForTokenClassification
model = DistilBertForTokenClassification.from_pretrained('distilbert-base-cased', num_labels=len(unique_tags))
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=3,              # total number of training epochs
    per_device_train_batch_size=16,  # batch size per device during training
    per_device_eval_batch_size=64,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
)
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-cased")
trainer = Trainer(
    model=model,                  # the instantiated 🤗 Transformers model to be trained
    args=training_args,           # training arguments, defined above
    train_dataset=train_dataset,  # training dataset
    eval_dataset=val_dataset      # evaluation dataset
)
trainer.train()
Instead of the DistilBertForSequenceClassification model, you want to use DistilBertForTokenClassification in the last cell. Sequence classification predicts one label per example, while your labels are per token (250 per example after padding), which is why the target batch size is always the input batch size * 250.
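A minimal sketch of the corrected last cell, which simply reuses the token classification model already defined earlier in the question:

from transformers import DistilBertForTokenClassification

# Token classification emits one prediction per token, so the
# (batch_size x sequence_length) labels line up with the logits.
model = DistilBertForTokenClassification.from_pretrained(
    "distilbert-base-cased", num_labels=len(unique_tags)
)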
I'm trying to implement a neural network model with TensorFlow for text classification. I created a custom layer in order to calculate a weighted average and to learn the weights, but I keep getting this weird error saying "Cannot get value inside Tensorflow graph function". Here's what I've coded so far (with a few sample texts just to test).
This is my custom layer:
from tensorflow.keras import layers
import tensorflow as tf
import numpy as np
class WeightedAverageLayer(layers.Layer):
    def __init__(self, vocab_size, embedding_dimension, doc_length):
        super(WeightedAverageLayer, self).__init__()
        self.embedding_dimension = embedding_dimension
        self.vocab_size = vocab_size
        self.doc_length = doc_length
        # Initialize the embedding layer
        self.embedding_layer = tf.keras.layers.Embedding(vocab_size, embedding_dimension, input_length=doc_length)
        # Initialize the word importances
        b_init = tf.zeros_initializer()
        self.b = self.add_weight(shape=(vocab_size,), initializer='random_normal', trainable=True)

    def calculate_average(self, doc):
        doc_words = self.embedding_layer(doc)
        word_embeddings = self.embedding_layer.get_weights()[0]
        s = 0
        avg = 0
        z = np.zeros((self.embedding_dimension))
        for i in range(self.embedding_dimension):
            for j in range(len(doc)):
                val = doc[j]
                vw = word_embeddings[val]
                a = self.b[val]
                s = s + vw[i] * a
            avg = s / len(doc)
            z[i] = avg
            s = 0
            avg = 0
        return z

    def call(self, docs):
        result = []
        for doc in docs:
            vect = self.calculate_average(doc)
            result.append(vect)
        result_np = np.array(result)
        return tf.stack(result_np)
        #return list(map(self.calculate_average, docs))
And this is the main file where I test my layer in a model:
# Imports used below
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

vocab_size = 20
embedding_dim = 10
max_length = 4
# define documents
docs = ['Well done!', 'Good work', 'Great effort', 'nice work', 'Excellent!',
'Weak', 'Poor effort!', 'not good', 'poor work', 'Could have done better.', 'You are amazing']
# define class labels
labels = np.array([[1,0],[1,0],[1,0],[1,0],[1,0],[0,2],[0,2],[0,2],[0,2],[0,2],[1,0]])
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(docs)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(docs)
pad = pad_sequences(sequences, maxlen=max_length, padding="post")
average_layer = WeightedAverageLayer(vocab_size, embedding_dim, max_length )
output_layer = tf.keras.layers.Dense(2, activation='softmax')
input_docs = Input(shape=(max_length))
weighted_average = average_layer(input_docs)
output = output_layer(weighted_average)
model = Model(input_docs, output)
Please note that I tested the layer alone and it worked fine.
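For comparison, here is a minimal sketch of how the same weighted average could be written with TensorFlow ops only (no Python loops or NumPy inside call), which is what graph execution expects. It is an interpretation of what calculate_average computes, under the assumption that the goal is a per-document mean of word embeddings scaled by the learned per-word importance b, not a verified drop-in replacement:

import tensorflow as tf
from tensorflow.keras import layers

class WeightedAverageLayerTF(layers.Layer):
    def __init__(self, vocab_size, embedding_dimension, doc_length):
        super().__init__()
        self.embedding_layer = layers.Embedding(vocab_size, embedding_dimension,
                                                input_length=doc_length)
        # One learnable importance weight per vocabulary entry
        self.b = self.add_weight(shape=(vocab_size,), initializer='random_normal',
                                 trainable=True)

    def call(self, docs):
        docs = tf.cast(docs, tf.int32)
        word_embeddings = self.embedding_layer(docs)   # (batch, doc_len, emb_dim)
        word_importance = tf.gather(self.b, docs)      # (batch, doc_len)
        weighted = word_embeddings * word_importance[..., tf.newaxis]
        # Mean over the document positions -> (batch, emb_dim)
        return tf.reduce_mean(weighted, axis=1)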