Constant loss and accuracy in pytorch - python

I am training a model whose output and ground truth should be binary. It is an inception-based two-stream model. The Inception architecture is used as an encoder, and for the decoder a custom model is designed, consisting of conv layers, batch normalization, upsampling, and tanh as the non-linearity. I have also tried relu, but still got no result.
Model is initializing at different values but not updating. My model's forward function is:
def forward(self, inp):
# NOTE(review): indentation of this snippet was flattened when pasted; code kept byte-identical.
# Encoder: Inception-style 3D conv stack; activations saved in skip1..skip3 are
# handed to the transposed-conv decoder (tconv6) below.
# Preprocessing
out = self.conv3d_1a_7x7(inp)
skip1 = out
out = self.maxPool3d_2a_3x3(out)
out = self.dropout(out)
out = self.conv3d_2b_1x1(out)
out = self.conv3d_2c_3x3(out)
out = self.maxPool3d_3a_3x3(out)
out = self.dropout(out)
out = self.mixed_3b(out)
skip2 = out
out = self.mixed_3c(out)
out = self.maxPool3d_4a_3x3(out)
out = self.dropout(out)
out = self.mixed_4b(out)
out = self.mixed_4c(out)
out = self.dropout(out)
out = self.mixed_4d(out)
skip3 = out
out = self.dropout(out)
out = self.mixed_4e(out)
out = self.mixed_4f(out)
out = self.maxPool3d_5a_2x2(out)
out = self.dropout(out)
out = self.mixed_5b(out)
out = self.mixed_5c(out)
out = self.dropout(out)
# Decoder consumes the bottleneck plus the three skip connections.
out = self.tconv6(out, skip1,skip2,skip3)
out = self.sigmoid(out)
print("Before permutation", out.shape)
# Axis reorder -- presumably (N,C,D,H,W) -> (N,C,H,W,D); TODO confirm layout.
out = out.permute(0,1,3,4,2)
# NOTE(review): despite the name, out_logits is the *post-sigmoid* tensor, not raw
# logits, and it is the same object as `out` -- both return values alias one tensor.
out_logits = out
return out, out_logits
My train function is:
# Forward pass: the model returns (output, logits); only the logits are used below.
misc,out_logits[stream] = models[stream](data[stream])
# requires_grad_() marks the tensor as a new leaf; it cannot re-attach a graph.
out_softmax = torch.nn.functional.softmax(out_logits[stream], 1).requires_grad_()
# argmax over dim 1 on `.data` -- a discrete, non-differentiable, detached op.
val, preds = torch.max(out_logits[stream].data, 1)
preds = preds.to(device, dtype=torch.float)
# Binarize the ground truth around its mean.
gt = torch.round(gt)
gt_avg = torch.mean(gt)
gt[gt>gt_avg] = 1
gt[gt<=gt_avg] = 0
out_logits[stream] = out_logits[stream].squeeze(1)
# NOTE(review): the criterion receives `preds` (detached argmax indices), so no
# gradient can reach the model parameters -- this is why loss/accuracy never move.
# Per the accepted fix below, it should receive out_logits[stream] instead, and
# the .cpu()/.requires_grad_() calls are unnecessary.
losses[stream] = criterion(preds.cpu(), gt.cpu()).requires_grad_()
if phase == 'train':
optimizers[stream].zero_grad()
losses[stream].backward(retain_graph=True)
optimizers[stream].step()
# Running statistics, weighted by batch size.
running_losses[stream] += losses[stream].item() * data[stream].shape[0]
running_corrects[stream] += torch.sum(val.cpu() == gt_c.data.cpu()).item()
correct_t = torch.sum(preds==gt_c).item()
total_t = gt_c.shape[0]*gt_c.shape[1]*gt_c.shape[2]*gt_c.shape[3]
acc_epc = 100*correct_t/total_t
# Step every LR scheduler once per epoch.
for scheduler in schedulers.values():
scheduler.step()
My loss and accuracy are always constant, as shown here.
I have tried using different optimizers like SGD, Adam , RMSprop. Furthermore, I have tried tuning the hyperparameters but model is not converging. What am I missing?

You send the wrong variable into the loss function if you are doing cross-entropy. Change preds to out_logits[stream]; there's also no need for .cpu() and .requires_grad_().
losses[stream] = criterion(out_logits[stream], gt)
Also, you performed argmax to obtain preds. Argmax is not differentiable, regardless of the loss function you use.

Related

Keras - ValueError: Could not interpret loss function identifier

I am trying to build the autoencoder structure detailed in this IEEE article. The autoencoder uses a separable loss function where it is required that I create a custom loss function for the "cluster loss" term of the separable loss function as a function of the average output of the encoder. I create my own layer called RffConnected that calculates the cluster loss and utilizes the add_loss method. Otherwise, this RffConnected layer should act as just a normal deep layer.
Here are my relevant code snippets:
import matplotlib.pyplot as plot
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import math
from matplotlib.figure import Figure
import tensorflow as tf
import keras
from keras import layers
import random
import time
from os import listdir
#loads data from a text file
def loadData(basePath, samplesPerFile, sampleRate):
    """Read every two-column (real <TAB> imag) text file under *basePath*.

    Files named READ_ME / READ_ME.txt are skipped.

    Returns (real, imag, times, fileOrder): real/imag are 2-D numpy arrays with
    one row per file, `times` holds the sample timestamps of the *last* file
    read (all files share the same spacing), and fileOrder lists filenames in
    the order they were read.
    """
    import os  # local import: the surrounding file only does `from os import listdir`
    real = []
    imag = []
    fileOrder = []
    times = []  # defined up-front so an empty directory cannot leave it unbound
    for file in listdir(basePath):
        if file in ("READ_ME", "READ_ME.txt"):
            continue
        fileOrder.append(file)
        t = 0
        sampleEvery = samplesPerFile / sampleRate  # time step between samples
        temp1 = []
        temp2 = []
        times = []
        # BUG FIX: os.path.join replaces the hard-coded "\\" separator (which only
        # worked on Windows), and `with` guarantees the handle is closed even when
        # float() raises on a malformed line (the original leaked the handle).
        with open(os.path.join(basePath, file), "r") as fid:
            for line in fid.readlines():
                times.append(t)
                samples = line.split("\t")
                temp1.append(float(samples[0]))
                temp2.append(float(samples[1]))
                t = t + sampleEvery
        real.append(temp1)
        imag.append(temp2)
    real = np.array(real)
    imag = np.array(imag)
    return real, imag, times, fileOrder
#####################################################################################################
#Breaks up and randomizes data
def breakUpData(real, imag, times, numPartitions, basePath):
    """Split each row of real/imag into numPartitions equal chunks and shuffle.

    Returns (newReal, newImag, newTimes, dataFiles): newReal/newImag are lists
    of chunks shuffled in unison, newTimes holds the timestamps of the first
    chunk, and dataFiles maps each chunk back to its source file name.
    """
    # BUG FIX: the original tested len(real) (number of rows), but the split
    # below partitions len(real[0]) (samples per row) -- validate the axis
    # that is actually being divided.
    if len(real[0]) % numPartitions != 0:
        raise ValueError("Error: The length of the dataset must be divisible by the number of partitions.")
    newReal = []
    newImag = []
    newTimes = []
    # NOTE(review): listdir order may include READ_ME files that loadData
    # skipped, so fileOrder[i] may not line up with row i -- verify upstream.
    fileOrder = listdir(basePath)
    dataFiles = []
    interval = int(len(real[0]) / numPartitions)  # samples per chunk
    for i in range(0, interval):
        newTimes.append(times[i])
    for i in range(0, len(real)):
        tempI = []
        tempQ = []
        for j in range(0, len(real[0])):
            tempI.append(real[i, j])
            tempQ.append(imag[i, j])
            if (j + 1) % interval == 0:
                # chunk complete: store it and remember its source file
                newReal.append(tempI)
                newImag.append(tempQ)
                dataFiles.append(fileOrder[i])
                tempI = []
                tempQ = []
    # Shuffle chunks and file labels in lockstep (same swap-based scheme and
    # random-call sequence as before, written with tuple swaps).
    for i in range(0, len(newReal)):
        r = random.randint(0, len(newReal) - 1)
        newReal[i], newReal[r] = newReal[r], newReal[i]
        newImag[i], newImag[r] = newImag[r], newImag[i]
        dataFiles[i], dataFiles[r] = dataFiles[r], dataFiles[i]
    return newReal, newImag, newTimes, dataFiles
#####################################################################################################
#custom loss layer for the RffAe-S that calculates the clustering loss term
class RffConnected(layers.Layer):
# Dense-like custom layer that also contributes a "cluster loss" term via add_loss().
def __init__(self, output_dim, batchSize, beta, alpha):
super(RffConnected, self).__init__()
# self.total = tf.Variable(initial_value=tf.zeros((input_dim,)), trainable=False)
#array = np.zeros(output_dim)
# NOTE(review): iters counts *calls* to call(), not epochs (see comment in call()).
self.iters = 0.0
self.beta = beta
self.alpha = alpha
self.batchSize = batchSize
self.output_dim = output_dim
# Running buffers kept as plain tensors rather than tf.Variable, so in graph
# mode they are baked in as constants on each trace -- TODO confirm intent.
self.sum = tf.zeros(output_dim, tf.float64)
self.moving_average = tf.zeros(output_dim, tf.float64)
self.clusterloss = tf.zeros(output_dim, tf.float64)
self.sum = tf.cast(self.sum, tf.float32)
self.moving_average = tf.cast(self.moving_average, tf.float32)
self.clusterloss = tf.cast(self.clusterloss, tf.float32)
# self.sum = keras.Input(shape=(self.output_dim,))
# self.moving_average = keras.Input(shape=(self.output_dim,))
# self.clusterloss = keras.Input(shape=(self.output_dim,))
def build(self, input_shape):
# Trainable projection: (input_dim, output_dim), normal-initialized.
self.kernel = self.add_weight(name = 'kernel', \
shape = (int(input_shape[-1]), self.output_dim), \
initializer = 'normal', trainable = True)
#self.kernel = tf.cast(self.kernel, tf.float64)
super(RffConnected, self).build(int(input_shape[-1]))
def call(self, inputs):
#keeps track of training epochs
self.iters = self.iters + 1
#inputs = tf.cast(inputs, tf.float64)
#where this custom layer acts as a normal layer- the loss then uses this
#calc = keras.backend.dot(inputs, self.kernel)
calc = tf.matmul(inputs, self.kernel)
#cumulative sum of deep encoded features
#self.sum = state_ops.assign(self.sum, tf.reshape(tf.math.add(self.sum, calc), tf.shape(self.sum)))
#self.sum = tf.ops.state_ops.assign(self.sum, tf.math.add(self.sum, calc))
#self.sum.assign_add(calc)
self.sum = tf.math.add(self.sum, calc)
#calculate the moving average and loss if we have already trained one batch
if(self.iters >= self.batchSize):
self.moving_average = tf.math.divide(self.sum, self.iters)
self.clusterloss = tf.math.exp(\
tf.math.multiply(-1 * self.beta, tf.math.reduce_sum(tf.math.square(tf.math.subtract(inputs, self.moving_average)))))
#self.add_loss(tf.math.multiply(self.clusterloss, self.alpha))
# NOTE(review): calling .numpy() requires eager execution and hands add_loss a
# plain value instead of a tensor; the commented-out tensor form above is the
# documented usage. This looks like the source of the "Could not interpret
# loss function identifier" failure reported below -- verify against Keras docs.
self.add_loss(self.clusterloss.numpy() * self.alpha)
return calc
#####################################################################################################
def customloss(y_true, y_pred):
    """Element-wise squared error; prints the tensor for debugging."""
    squared_error = tf.square(y_true - y_pred)
    print(squared_error)
    return squared_error
#####################################################################################################
# Train/test split of the raw I/Q rows (expects `real`/`imag` loaded earlier).
realTraining = np.array(real[0:2200])
realTesting = np.array(real[2200:-1])
imagTraining = np.array(imag[0:2200])
imagTesting = np.array(imag[2200:-1])
numInputs = len(realTraining[0])
# Random-Fourier-feature preprocessing of the I and Q channels.
i_sig = keras.Input(shape=(numInputs,))
q_sig = keras.Input(shape=(numInputs,))
iRff = tf.keras.layers.experimental.RandomFourierFeatures(numInputs, \
kernel_initializer='gaussian', scale=9.0)(i_sig)
rff1 = keras.Model(inputs=i_sig, outputs=iRff)
qRff = tf.keras.layers.experimental.RandomFourierFeatures(numInputs, \
kernel_initializer='gaussian', scale=9.0)(q_sig)
rff2 = keras.Model(inputs=q_sig, outputs=qRff)
combined = layers.Concatenate()([iRff, qRff])
combineRff = tf.keras.layers.experimental.RandomFourierFeatures(4 * numInputs, \
kernel_initializer='gaussian', scale=10.0)(combined)
# NOTE(review): inputs here are *layer outputs*, not keras.Input tensors
# (i_sig/q_sig would be the usual choice) -- verify this builds as intended.
preprocess = keras.Model(inputs=[iRff, qRff], outputs=combineRff)
#print(realTraining[0:5])
preprocessedTraining = preprocess.predict([realTraining, imagTraining])
preprocessedTesting = preprocess.predict([realTesting, imagTesting])
################## Entering Encoder ######################
encoderIn = keras.Input(shape=(4*numInputs,))
#connected1 = layers.Dense(100, activation="sigmoid")(encoderIn)
# RffConnected(output_dim=100, batchSize=30, beta=1.0, alpha=100.0)
clusterLossLayer = RffConnected(100, 30, 1.00, 100.00)(encoderIn)
#clusterLossLayer = myRffConnected(256)(connected1)
encoder = keras.Model(inputs=encoderIn, outputs=clusterLossLayer)
################## Entering Decoder ######################
connected2 = layers.Dense(125, activation="sigmoid")(clusterLossLayer)
relu1 = layers.ReLU()(connected2)
dropout = layers.Dropout(0.2)(relu1)
reshape1 = layers.Reshape((25, 5, 1))(dropout)
bn1 = layers.BatchNormalization()(reshape1)
trans1 = layers.Conv2DTranspose(1, (4, 2))(bn1)
ups1 = layers.UpSampling2D(size=(2, 1))(trans1)
relu2 = layers.ReLU()(ups1)
bn2 = layers.BatchNormalization()(relu2)
trans2 = layers.Conv2DTranspose(1, (4, 2))(bn2)
ups2 = layers.UpSampling2D(size=(2, 1))(trans2)
relu3 = layers.ReLU()(ups2)
bn3 = layers.BatchNormalization()(relu3)
trans3 = layers.Conv2DTranspose(1, (5, 2))(bn3)
ups3 = layers.UpSampling2D(size=(2, 1))(trans3)
relu4 = layers.ReLU()(ups3)
bn4 = layers.BatchNormalization()(relu4)
trans4 = layers.Conv2DTranspose(1, (7, 1))(bn4)
reshape2 = layers.Reshape((4*numInputs, 1, 1))(trans4)
autoencoder = keras.Model(inputs=encoderIn, outputs=reshape2)
encoded_input = keras.Input(shape=(None, 100))
decoder_layer = autoencoder.layers[-1]
#autoencoder.summary()
# NOTE(review): `loss` must be a loss function or a registered identifier;
# autoencoder.losses[-1] is a *Tensor*, which is what triggers the
# "Could not interpret loss function identifier" error reported below. The
# add_loss() term is applied automatically and should not be passed here.
autoencoder.compile(optimizer='adam', loss=[autoencoder.losses[-1], customloss], metrics=['accuracy', 'accuracy'])
autoencoder.fit(preprocessedTraining, preprocessedTraining, epochs=100, batch_size=20, shuffle=True, validation_data=(preprocessedTesting, preprocessedTesting))
It seems like it runs for two training epochs then it gives me an error. I end up getting this error when I run it:
ValueError: Could not interpret loss function identifier: Tensor("rff_connected_137/Const:0", shape=(100,), dtype=float32)
I've already spent a considerable amount of time debugging this thing, although if you spot any more errors I would appreciate a heads-up. Thank you in advance.
According to the Keras Model training documentation, the 'loss' attribute must be a loss function returning a float tensor (except for sparse loss functions returning integer arrays) with a specific shape.
If it is necessary to combine two loss functions, it would be better to perform mathematical calculations within your custom loss function to return an output of float tensor. This reference might be a help Keras CustomLoss definition.

Keras-RL2 and Tensorflow 1-2 Incompatibility

I am getting;
tensorflow.python.framework.errors_impl.OperatorNotAllowedInGraphError: Using a symbolic `tf.Tensor` as a Python `bool` is not allowed in Graph execution. Use Eager execution or decorate this function with #tf.function.
Error while I'm trying to fit DDPG agent over custom environment.
Here is the CustomEnv()
class CustomEnv(Env):
# Gym environment whose observation is a model-accuracy scalar in [0, 100] and
# whose (Tuple) action encodes the hyperparameters fed to an external model.
def __init__(self):
print("Test_3 : Init")
"""NOTE: Bool array element definition for Box action space needs to be determined !!!!"""
self.action_space = Tuple((Box(low=4, high=20, shape=(1, 1)),
Box(low=0, high=1, shape=(1, 1)),
MultiBinary(1),
MultiBinary(1),
Box(low=4, high=20, shape=(1, 1)),
Box(low=0, high=1, shape=(1, 1)),
MultiBinary(1),
MultiBinary(1),
Box(low=0, high=100, shape=(1, 1)),
Box(low=0, high=100, shape=(1, 1))))
"""Accuracy array"""
self.observation_space = Box(low=np.asarray([0]), high=np.asarray([100]))
"""Initial Space"""
# Initial state comes from running the external model once on a random action.
self.state = return_Acc(directory=source_dir, input_array=self.action_space.sample())
self.episode_length = 20
print(f"Action Space sample = {self.action_space.sample()}")
print("Test_3 : End Init")
def step(self, action):
print(f"Model Action Space Output = {action}")
print("Test_2 : Step")
# NOTE(review): the real accuracy call is commented out; a random stand-in is
# used instead while debugging.
accuracy_of_model = random.randint(0,100)#return_Acc(directory=source_dir, input_array=action)
self.state = accuracy_of_model#round(100*abs(accuracy_of_model))
self.episode_length -= 1
# Calculating the reward
print(f"self.state = {self.state}, accuracy_of_model = {accuracy_of_model}")
# Reward: positive above 60% accuracy, steeply negative below it.
if (self.state > 60):
reward = self.state
else:
reward = -(60-self.state)*10
# Episode ends after 20 steps.
if self.episode_length <= 0:
done = True
else:
done = False
# Setting the placeholder for info
info = {}
# Returning the step information
print("Test_2 : End Step")
return self.state, reward, done, info
def reset(self):
print("Test_1 : Reset")
# Reset to a neutral mid-scale accuracy and a fresh step budget.
self.state = 50
print(f"Self state = {self.state}")
self.episode_length = 20
print("Test_1 : End Reset")
return self.state
The return_Acc function runs a Random Decision Forest model and returns its accuracy to the DDPG model for determining the next step's parameters. Finally, my DDPG model is given below;
# Derive actor input/output sizes from the environment's spaces.
states = env.observation_space.shape
actions = np.asarray(env.action_space.sample()).size
print(f"states = {states}, actions = {actions}")
def model_creation(states, actions):
# Small MLP actor: observation -> flattened action vector (linear output).
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(32, activation='relu', input_shape=states))
model.add(tf.keras.layers.Dense(24, activation='relu'))
model.add(tf.keras.layers.Dense(actions, activation='linear'))
model.build()
return model
model = model_creation(states, actions)
model.summary()
def build_agent(model, actions, critic):
# NOTE(review): `policy` is created but never passed to DDPGAgent (leftover
# from the commented-out DQN setup below).
policy = BoltzmannQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)
# Relies on module-level `action_input`, defined after this def but before the call.
nafa = DDPGAgent(nb_actions=actions, actor=model, memory=memory, critic=critic, critic_action_input=action_input)
#dqn = DQNAgent(model=model, memory=memory, policy=policy,
#               nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
return nafa
# Critic: Q(s, a) network over concatenated action + flattened observation.
action_input = Input(shape=(actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
flattened_observation = Flatten()(observation_input)
x = Concatenate()([action_input, flattened_observation])
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())
# NOTE(review): the variable is named `dqn` but holds a DDPG agent.
dqn = build_agent(model, actions, critic)
dqn.compile(tf.keras.optimizers.Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=200, visualize=False, verbose=1)
results = dqn.test(env, nb_episodes=500, visualize=False)
print(f"episode_reward = {np.mean(results.history['episode_reward'])}")
I tried most of the solutions that I found here like
tf.compat.v1.enable_eager_execution()
and combinations of this with other functions (such as enable_v2_behaviour()), but I wasn't able to make it work. If I don't run the RDF model inside DDPG, no problem occurs. If possible, how can I connect the RDF model's accuracy output to self.state as an input?
keras-rl2 1.0.5
tensorflow-macos 2.10.0
And I'm using M1 based mac if that's matter.
To anyone interested: I came up with a slower but at least working solution. It's actually simpler than expected. Insert a command that runs the model script from the terminal and writes its output to a text file, then read that text file from the RL agent script. In turn, write the action-space values to a text file, which the model can then read to create the observation.

Is there a way to fix param.grad = none in pytorch model?

I am working on the Point Cloud Registration Network(PCRNET) and I have a issue with the training process. For that I wrote a pytorch model that consists of 5 convolutional layers and 5 fully connected layers. My custom loss output changes with each new initialization of the network but then for each epoch I obtain the same values for each batch. Therefore no training is happening. I narrowed the error down to the fact that no gradients are being computed.
Here is my network and forward pass
class pcrnetwork(nn.Module):
    """PCRNet-style siamese point-cloud encoder with an MLP 6-DoF pose regressor.

    forward1 embeds source and target clouds with a shared point-wise conv
    stack, max-pools each into a global feature, regresses a pose
    [tx, ty, tz, rx, ry, rz], and returns the transformed source cloud.
    """
    def __init__(self,):
        # This is the network that gets initialized with every new instance
        super().__init__()
        # Shared point-wise (kernel size 1) feature extractor, PointNet style.
        self.conv1 = nn.Conv1d(3,64,1, padding="valid")
        self.conv2 = nn.Conv1d(64,64,1,padding="valid")
        self.conv3 = nn.Conv1d(64,64,1,padding="valid")
        self.conv4 = nn.Conv1d(64,128,1,padding="valid")
        self.conv5 = nn.Conv1d(128,1024,1,padding="valid")
        # MLP head: concatenated global features (2 * 1024) -> 6 pose parameters.
        self.fc1 = nn.Linear(2048,1024)
        self.fc2 = nn.Linear(1024,512)
        self.fc3 = nn.Linear(512,512)
        self.fc4 = nn.Linear(512,256)
        self.fc5 = nn.Linear(256,6)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(64)
        self.bn3 = nn.BatchNorm1d(64)
        self.bn4 = nn.BatchNorm1d(128)
        self.bn6 = nn.BatchNorm1d(1024)
        self.bn7 = nn.BatchNorm1d(512)
        self.bn8 = nn.BatchNorm1d(512)
        self.bn9 = nn.BatchNorm1d(256)
    def forward1(self,input,input1,points):
        # Run source and target through the shared conv stack in a single pass
        # by concatenating them along the point dimension.
        point_cloud = torch.cat((input,input1),dim=2)
        net = Func.relu(self.bn1(self.conv1(point_cloud)))
        net = Func.relu(self.bn2(self.conv2(net)))
        net = Func.relu(self.bn3(self.conv3(net)))
        net = Func.relu(self.bn4(self.conv4(net)))
        net = Func.relu(self.conv5(net))
        # Split back into source / target halves and max-pool each to a global feature.
        net_s = net[:,:,0:points]
        net_t = net[:,:,points:points*2]
        pool = nn.MaxPool1d(net_s.size(-1))(net_s)
        pool2 = nn.MaxPool1d(net_t.size(-1))(net_t)
        global_feature = torch.cat((pool,pool2),1)
        #global_feature = torch.squeeze(global_feature,dim=2)
        global_feature = torch.flatten(global_feature,start_dim=1)
        # fully connected part
        net = Func.relu(self.bn6(self.fc1(global_feature)))
        net = Func.relu(self.bn7(self.fc2(net)))
        net = Func.relu(self.bn8(self.fc3(net)))
        net = Func.relu(self.bn9(self.fc4(net)))
        # BUG FIX: the original wrapped the final layer in ReLU, clamping every
        # pose parameter to be non-negative (no negative rotations/translations)
        # and zeroing its gradient whenever the pre-activation went negative --
        # a regression output must stay linear.
        net = self.fc5(net)
        pose = net
        # Apply the predicted rigid transform to the source cloud (N, points, 3).
        output = appply_transformation(torch.transpose(input,1,2),pose)
        return output
my training loop looks like this:
def train1():
# One-epoch training loop over (source, target) point-cloud pairs; relies on
# module-level model, optimizer, device and the og_chamfer1 loss below.
# NOTE(review): `losss` and `total_loss` are initialized but never updated --
# per-batch losses are only printed, not accumulated.
losss = []
for epoch in range(1):
model.train()
total_loss = 0.0
#poses = []
for idx, data in enumerate(train_loader,0):
x = data["source"] # shape= [32,2048,3]
y = data["target"]
# Conv1d expects channels-first: (B, 3, 2048).
x = torch.transpose(x,1,2)
x = x.to(device)
y = torch.transpose(y,1,2)
y = y.to(device)
optimizers.zero_grad() if False else optimizer.zero_grad()
output = model.forward1(x,y,2048)
# Back to (B, 2048, 3) for the Chamfer loss.
y = torch.transpose(y,1,2)
loss = og_chamfer1(y,output)
loss.backward()
optimizer.step()
print(loss.item())
And finally here is the code for my loss function. The idea here is to let the network calculate 6 parameters(3 rotational, 3 translational) that get fed into my apply transformation function. Then my actual loss(=Chamfer Distance) is being calculated on the transformed source point cloud and the target point cloud.
def dist_vec(source, targ):
    """Pairwise *squared* Euclidean distances between two point sets.

    source: (N, 3) points, targ: (M, 3) points. Returns an (N, M) matrix whose
    [i, j] entry is ||source[i] - targ[j]||^2, computed via the expansion
    ||s||^2 - 2 s.t + ||t||^2.
    """
    # (M, N) matrix of dot products t_j . s_i
    cross = torch.matmul(targ, torch.transpose(source, 0, 1))
    # squared norms of every target (M,) and source (N,) point
    sq_targ = torch.sum(torch.square(targ), 1)
    sq_src = torch.sum(torch.square(source), 1)
    # broadcast to (N, M): ||s_i||^2 - 2 s_i.t_j + ||t_j||^2
    return torch.transpose(sq_src - 2 * cross, 0, 1) + sq_targ
def og_chamfer1(sourc, targ):
    """Mean symmetric Chamfer distance over a batch of point-cloud pairs.

    sourc, targ: (B, N, 3) batches. For each pair, sums the mean
    nearest-neighbour squared distance in both directions, then averages
    over the batch.
    """
    per_pair = torch.zeros(size=(len(sourc),))
    for idx in range(len(sourc)):
        # (N, M) squared-distance matrix for the idx-th pair
        pairwise = dist_vec(sourc[idx], targ[idx])
        # nearest-neighbour distance for every target point (min over sources)
        nn_for_target, _ = torch.min(pairwise, axis=0)
        # nearest-neighbour distance for every source point (min over targets)
        nn_for_source, _ = torch.min(pairwise, axis=1)
        per_pair[idx] = torch.mean(nn_for_target) + torch.mean(nn_for_source)
    return torch.mean(per_pair)
all of these functions should work, I just post them for reference. I think the problem for para.grad=None lays somewhere in my apply transformation function:
def rotate_cloud_by_angle_z(input, rotation_angle):
    """Rotate a point cloud about the z axis, keeping autograd intact.

    input: (num_points, 3) tensor; rotation_angle: scalar tensor (radians).
    Returns input @ Rz(rotation_angle).
    """
    cosval = torch.cos(rotation_angle)
    sinval = torch.sin(rotation_angle)
    zero = torch.zeros_like(cosval)
    one = torch.ones_like(cosval)
    # BUG FIX: the original assembled the matrix from cosval.item()/sinval.item()
    # inside torch.tensor(...), which detaches it from the autograd graph -- no
    # gradient ever reached the pose network (the param.grad = None symptom).
    # Building it with torch.stack keeps cos/sin on the graph.
    rotation_matrix = torch.stack([
        torch.stack([cosval, -sinval, zero]),
        torch.stack([sinval, cosval, zero]),
        torch.stack([zero, zero, one]),
    ])
    # Follow the input's device instead of a module-level `device` global.
    rotation_matrix = rotation_matrix.to(input.device)
    product = torch.matmul(input, rotation_matrix)
    return product
def appply_transformation(datas, poses):
    """Apply per-sample rigid transforms to a batch of point clouds.

    datas: (B, N, 3) points; poses: (B, 6) parameters ordered
    [tx, ty, tz, rx, ry, rz]. Returns the transformed batch without
    modifying the caller's tensor.
    """
    # BUG FIX: `transformed_data = datas` aliased the input -- and the caller
    # passes a transpose *view* of the network input, so the in-place row
    # assignments below silently mutated the original data. clone() keeps the
    # input intact and gives autograd a fresh buffer to track.
    transformed_data = datas.clone()
    for i in range(datas.shape[0]):
        # Rotate about z, then y, then x, driven by the pose's rotation entries.
        transformed_data[i,:,:] = rotate_cloud_by_angle_z(transformed_data[i,:,:].clone(), poses[i,5])
        transformed_data[i,:,:] = rotate_cloud_by_angle_y(transformed_data[i,:,:].clone(), poses[i,4])
        transformed_data[i,:,:] = rotate_cloud_by_angle_x(transformed_data[i,:,:].clone(), poses[i,3])
        # BUG FIX: torch.stack keeps the translation on the autograd graph; the
        # original re-wrapped the entries in torch.tensor(...), detaching them.
        transformed_data[i,:,:] = translation(transformed_data[i,:,:].clone(), torch.stack([poses[i,0], poses[i,1], poses[i,2]]).to(device))
    return transformed_data
From https://discuss.pytorch.org/t/model-param-grad-is-none-how-to-debug/52634/3 I learned that one shouldn't use .item() or re-wrap tensors like x = torch.tensor(x), but essentially I don't know how to change my apply_transformation function so that the gradient calculation works.
If anyone has any tips on that I would be super grateful!

Class activation heatmap InceptionV3 transfer learning

I have used transfer learning (imagenet weights) and trained InceptionV3 to recognize two classes of images. The code looks like
# Transfer learning: ImageNet-pretrained InceptionV3 backbone, no top classifier.
InceptionV3_model = InceptionV3(input_shape=(150,150,3),weights='imagenet', include_top=False)
# Freeze the first 249 layers, fine-tune the rest.
for layer in InceptionV3_model.layers[:249]:
layer.trainable = False
for layer in InceptionV3_model.layers[249:]:
layer.trainable = True
InceptionV3_last_output = InceptionV3_model.output
# NOTE(review): name says "maxpooled" but this is a Flatten, not pooling.
InceptionV3_maxpooled_output = Flatten()(InceptionV3_last_output)
# New 2-class softmax head on top of the backbone.
InceptionV3_x = Dense(1024, activation='relu')(InceptionV3_maxpooled_output)
InceptionV3_x = Dropout(0.5)(InceptionV3_x)
InceptionV3_x = Dense(2, activation='softmax')(InceptionV3_x)
InceptionV3_x_final_model = Model(inputs=InceptionV3_model.input,outputs=InceptionV3_x)
InceptionV3_x_final_model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy',metrics=['accuracy'])
number_of_epochs = inception_epoch
inception_filepath = 'inceptionv3_'+'-saved-model-{epoch:02d}-loss-{loss:.2f}.hdf5'
# NOTE(review): checkpoint monitors val_accuracy while early stopping monitors
# train loss -- confirm this mix is intentional.
inception_checkpoint = tf.keras.callbacks.ModelCheckpoint(inception_filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
inception_early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)
inceptionv3_history = InceptionV3_x_final_model.fit(train_generator, epochs = number_of_epochs, validation_data = validation_generator,callbacks=[inception_checkpoint,inception_early_stopping],verbose=1)
do_history_stuff(inceptionv3_history, 'inceptionv3_model', True)
then i get the predictions using
def mode(my_list):
    """Return every element of my_list that occurs the maximum number of times.

    The result preserves first-occurrence order; raises ValueError on an
    empty input (max() of no values), like the original.
    """
    counts = Counter(my_list)
    highest = max(counts.values())
    return [item for item, n in counts.items() if n == highest]
# Collect ground-truth class indices and model predictions over the test folders.
true_value = []
inception_pred = []
for folder in os.listdir(seg_test_folders):
test_image_ids = os.listdir(os.path.join(seg_test_folders,folder))
for image_id in test_image_ids[:int(len(test_image_ids))]:
path = os.path.join(seg_test_folders,folder,image_id)
# Folder name -> class index, via the generator's mapping.
true_value.append(validation_generator.class_indices[folder])
# NOTE(review): cv2.imread returns BGR; confirm training used the same
# channel order and the same 1/255 rescaling as this inference path.
img = cv2.resize(cv2.imread(path),(150,150))
#img = cv2.imread(path)
img_normalized = img/255
#Inception
# Predicted class = argmax over the 2-way softmax.
inception_image_prediction = np.argmax(inception_best_model.predict(np.array([img_normalized])))
inception_pred.append(inception_image_prediction)
I am trying to use Grad-CAM to visualize a heatmap and see where the network focuses, but it doesn't work. I am trying to follow Chollet's guide, but I am a newbie and I don't know how to adapt it to my code. Can you please help me customize the Grad-CAM code?
I cannot find the penultimate layer of my model, and I cannot generate a heatmap for one of my own images as predicted by my model. The code I am trying to use is https://github.com/Abhijit-2592/Keras-custom-callbacks/blob/master/how%20to%20use%20grad-cam%20in%20inceptionv3_copy.ipynb. This code uses the generic Inception_v3 and not my fine-tuned version. Can you please help match this code with mine?

Modifying T5 for sequence labelling

I am trying to modify the T5-model as a sequence labelling task (to do NER).
I create my model class by taking the last hidden states of the T5-model and add a linear layer with 3 out-features (for simple IOB-tags).
Here is my model class:
class Seq2SeqTokenCLS(nn.Module):
# T5 adapted for token classification (IOB NER): a linear head over the
# concatenation of the last `self.layers` decoder hidden states.
def __init__(self):
super(Seq2SeqTokenCLS, self).__init__()
self.num_labels = 3
self.base_model = T5ForConditionalGeneration.from_pretrained('t5-small')
# average of n last hidden layers
self.layers = 3
# change beam search or greedy search here
# Suggested parameters from the T5 paper: num_beams = 4 and length penalty alpha = 0.6
self.base_model.config.num_beams = 1 # <-- change to 1 for greedy decoding
self.base_model.config.length_penalty = 0.6 # <-- comment this out for greedy decoding
self.dropout = nn.Dropout(0.5)
# 512 = t5-small hidden size; the head sees self.layers concatenated states.
self.dense = nn.Linear(in_features=512 * self.layers, out_features=self.num_labels)
def forward(self, input_ids, attn_mask, labels):
# NOTE(review): `labels` has no default, so inference-time calls must still
# pass it (or a default of None should be added) -- the eval snippet below
# omits it and would raise a TypeError.
hidden_states = self.base_model(
input_ids,
attention_mask=attn_mask,
output_hidden_states=True
)
# Concatenate the last `self.layers` decoder hidden states along the feature dim.
hidden_states = torch.cat([hidden_states['decoder_hidden_states'][-(n+1)] for n in range(self.layers)], dim=2)
logits = self.dense(self.dropout(hidden_states))
loss = None
# NOTE(review): class_weights is a module-level global, not a ctor argument.
loss_fct = nn.CrossEntropyLoss(weight=class_weights)
# Only keep active parts of the loss
if attn_mask is not None:
# Mask out padding positions by replacing their labels with ignore_index.
active_loss = attn_mask.view(-1) == 1
active_logits = logits.view(-1, self.num_labels)
active_labels = torch.where(
active_loss, labels.view(-1), torch.tensor(loss_fct.ignore_index).type_as(labels)
)
loss = loss_fct(active_logits, active_labels)
else:
loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
return {'logits':logits,
'loss':loss}
However, I am confused about how I should do inference with this approach. Should I use the .generate function, as when T5 has a standard LM head? If so, I don't know how to inherit that function into my new model class...
Or can I use a normal evaluation loop?
E.g. something like this?:
# Proposed evaluation loop: per-token argmax over the classification logits.
predictions = []
all_labels = []
with torch.no_grad():
for batch in tqdm(test_loader):
input_ids = batch['input_ids'].to(device)
attention_mask = batch['attention_mask'].to(device)
labels = batch['labels'].to(device)
# NOTE(review): the model's forward requires `labels` (no default), so this
# call as written raises a TypeError -- pass labels or give it a default.
outputs = model(input_ids=input_ids,
attn_mask=attention_mask
)
for sample, lab in zip(outputs['logits'],labels):
# argmax over the label dimension for each token position
preds = torch.argmax(sample, dim=1)
predictions.append(preds)
all_labels.append(lab)
I would still like to experiment with beam search...

Categories