Triplet-Loss not converging using Tensorflow - python

Currently I am trying to implement triplet loss using Tensorflow attached my code.
But I found that the training always diverges. Can anyone help me find out where the problem is?
def compute_triplet_loss(anchor_features, positive_features, negative_features, margin=0.1):
with tf.name_scope("triplet_loss"):
anchor_features_norm = compute_norm(anchor_features)
positive_features_norm = compute_norm(positive_features)
negative_features_norm = compute_norm(negative_features)
denom1 = tf.multiply(anchor_features_norm, positive_features_norm)
denom2 = tf.multiply(anchor_features_norm, negative_features_norm)
a_p_product = compute_dot_product(anchor_features, positive_features)
a_n_product = compute_dot_product(anchor_features, negative_features)
a_p_vec = tf.divide(a_p_product, denom1)
a_n_vec = tf.divide(a_n_product, denom2)
loss = tf.maximum(0., tf.add(tf.subtract(a_n_vec, a_p_vec), margin))
return loss
def DNN(input_tensor, is_training, Reuse=True):
tf.add_to_collection('input_tensor', input_tensor)
with tf.name_scope('uttr_net', [input_tensor]):
with tf.variable_scope('uttr_vars', reuse=Reuse):
x = slim.fully_connected(input_tensor, 1024, scope='fc_1')
x = tf.layers.batch_normalization(x, training=is_training, momentum=0.995, name='bn_1')
x = slim.fully_connected(x, 1024, scope='fc_2')
x = tf.layers.batch_normalization(x, training=is_training, momentum=0.995, name='bn_2')
x = slim.fully_connected(x, 1024, scope='fc_3')
x = tf.layers.batch_normalization(x, training=is_training, momentum=0.995, name='bn_3')
x = slim.fully_connected(x, 80, scope='d_vec')
x = tf.layers.batch_normalization(x, training=is_training, momentum=0.995, name='bn_4')
# pooling
x = tf.expand_dims(x, 1)
x = tf.nn.avg_pool(x, ksize=[1,1,2,1], strides=[1,1,1,1], padding='VALID')
x = tf.squeeze(x, [1])
x = slim.dropout(x, keep_prob=0.65, is_training=is_training, scope='final_drop')
x = tf.reduce_mean(x, axis=1, name='uttr_mean')
tf.add_to_collection('d-vec', x)
return x
def build_graph(input_shape, learning_rate=0.001):
input_positive_0 = tf.placeholder(tf.float32, shape=input_shape, name='input_positive_0')
input_positive_1 = tf.placeholder(tf.float32, shape=input_shape, name='input_positive_1')
input_negative = tf.placeholder(tf.float32, shape=input_shape, name='input_negative')
is_training = tf.placeholder(tf.bool, [])
tf.add_to_collection('input_positive_0', input_positive_0)
tf.add_to_collection('input_positive_1', input_positive_1)
tf.add_to_collection('input_negative', input_negative)
tf.add_to_collection('is_training', is_training)
DNN_0 = DNN(input_positive_0, is_training, False)
DNN_1 = DNN(input_positive_1, is_training)
DNN_2 = DNN(input_negative, is_training)
loss = compute_triplet_loss(DNN_0, DNN_1, DNN_2)
loss = tf.reduce_sum(loss)
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
return optimizer, loss, input_positive_0, input_positive_1, input_negative, is_training


How to plot Receptive Fields, for a CNN/fashionMNIST?

I created my CNN with PyTorch Lightning, and I am actually looking for plotting the Receptive Fields.
Do you have any suggestions about it?
I look for different solutions here and there, but I actually can't make them synergize with PyTorch Lightning.
Is it possible to visualize the Receptive fields directly inside Tensorboard?
I'll share with you my Dataset:
train_dataset = torchvision.datasets.FashionMNIST('classifier_data', train=True, download=True, transform=transforms.ToTensor())
train, val = train_test_split(train_dataset, test_size = .2)
train_loader = DataLoader(train, batch_size = 32)
val_loader = DataLoader(train, batch_size = 32)
test_dataset = torchvision.datasets.FashionMNIST('classifier_data', train=False, download=True, transform=transforms.ToTensor())
test_loader = DataLoader(test_dataset, batch_size = 32)
and CNN:
def __init__(self, dropout, learn_rate, momentum, weight_decay, optimizer):
#def __init__(self, dropout, learn_rate, weight_decay, optimizer):
self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5)
self.conv2 = nn.Conv2d(in_channels = 6, out_channels = 12 , kernel_size = 5)
self.fc1 = nn.Linear(in_features = 12*4*4, out_features = 120)
self.fc2 = nn.Linear(in_features = 120, out_features = 60)
self.out = nn.Linear(in_features = 60, out_features = 10) = nn.Dropout(dropout) #for overfitting issues
self.loss = nn.CrossEntropyLoss()
self.accuracy = torchmetrics.Accuracy()
self.learn_rate = learn_rate
self.momentum = momentum #with Adam we don't have momentum. To Check best Optimizer with Optune, please comment this line.
self.weight_decay = weight_decay
self.optimizer = optimizer
self.train_loss = []
self.val_loss = []
self.train_acc = []
self.test_acc = []
#plot into tensorboard
log_dir = pathlib.Path.cwd() / "lightning_logs"
self.writer = SummaryWriter(log_dir)
#forward step
#I add each layer to the histogram. It's plotted into tensorboard
def forward(self, x, additional_out=False):
x = self.conv1(x)
self.writer.add_histogram("First convolutional layer CNN", x)
x = F.relu(x)
x = F.max_pool2d(x, kernel_size = 2, stride = 2)
x = self.conv2(x)
self.writer.add_histogram("Second convolutional layer CNN", x)
x = F.relu(x)
x = F.max_pool2d(x, kernel_size = 2, stride = 2)
#fuly connected 1
x = x.reshape(-1, 12*4*4)
x = self.fc1(x)
self.writer.add_histogram("First linear layer CNN", x)
x = F.relu(x)
x =
#fully connected 2
self.writer.add_histogram("Second linear layer CNN", x)
x = F.relu(x)
x =
x = self.out(x)
self.writer.add_histogram("Output layer CNN", x)
return x
def configure_optimizers(self):
#optimizer = self.optimizer(self.parameters(), lr = self.learn_rate, momentum = self.momentum, weight_decay = self.weight_decay)
optimizer = self.optimizer(self.parameters(), lr = self.learn_rate, weight_decay = self.weight_decay)
return optimizer
#training step
def training_step(self, batch, batch_idx):
x, y = batch
b = x.size(0)
x = x.view(b, -1, 28, 28)
logit = self(x)
J = self.loss(logit, y) #loss
#self.train_loss.append(J) #no need to append
acc = self.accuracy(logit, y) #accuracy
#self.train_acc.append(acc) #no need to append
self.log("train_loss_cnn", J.item())
self.log("train_acc_cnn", acc.item())
return {'loss': J}
#Since I used Tensorboard, it don't have to append to loss
def test_step(self, batch, batch_idx):
p, q = batch
b = p.size(0)
p = p.view(b, -1, 28, 28)
logit = self(p)
J = self.loss(logit, q) #loss
acc_test = self.accuracy(logit, q) #accuracy
#self.train_acc.append(acc_test) #no need to append
#self.train_loss.append(J) #no need to append
self.log("test_acc_cnn", acc_test.item())
self.log("test_loss_cnn", J.item())
def validation_step(self, batch, batch_idx=None):
u, v = batch
b = u.size(0)
u = u.view(b, -1, 28, 28)
logit = self(u)
J = self.loss(logit, v) #loss
#self.val_loss.append(J) #no need to append
acc_val = self.accuracy(logit, v) #accuracy
#self.train_acc.append(acc_val) #no need to append
self.log("val_loss_cnn", J.item())
self.log("val_acc_cnn", acc_val.item())
return {"loss": J, "pred": logit, "target": v}
#Once saves from validation step, I take with me the returned elements, and I can plot the Confusion Matrix inside Tensorboard
def validation_epoch_end(self, outputs):
preds =[tmp['pred'] for tmp in outputs])
targets =[tmp['target'] for tmp in outputs])
conf_mat = confusion_matrix(preds, targets, num_classes=10)
df_cm = pd.DataFrame(conf_mat.numpy(), index = range(10), columns=range(10))
plt.figure(figsize = (10,7))
fig_ = sns.heatmap(df_cm, annot=True, cmap='Spectral').get_figure()
self.logger.experiment.add_figure("Confusion matrix CNN", fig_, self.current_epoch)

Pytorch network not training

I am trying to train an activity recognition system using PyTorch, but the network is not training and loss is not dropping, even though I have a similar model working perfectly on keras. I have provided code for the training loop, model class, and dataset class here. Can you help me why the loss is not dropping (accuracy is not increasing)
main training loop
# create dataset
dataset = IMU_dataset()
train_loader = DataLoader(dataset=dataset,
num_epochs = 100
total_samples = len(dataset)
n_iterations = math.ceil(total_samples/4)
print(total_samples, n_iterations)
input_shape = 3
output_index = 6
device = torch.device('cpu')
model = HARmodel(input_shape, output_index).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
for epoch in range(num_epochs):
for i, (inputs, labels) in enumerate(train_loader):
# origin shape: [40, 3, 400]
labels =
# Forward pass
outputs = model(
loss = criterion(outputs, labels.long())
# Backward and optimize
# if (i+1) % 5 == 0:
# print(f'loss: {loss.item()}')
print(model.calculate_accuracy(dataset.x_data, dataset.y_data), model.calculate_loss(dataset.x_data, dataset.y_data, criterion))
Here is the model class.
class HARmodel(nn.Module):
"""Model for human-activity-recognition."""
def __init__(self, input_size, num_classes):
# Extract features, 1D conv layers
self.layer_1 = nn.Conv1d(input_size, 100, 10, stride=1)
self.activation_relu = nn.ReLU()
self.layer_2 = nn.Conv1d(100, 100, 10, stride=1)
self.layer_3 = nn.Conv1d(100, 100, 10, stride=1)
self.layer_4 = nn.MaxPool1d(2, stride=3)
self.layer_5 = nn.Dropout(p=0.2)
self.layer_6 = nn.Conv1d(100, 160, 10, stride=1)
self.layer_7 = nn.Conv1d(160, 160, 10, stride=1)
self.layer_8 = nn.Conv1d(160, 160, 10, stride=1)
# self.layer_9 = nn.AvgPool1d(97)
self.layer_10 = nn.Dropout(p=0.5)
self.layer_11 = nn.Linear(160, 6)
self.activation_softmax = nn.Softmax()
def forward(self, x):
x = self.layer_1(x)
x = self.activation_relu(x)
x = self.layer_2(x)
x = self.activation_relu(x)
x = self.layer_3(x)
x = self.activation_relu(x)
x = self.layer_4(x)
x = self.layer_5(x)
x = self.layer_6(x)
x = self.activation_relu(x)
x = self.layer_7(x)
x = self.activation_relu(x)
x = self.layer_8(x)
x = self.activation_relu(x)
self.layer_9 = nn.AvgPool1d(x.shape[2])
x = self.layer_9(x)
x = self.layer_10(x)
y = self.layer_11(x.view(x.shape[0],x.shape[1]))
# y = self.activation_softmax(y)
return y
def calculate_accuracy(self, X,y):
with torch.no_grad():
output = model.forward(X.float())
max_index = output.max(dim = 1)[1]
true_output = y.type(torch.LongTensor)
result = (max_index == true_output).sum()/y.shape[0]
return result.detach().numpy()
def calculate_loss(self, X,y, crit):
with torch.no_grad():
output = model.forward(X.float())
max_index = output.max(dim = 1)[1]
true_output = y.type(torch.LongTensor)
return crit(output, true_output).item()
Here is the dataset class:
class IMU_dataset(Dataset):
def __init__(self):
self.n = X.shape[0]
self.x_data = torch.from_numpy(X.reshape(-1,3,400))
self.y_data = torch.from_numpy(y)
def __getitem__(self, index):
return self.x_data[index], self.y_data[index]
def __len__(self):
return self.n
I got to know that I need to remove the softmax layer
I have tried with a lower learning rate and still have the same problem.

Tensorflow - Changing dropout value has no effect on network

I trained a network to perform semantic segmentation with dropout, and it is my understanding that as you vary the dropout keep_prob value, the output prediction changes. However, after saving the model using the tensorflow-serving method, loading it using tf.saved_model.loader.load, and varying the dropout value, I get the same output prediction value (dice score).
I followed the suggestions in this SO post, but I still get the same prediction results even if I enter 0.0.
Didn't know if it was a tensorflow issue or a bug in my code, so I tried downgrading from v1.15 to v1.10 to see if it was the former and still got the same results. I am sure it is a bug in my code now, but I am not sure where it is. A minimum working example is shown below. Could someone help me? Thank you!
This is a snippet of my training script:
def run_iteration(self, feed_dict, op_list, summaries):
output_args =, feed_dict=feed_dict)
return output_args
def run_epoch_train(self, curr_epoch):
print('Training over all batches')
num_total_batches = self.num_total_batches_train
curr_batch_counter = 0
# for each batch in training images
for batch in self.batch_iterator_train:
# dropout is included
if self.dropout_training_Flag == 1:
_, loss, dice = self.run_iteration(
self.placeholders['images']: batch['images'],
self.placeholders['labels']: batch['labels'],
self.placeholders['is_training']: True,
self.placeholders['dropout_prob']: self.dropout_prob_training,
curr_batch_counter = curr_batch_counter + 1
if (self.iteration % 5) == 0:
print('Saving model in training session') + 1)
This is a snippet of my testing script:
path_to_model = self.root_path_to_models + '/' + '25'
model = tf.saved_model.loader.load( #tf.saved_model.loader.load(
inputImage_name = model.signature_def['prediction'].inputs['images'].name
x_inp = tf.get_default_graph().get_tensor_by_name(inputImage_name)
isTraining_name = model.signature_def['prediction'].inputs['is_training'].name
tflag_op = tf.get_default_graph().get_tensor_by_name(isTraining_name)
outputs_name = model.signature_def['prediction'].outputs['sigmoid'].name
y_op = tf.get_default_graph().get_tensor_by_name(outputs_name)
if self.dropout_training_Flag == 1:
dropoutProb_name = model.signature_def['prediction'].inputs['dropout_prob'].name
dropout_prob_op = tf.get_default_graph().get_tensor_by_name(dropoutProb_name)
# iterate over batches of images
# iterate over motion category
for moCat in self.motion_categories:
# get datasets in motion category
datasets_in_moCat = d_ffn_images_labels[moCat]
dataset_name = list(datasets_in_moCat.keys())[-1]
loss_for_each_image = []
final_vol = np.zeros((self.original_input_image_width, self.original_input_image_height, self.num_vol_slices), dtype = np.uint8)
# get images
curr_dataset_images = datasets_in_moCat[dataset_name][0][0]
# get labels
curr_dataset_labels = datasets_in_moCat[dataset_name][0][1]
#current dataset label numbers
curr_dataset_label_numbers = d_bfnumber_images_labels[moCat][dataset_name]
# number of images/labels in current dataset, for current category
num_images = len(curr_dataset_images)
num_labels = len(curr_dataset_labels)
# check if num-images/labels are the same
assert(num_images == num_labels)
# load each image
for elem_idx in range(num_images):
img_path = curr_dataset_images[elem_idx]
lab_path = curr_dataset_labels[elem_idx]
xn = nib.load(img_path)
x = np.array(xn.dataobj)
labn = nib.load(lab_path)
lab = np.array(labn.dataobj)
data_affine_tform = xn.affine
# resize
xr = cv2.resize(x, (self.network_input_image_width, self.network_input_image_height), interpolation = cv2.INTER_LANCZOS4)
# standardize
y = standardize_zeroMeanUnitVar_image(copy.deepcopy(xr), self.network_input_image_width, self.network_input_image_height, self.network_input_channels)
#y = cv2.normalize(copy.deepcopy(xr), None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
# match network input -- [height, width, channels]
y = np.reshape(y, newshape=(self.network_input_image_height, self.network_input_image_width, self.network_input_channels))
# append to match network input -- [batch, height, width, channels]
input_list = []
input_list = np.asarray(input_list).astype(np.float32)
# ======================
# CHANGED VALUES FROM 0.0, 0.5, 1.0 -- same prediction score
# ======================
# run and get output
if self.dropout_training_Flag == 1:
output =, feed_dict={x_inp: input_list, tflag_op: True, dropout_prob_op: self.dropout_prob_testing})
output =, feed_dict={x_inp: input_list, tflag_op: False})
tmpOut = cv2.resize(output[0,:,:,0], (self.original_input_image_width, self.original_input_image_height), interpolation = cv2.INTER_LANCZOS4)
prediction = np.asarray((tmpOut > 0.5))
labels = np.asarray((lab > 0))
EPS = 0.0000001
#output_original = cv2.resize(output[0,:,:,0], (original_input_image_width, original_input_image_height), interpolation = cv2.INTER_LANCZOS4)
loss = 2.0 * np.sum(labels * prediction, axis=(0, 1)) / (np.sum(labels ** 2 + prediction ** 2, axis=(0, 1)) + EPS)
#place slice in final_vol
final_vol[:,:,curr_dataset_label_numbers[elem_idx][1] - 1] = np.asarray(prediction*255.0).astype(np.uint8)
# dice mean over dataset
dice_mean_for_dataset = np.mean(loss_for_each_image)
print(dataset_name, dice_mean_for_dataset)
This is the code for the inputs/outputs:
def create_placeholders(self):
self.placeholders['images'] = tf.placeholder(
shape=[None] + self.network_input_size + [self.network_input_channels],
self.placeholders['labels'] = tf.placeholder(
shape=[None] + self.network_input_size + [self.network_output_channels],
self.placeholders['is_training'] = tf.placeholder(
# dropout is included
if self.dropout_training_Flag == 1:
self.placeholders['dropout_prob'] = tf.placeholder(
def create_outputs(self):
if self.network_name == 'UNet':
print('Training UNet')
# dropout is included
if self.dropout_training_Flag == 1:
# train with dropout
unet_output = unet_dropout(
if self.network_output_channels == 1:
self.outputs['sigmoid'] = unet_output
self.outputs['sigmoid'] = unet_output
This is the code for my model:
def batch_norm_relu(inputs, is_training):
net = slim.batch_norm(inputs, is_training=is_training)
net = tf.nn.relu(net)
return net
def dropout (input, keep_prob, is_training):
if is_training == True:
dropout = tf.nn.dropout(input, keep_prob)
dropout = input
return dropout
def model(inputs, is_training, keep_prob, num_classes):
with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
base_num_kernels = 64
# =================================
# encoder
# 256
x = conv2d_fixed_padding(inputs=inputs, filters=base_num_kernels, kernel_size=3, stride=1)
x = batch_norm_relu(x, is_training)
x = conv2d_fixed_padding(inputs=x, filters=base_num_kernels, kernel_size=3, stride=1)
x = batch_norm_relu(x, is_training)
output_b1 = x
output_list_b1 = [x]
output_b1 = dropout(output_b1, keep_prob, is_training)
output_b1 = tf.layers.max_pooling2d(inputs=output_b1, pool_size=2, strides=2, padding='SAME')
# =================================
# 128
x = conv2d_fixed_padding(inputs=output_b1, filters=2*base_num_kernels, kernel_size=3, stride=1)
x = batch_norm_relu(x, is_training)
x = conv2d_fixed_padding(inputs=x, filters=2*base_num_kernels, kernel_size=3, stride=1)
x = batch_norm_relu(x, is_training)
output_b2 = x
output_list_b2 = [x]
output_b2 = dropout(output_b2, keep_prob, is_training)
# =================================
# decoder
# 128 -> 256
output_b3 = conv2d_transpose(output_b2, kernel_size=2, output_channels=base_num_kernels)
output_b4 = tf.concat([output_b3, x], axis=3)
# =================================
# 256
conv_final = conv2d_fixed_padding(inputs=output_b4, filters=base_num_kernels, kernel_size=3, stride=1)
conv_final = batch_norm_relu(conv_final, is_training)
conv_final = conv2d_fixed_padding(inputs=conv_final, filters=base_num_kernels, kernel_size=3, stride=1)
conv_final = batch_norm_relu(conv_final, is_training)
# =================================
# output
outputs = conv2d_fixed_padding(inputs=conv_final, filters=num_classes, kernel_size=3, stride=1)
if num_classes == 1:
outputs = tf.nn.sigmoid(outputs)
h = outputs.get_shape().as_list()[1]
w = outputs.get_shape().as_list()[2]
outputs_reshaped = tf.reshape(outputs, np.asarray([-1, num_classes]))
outputs_final = tf.nn.softmax(outputs_reshaped)
outputs = tf.reshape(outputs_final, np.asarray([-1, h, w, num_classes]))
return outputs
This is the way that I save the network weights:
def __create_summary_manager(self):
self.saver = Saver(
import tensorflow as tf
class Saver(object):
def __init__(self, sess, input_dict, output_dict, path):
self.sess = sess
self.input_dict = input_dict
self.output_dict = output_dict
self.path = path
self.iteration = 0
self.input_dict_info = {}
self.output_dict_info = {}
for key in input_dict.keys():
self.input_dict_info[key] = \
for key in output_dict.keys():
self.output_dict_info[key] = \
self.prediction_signature = (
def save(self, iteration_val):
self.iteration += 1
export_path = os.path.join(
self.builder = tf.saved_model.builder.SavedModelBuilder(export_path)
self.sess, [tf.saved_model.tag_constants.SERVING],
'prediction': self.prediction_signature,

How to initialize a neural network by user defined parameters in tensorflow

I am trying to implement a Neural Network using tensorflow. I want my model to initialize itself using default parameters or any weight size list passed by users. But the compute graph generated by my code seems incorrect. How can I initialize a NN by user-defined parameters in tensorflow?
def setUp(self):
self.hidden_layer_ = len(self.hidden_layer_sizes)
self.weights = []
self.biases = []
size_list = [self.input_size]
i = 0
while i + 1 < len(size_list):
prev_size = size_list[i]
size = size_list[i+1]
w = tf.Variable(tf.truncated_normal([prev_size, size], stddev=0.1))
b = tf.Variable(tf.zeros([size]))
i += 1
self.w_out = tf.Variable(tf.truncated_normal([size_list[-1], self.output_size], stddev=0.1))
self.b_out = tf.Variable(tf.zeros([self.output_size]))
self.input_x = tf.placeholder(tf.float32, [None, self.input_size], name='input')
self.input_y = tf.placeholder(tf.float32, [None, self.output_size], name='label')
self.keep_prob_plh = tf.placeholder(tf.float32)
self.layers = [self.input_x]
for w, b in zip(self.weights, self.biases):
hidden = ACT_FUN_DICT[self.activation](tf.matmul(self.layers[-1], w) + b)
hidden_drop = tf.nn.dropout(hidden, self.keep_prob_plh)
with tf.variable_scope('output'):
self.output_layer = tf.nn.sigmoid(tf.matmul(self.layers[-1], self.w_out) + self.b_out)
self.cost_func = tf.reduce_mean(tf.reduce_sum(tf.pow((self.input_y - self.output_layer), 2)))
self.cost_summary = tf.summary.scalar('Cost', self.cost_func)
self.optimizer = SOLVER_DICT[self.solver](self.learning_rate).minimize(self.cost_func)
root_logdir = './tf_logs'
now = datetime.utcnow().strftime('%Y%m%d%H%M%S')
log_dir = "{}/run-{}/".format(root_logdir, now)
self.file_writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())
I implement the NN using a class, the size of weights are stored in the member self.hidden_layer_sizes.
class MLPClassifier(BaseEstimator, TransformerMixin):
def __init__(self, hidden_layer_sizes=(100,), activation='relu', solver='sgd', alpha=0.0001,
learning_rate=0.001, max_iter=200, random_state=42, keep_prob=0.75, logged = True):
self.hidden_layer_sizes = hidden_layer_sizes
self.activation = activation
self.solver = solver
self.alpha = alpha
self.learning_rate = learning_rate
self.max_iter = max_iter
self.random_state = random_state
self.keep_prob = keep_prob
self.fitted = False
self.logged = True
self.sess = tf.Session()
That's all my code, and this is my compute graph:

Seq2Seq Loss Function Help Tensorflow

I'm having trouble trying to figure out how to create a loss function for my basic_seq2seq model.
My input is a paragraph and the output is a section title for the paragraph.
Here is my current code:
import tensorflow as tf
import numpy as np
import pickle
import sys
MAX_NUM_WORDS = 500000
CONV_DIM = 128
EPOCHS = 100
num_paragraphs = 5200000
weights_lstm = {'out': tf.Variable(tf.random_normal([BATCH_SIZE, 200, SECTION_VOCAB_SIZE]))}
biases_lstm = {'out': tf.Variable(tf.random_normal([BATCH_SIZE, SECTION_VOCAB_SIZE]))}
embedding_matrix = np.zeros((MAX_NUM_WORDS+1, 200))
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv1d(x, W):
return tf.nn.conv1d(x, W, stride=1, padding='SAME')
def max_pool_1d(x):
return tf.layers.max_pooling1d(x, pool_size=2, strides=2, padding='same')
def batch_norm(x):
return tf.layers.batch_normalization(x)
def model(x, y):
input = x
with tf.device('/cpu:0'):
input = tf.nn.embedding_lookup(W_e, x)
output_y = tf.nn.embedding_lookup(W_e, y)
bn1 = batch_norm(input)
an1 = tf.nn.relu(bn1)
drop1 = tf.layers.dropout(an1, 0.2)
W_conv1 = weight_variable([3, 200, CONV_DIM])
b_conv1 = bias_variable([CONV_DIM])
h_conv1 = tf.nn.relu(conv1d(drop1, W_conv1) + b_conv1)
bn2 = batch_norm(h_conv1)
an2 = tf.nn.relu(bn2)
W_conv2 = weight_variable([3, CONV_DIM, CONV_DIM/2])
b_conv2 = bias_variable([CONV_DIM/2])
h_conv2 = tf.nn.relu(conv1d(an2, W_conv2) + b_conv2)
bn3 = batch_norm(h_conv2)
an3 = tf.nn.relu(bn3)
W_conv3 = weight_variable([3, CONV_DIM/2, CONV_DIM/4])
b_conv3 = bias_variable([CONV_DIM/4])
h_conv3 = tf.nn.relu(conv1d(an3, W_conv3) + b_conv3)
mp1 = max_pool_1d(h_conv3)
enc = tf.unstack(mp1, axis=1)
dec = tf.unstack(output_y, axis=1)
lstm_cell = tf.contrib.rnn.LSTMCell(200, forget_bias=1.0, activation=tf.nn.softmax)
outputs, states = tf.contrib.legacy_seq2seq.basic_rnn_seq2seq(enc, dec, lstm_cell)
projected_outputs = []
with tf.device('/cpu:0'):
for output in outputs:
projected_output = (weights_lstm['out'] * output) + biases_lstm['out']
stacked_outputs = tf.stack(projected_outputs, 1) # [? x 45 x V]
weights = tf.ones_like(y, dtype=tf.float32)
loss = tf.contrib.seq2seq.sequence_loss(logits = stacked_outputs, targets = y, weights = weights, name = 'loss')
# gold_outputs = tf.unstack(output_y, axis=1)
#cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=output_y, logits=outputs))
# output_y = [? x 45 x 200]
# outputs = 45 tensors of [? x 200]
# stacked_outputs = tf.stack(outputs, 1) # [? x 45 x 200]
# correct_prediction = tf.equal(tf.argmax(stacked_outputs, 1), tf.argmax(output_y, 1))
# accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
return outputs, loss #cross_entropy
#print('Loading Embeddings...')
#with open('embeddings.txt', 'rb') as f:
# embedding_matrix = pickle.load(f)
print('Creating Placeholders...')
X = tf.placeholder(tf.int32, [None, MAX_PAR_LENGTH])
Y = tf.placeholder(tf.int32, [None, SECTION_LENGTH])
with tf.device('/cpu:0'):
W_e = tf.Variable(embedding_matrix, dtype=tf.float32, trainable=False)
print('Creating Model...')
preds, loss = model(X, Y)
print('Creating Training Parameters...')
train_step = tf.train.RMSPropOptimizer(1e-4).minimize(loss)
saver = tf.train.Saver()
print('Starting Session...')
with tf.Session() as sess:
for i in range(EPOCHS):
print('Epoch ' + str(i))
print('Number of batches ', str(num_paragraphs/BATCH_SIZE))
with open('section_train_data_final.txt', 'rb') as f:
for j in range(num_paragraphs/BATCH_SIZE):
#load data
paragraphs = []
for k in range(BATCH_SIZE):
x = np.array([ p for p,s in paragraphs ])
#y = np.array([, depth=SECTION_VOCAB_SIZE, on_value=1.0, off_value=0.0)) for p,s in paragraphs ])
y = np.array([ s for p,s in paragraphs ])
_, step_loss =[train_step, loss], feed_dict={X:x, Y: y})
if j % 100 == 0 and j != 0:
# train_acc =, feed_dict={X: x, Y: y})
print('Epoch %d: Batch %d: Loss: %g' % (i, j, step_loss)), '~\data\generation_model')
Any help on how to create this loss function would be helpful.
I'm very new to tensorflow so I tried the simple loss function that's commented out
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=output_y, logits=outputs))
But it didn't work as the loss always came out to 0. My friend tried to create a loss function that's currently what is there but I have no clue what he was trying to do.
