Tensorflow Data Pipeline Generator (tf.py_function) - python

I'm training an MRI super-resolution model on about 40k volumes with Tensorflow/Keras and a custom training loop:
Pseudo Python/TF code:

for e in epochs:
    for s in steps:
        batch = data_handler.get_batch(files[s])
        model.train_on_batch(batch['lr'], batch['hr'])

def get_batch(volume_path):  # online data augmentation and image degradation
    vol_hr = np.load(volume_path)   # 200x200x200 array
    vol_hr = augment(vol_hr)        # flips / rotation
    vol_lr = degrade(vol_hr)        # blur, add noise, downsample (100x100x200)
    batch = crop_patches(vol_lr, vol_hr)
    # batch['lr'].shape == (batch_size, 32, 32, 3)
    # batch['hr'].shape == (batch_size, 64, 64, 3)
    return batch
This runs terribly slowly, so I'm looking into optimizing training by (i) implementing a tf.data input pipeline (tf.data.Dataset) and (ii) distributed training. This post is about optimizing the data pipeline.
My plan is to implement a tf.data input pipeline (TF 2.4.1). The generator looks something like this at the moment:
import tensorflow as tf
import numpy as np
import os
import time
from skimage.transform import downscale_local_mean

def make_rand_vols(n, size=(200, 200, 200), tmp_path='./tmp/tmp_vol'):
    if not os.path.isdir(tmp_path):
        os.makedirs(tmp_path)
    rand_arr_fnames = []
    for f in range(n):
        rand_arr = np.random.rand(size[0], size[1], size[2])
        fpath = os.path.join(tmp_path, f"{f}.npy")
        np.save(fpath, rand_arr)
        rand_arr_fnames.append(fpath)
    return rand_arr_fnames

def benchmark(dataset, num_epochs=2, steps=10):
    start_time = time.perf_counter()
    for epoch_num in range(num_epochs):
        for s, sample in enumerate(dataset):
            # Performing a training step
            print(s)
            time.sleep(0.01)
            if s == steps - 1:
                break
    print("Execution time:", time.perf_counter() - start_time)
class CustomGenerator(tf.keras.utils.Sequence):
    def __init__(self, fnames, batch_size=3,
                 scale=2, shuffle=True, patch_size=32, n_channels=3):
        self.fnames = fnames
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.on_epoch_end()
        self.scale = scale
        self.lr_patch_size = patch_size
        self.hr_patch_size = patch_size * scale
        self.n_channels = n_channels

    def __len__(self):
        return len(self.fnames)

    def on_epoch_end(self):
        # Update indexes after each epoch
        if self.shuffle:
            np.random.shuffle(self.fnames)

    def __getitem__(self, idx):
        vol_hr = np.load(self.fnames[idx])         # array of size e.g. (300, 300, 300)
        vol_hr = self.augment(vol_hr)
        vol_lr = self.degrade(vol_hr, self.scale)  # array of size e.g. (150, 150, 300)
        batch = self.get_vol_crops(vol_lr, vol_hr)
        # batch -> (batch_lr, batch_hr)
        #       -> (arr [batch_size, patch_lr, patch_lr, n_channels],
        #           arr [batch_size, patch_hr, patch_hr, n_channels])
        return batch

    def augment(self, vol):
        if np.random.randint(0, 2):
            vol = vol[:, ::-1, :]  # simple for illustration
        return vol

    def degrade(self, vol_hr, downscale_factor):
        vol_lr = downscale_local_mean(vol_hr,
                                      (downscale_factor, downscale_factor, 1),
                                      cval=0)
        return vol_lr

    def get_vol_crops(self, vol_lr, vol_hr):
        pad = 2
        rows_lr, cols_lr, n_slices = vol_lr.shape
        lr_y_list = np.random.randint(0, rows_lr - self.lr_patch_size - 1, self.batch_size)  # top-left row, LR
        lr_x_list = np.random.randint(0, cols_lr - self.lr_patch_size - 1, self.batch_size)  # top-left col, LR
        z_list = np.random.randint(pad, n_slices - pad, self.batch_size)
        lr_patches, hr_patches = [], []
        for y_lr, x_lr, z in zip(lr_y_list, lr_x_list, z_list):
            slice_idxs = list(range(z - self.n_channels // 2, z + self.n_channels // 2 + 1))
            # LR patch
            patch_lr = vol_lr[y_lr: y_lr + self.lr_patch_size,
                              x_lr: x_lr + self.lr_patch_size,
                              slice_idxs]
            lr_patches.append(patch_lr)
            # HR patch
            y_hr, x_hr = self.scale * y_lr, self.scale * x_lr
            patch_hr = vol_hr[y_hr: y_hr + self.hr_patch_size,
                              x_hr: x_hr + self.hr_patch_size,
                              slice_idxs]
            # print(patch_lr.shape, patch_hr.shape)
            # preview_patches(patch_lr, patch_hr)
            hr_patches.append(patch_hr)
        return np.array(lr_patches), np.array(hr_patches)
test_vol_fnames = make_rand_vols(50)

# Make tf.data.Dataset from the Keras generator
batch_size = 24
patch_size = 32
scale = 2
n_channels = 3

gen = CustomGenerator(test_vol_fnames, patch_size=patch_size, batch_size=batch_size)
benchmark(gen, steps=50)
# Execution time: 9.38716044393368

dataset = tf.data.Dataset.from_generator(
    lambda: gen,
    output_types=(tf.float32, tf.float32),
    output_shapes=([batch_size, patch_size, patch_size, n_channels],
                   [batch_size, patch_size * scale, patch_size * scale, n_channels])
)
benchmark(dataset, steps=50)
# Execution time: 9.54540992085822

benchmark(dataset.prefetch(tf.data.AUTOTUNE), steps=50)
# Execution time: 8.615464322036132

benchmark(
    tf.data.Dataset.range(2)
    .interleave(
        lambda _: dataset,
        num_parallel_calls=tf.data.AUTOTUNE),
    steps=50
)
# Execution time: 5.204080824041739
def mapped_generator(g):
    return tf.py_function(lambda f: g,
                          inp=[f],
                          Tout=[tf.float32, tf.float32])

dataset_parallel = tf.data.Dataset.from_generator(
    lambda: mapped_generator(gen),
    output_types=(tf.float32, tf.float32),
    output_shapes=([batch_size, patch_size, patch_size, n_channels],
                   [batch_size, patch_size * scale, patch_size * scale, n_channels])
)
benchmark(
    dataset_parallel,
    steps=50
)
# NameError: name 'f' is not defined
Is there anything I can do to make this more efficient? How can I wrap the generator inside tf.py_function?
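For reference, the pattern I usually see for parallelizing this kind of NumPy-heavy work (a sketch only, untested here; it reuses the generator's methods and the shape variables defined above) is to build the dataset from the file names and wrap the per-file work with tf.py_function inside .map(), so num_parallel_calls can process several volumes at once. py_function loses static shape information, hence the set_shape calls:

def load_and_crop(fname):
    # runs as plain Python, so ordinary NumPy code is fine in here
    vol_hr = np.load(fname.numpy().decode("utf-8"))
    vol_hr = gen.augment(vol_hr)
    vol_lr = gen.degrade(vol_hr, gen.scale)
    lr, hr = gen.get_vol_crops(vol_lr, vol_hr)
    return lr.astype(np.float32), hr.astype(np.float32)

def tf_load_and_crop(fname):
    lr, hr = tf.py_function(load_and_crop, inp=[fname], Tout=(tf.float32, tf.float32))
    lr.set_shape([batch_size, patch_size, patch_size, n_channels])
    hr.set_shape([batch_size, patch_size * scale, patch_size * scale, n_channels])
    return lr, hr

dataset_parallel = (
    tf.data.Dataset.from_tensor_slices(test_vol_fnames)
    .map(tf_load_and_crop, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)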

Related

I should have a shape [150,2,2] but get a shape of [2,2,2] with DataLoader

I'm currently trying to train a Recurrent Neural Network with PyTorch and I am having trouble managing the DataLoader. Let's start from the beginning.
import matplotlib.pyplot as plt
import numpy as np
import torch

T = 50   # period
t = 300  # time
timeStep = np.linspace(0, t, 300)
mu = 0
sigma = np.sqrt(0.001)
x1 = []
x2 = []
for s in timeStep:
    eps1 = np.random.randn(1) * sigma + mu
    eps2 = np.random.randn(1) * sigma + mu
    x1.append(np.cos(2 * s * np.pi / T) + eps1)
    x2.append(np.sin(4 * s * np.pi / T) + eps2)

from torch import nn
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import Dataset, DataLoader

class Data(torch.utils.data.Dataset):
    def __init__(self):
        for sample in range(10):
            self.X = torch.from_numpy(np.stack([x1, x2], axis=1).reshape([-1, 2, 2])).float()
            self.Y = torch.from_numpy(np.append(np.delete(self.X, 0, axis=0), self.X[1].reshape([1, 2, 2]), axis=0)).float()
            print(self.X.shape, self.Y.shape)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        feature = self.X[index]
        label = self.Y[index]
        return feature, label

dataset = Data()
At this point, dataset.X.shape, dataset.Y.shape gives [150, 2, 2], [150, 2, 2]. So up to here that's what I need, no problem. (I get 2 samples of 150 time-series points each from a 300-point time series.)
from torch.autograd import Variable
from typing import Tuple

class Recurrent(nn.Module):
    def __init__(self, hidden_dim: int = 20):
        super().__init__()
        self.hidden_dim: int = hidden_dim
        self.hidden: Tuple[Variable, Variable] = self.init_hidden()
        self.rnn = nn.LSTM(2, self.hidden_dim)
        self.fc = nn.Sequential(
            nn.ReLU(),
            nn.Linear(self.hidden_dim, 2)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x, hidden = self.rnn(x, self.hidden)
        self.hidden = (Variable(hidden[0].data), Variable(hidden[1].data))
        x = self.fc(x)
        return x

    def init_hidden(self) -> Tuple[Variable, Variable]:
        return (
            Variable(torch.zeros(1, 2, self.hidden_dim)),
            Variable(torch.zeros(1, 2, self.hidden_dim))
        )

def fit(model, dataset, batch_size=2, epochs=100, loss_print_per_epoch=10):
    def _w(worker_id):
        np.random.seed(np.random.get_state()[1][0] + worker_id)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    scheduler = LambdaLR(optimizer, lr_lambda=lambda _e: 0.97 ** _e)
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    def _train_model(_m, _d):
        _m.train()
        train_batch_loss = []
        for x, y in _d:
            optimizer.zero_grad()
            output = _m(x)
            print(output.shape)
            loss = criterion(output, y)
            loss.backward(retain_graph=True)
            optimizer.step()
            train_batch_loss.append(loss.item())
        return _m, np.mean(train_batch_loss)

    for epoch in range(epochs + 1):
        model, train_loss = _train_model(model, train_loader)
        if epoch % loss_print_per_epoch == 0:
            print(f'epoch: {epoch}/{epochs} loss: {train_loss} lr: {scheduler.get_last_lr()[0]}')
        scheduler.step()
    return model

model = fit(model=Recurrent(), dataset=dataset, batch_size=2, epochs=100)
When I iterate through the DataLoader with my for loop, this is where the problem comes up. output.shape should be [150, 2, 2] for batch size = 2 and [150, 1, 2] for batch size = 1, but the print in the loop gives me a shape of [2, 2, 2] and I have no idea why. If anyone could help me understand what is going on here, it would be a great help.
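For context on the shapes (my reading, not part of the original post): __getitem__ returns a single [2, 2] sample, and the DataLoader stacks batch_size of those along a new first dimension, so batch_size = 2 yields [2, 2, 2]. The full [150, 2, 2] tensor would only appear if the whole dataset were a single batch. A minimal sketch:

import torch
from torch.utils.data import DataLoader, TensorDataset

X = torch.randn(150, 2, 2)                 # 150 samples, each of shape [2, 2]
loader = DataLoader(TensorDataset(X), batch_size=2)
x, = next(iter(loader))
print(x.shape)                             # torch.Size([2, 2, 2]): batch dim comes first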

Using Electronic Health Records to predict future diagnosis codes with Gated Recurrent Units - (Error: Sample larger than population or is negative)

I am working on clinical EHR.
I am currently referring to this blog post and the GitHub repo linked here:
https://sparklerussell.com/post/using-electronic-health-records-to-predict-future-diagnosis-codes-with-gated-recurrent-units/
https://github.com/sparalic/Electronic-Health-Records-GRUs
I have generated the dataset and processed it as per the instructions in the notebooks present in the repository. I am facing an issue trying to train the model.
Using: Jupyter Notebook (on Google Colab)
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn.functional as F
import numpy as np
import itertools
import pickle
import sys, random
Load data:

def load_data(sequences, labels):
    dataSize = len(labels)
    idx = np.random.permutation(dataSize)
    nTest = int(np.ceil(0.15 * dataSize))
    nValid = int(np.ceil(0.10 * dataSize))
    test_idx = idx[:nTest]
    valid_idx = idx[nTest:nTest + nValid]
    train_idx = idx[nTest + nValid:]
    train_x = sequences[train_idx]
    train_y = labels[train_idx]
    test_x = sequences[test_idx]
    test_y = labels[test_idx]
    valid_x = sequences[valid_idx]
    valid_y = labels[valid_idx]
    train_x = [sorted(seq) for seq in train_x]
    train_y = [sorted(seq) for seq in train_y]
    valid_x = [sorted(seq) for seq in valid_x]
    valid_y = [sorted(seq) for seq in valid_y]
    test_x = [sorted(seq) for seq in test_x]
    test_y = [sorted(seq) for seq in test_y]
    train = (train_x, train_y)
    test = (test_x, test_y)
    valid = (valid_x, valid_y)
    return (train, test, valid)
Padding the input:

def padding(seqs, labels, vocab, n_classes):
    lengths = np.array([len(seq) for seq in seqs]) - 1  # remove the last list in each patient's sequences for labels
    n_samples = len(lengths)
    maxlen = np.max(lengths)
    x = torch.zeros(maxlen, n_samples, vocab)  # maxlen = number of visits, n_samples = samples
    y = torch.zeros(maxlen, n_samples, n_classes)
    mask = torch.zeros(maxlen, n_samples)
    for idx, (seq, label) in enumerate(zip(seqs, labels)):
        for xvec, subseq in zip(x[:, idx, :], seq[:-1]):
            xvec[subseq] = 1.
        for yvec, subseq in zip(y[:, idx, :], label[1:]):
            yvec[subseq] = 1.
        mask[:lengths[idx], idx] = 1.
    return x, y, lengths, mask
GRU class:

torch.manual_seed(1)

class EHRNN(nn.Module):
    def __init__(self, inputDimSize, hiddenDimSize, embSize, batchSize, numClass):
        super(EHRNN, self).__init__()
        self.hiddenDimSize = hiddenDimSize
        self.inputDimSize = inputDimSize
        self.embSize = embSize
        self.numClass = numClass
        self.batchSize = batchSize

        # Initialize random weights
        self.W_z = nn.Parameter(torch.randn(self.embSize, self.hiddenDimSize).cuda())
        self.W_r = nn.Parameter(torch.randn(self.embSize, self.hiddenDimSize).cuda())
        self.W_h = nn.Parameter(torch.randn(self.embSize, self.hiddenDimSize).cuda())
        self.U_z = nn.Parameter(torch.randn(self.hiddenDimSize, self.hiddenDimSize).cuda())
        self.U_r = nn.Parameter(torch.randn(self.hiddenDimSize, self.hiddenDimSize).cuda())
        self.U_h = nn.Parameter(torch.randn(self.hiddenDimSize, self.hiddenDimSize).cuda())
        self.b_z = nn.Parameter(torch.zeros(self.hiddenDimSize).cuda())
        self.b_r = nn.Parameter(torch.zeros(self.hiddenDimSize).cuda())
        self.b_h = nn.Parameter(torch.zeros(self.hiddenDimSize).cuda())

        self.params = [self.W_z, self.W_r, self.W_h,
                       self.U_z, self.U_r, self.U_h,
                       self.b_z, self.b_r, self.b_h]

    def forward(self, emb, h):
        z = torch.sigmoid(torch.matmul(emb, self.W_z) + torch.matmul(h, self.U_z) + self.b_z)
        r = torch.sigmoid(torch.matmul(emb, self.W_r) + torch.matmul(h, self.U_r) + self.b_r)
        h_tilde = torch.tanh(torch.matmul(emb, self.W_h) + torch.matmul(r * h, self.U_h) + self.b_h)
        h = z * h + ((1. - z) * h_tilde)
        return h

    def init_hidden(self):
        return Variable(torch.zeros(self.batchSize, self.hiddenDimSize))
Custom layer for handling the two-layer GRU:

torch.manual_seed(1)

class build_EHRNN(nn.Module):
    def __init__(self, inputDimSize=4894, hiddenDimSize=[200, 200], batchSize=100, embSize=200,
                 numClass=4894, dropout=0.5, logEps=1e-8):
        super(build_EHRNN, self).__init__()
        self.inputDimSize = inputDimSize
        self.hiddenDimSize = hiddenDimSize
        self.numClass = numClass
        self.embSize = embSize
        self.batchSize = batchSize
        self.dropout = nn.Dropout(p=0.5)
        self.logEps = logEps

        # Embedding inputs
        self.W_emb = nn.Parameter(torch.randn(self.inputDimSize, self.embSize).cuda())
        self.b_emb = nn.Parameter(torch.zeros(self.embSize).cuda())
        self.W_out = nn.Parameter(torch.randn(self.hiddenDimSize, self.numClass).cuda())
        self.b_out = nn.Parameter(torch.zeros(self.numClass).cuda())

        self.params = [self.W_emb, self.W_out,
                       self.b_emb, self.b_out]

    def forward(self, x, y, lengths, mask):
        self.emb = torch.tanh(torch.matmul(x, self.W_emb) + self.b_emb)
        input_values = self.emb
        self.outputs = [input_values]
        for i, hiddenSize in enumerate([self.hiddenDimSize, self.hiddenDimSize]):  # iterate over layers
            rnn = EHRNN(self.inputDimSize, hiddenSize, self.embSize, self.batchSize, self.numClass)  # calculate hidden states
            hidden_state = []
            h = self.init_hidden().cuda()
            for i, seq in enumerate(input_values):  # loop over sequences in each batch
                h = rnn(seq, h)
                hidden_state.append(h)
            hidden_state = self.dropout(torch.stack(hidden_state))  # apply dropout between layers
            input_values = hidden_state

        y_linear = torch.matmul(hidden_state, self.W_out) + self.b_out  # fully connected layer
        yhat = F.softmax(y_linear, dim=1)  # yhat
        yhat = yhat * mask[:, :, None]     # apply mask

        # Loss calculation
        cross_entropy = -(y * torch.log(yhat + self.logEps) + (1. - y) * torch.log(1. - yhat + self.logEps))
        last_step = -torch.mean(y[-1] * torch.log(yhat[-1] + self.logEps) + (1. - y[-1]) * torch.log(1. - yhat[-1] + self.logEps))
        prediction_loss = torch.sum(torch.sum(cross_entropy, dim=0), dim=1) / torch.cuda.FloatTensor(lengths)
        cost = torch.mean(prediction_loss) + 0.000001 * (self.W_out ** 2).sum()  # regularize
        return (yhat, hidden_state, cost)

    def init_hidden(self):
        return torch.zeros(self.batchSize, self.hiddenDimSize)  # initial state
Load data:
train, test, valid = load_data(sequences, labels)

Batch size:
batchSize = 100
n_batches = int(np.ceil(float(len(train[0])) / float(batchSize))) - 1
n_batches_valid = int(np.ceil(float(len(valid[0])) / float(batchSize))) - 1

Model:
model = build_EHRNN(inputDimSize=4894, hiddenDimSize=200, batchSize=20, embSize=200, numClass=4894, dropout=0.5, logEps=1e-8)
model = model.to(device)
optimizer = torch.optim.Adadelta(model.parameters(), lr=0.01, rho=0.90)

max_epochs = 5
loss_all = []
iteration = 0

for e in range(max_epochs):
    for index in random.sample(range(n_batches), n_batches):
        batchX = train[0][:n_batches * batchSize][index * batchSize:(index + 1) * batchSize]
        batchY = train[1][:n_batches * batchSize][index * batchSize:(index + 1) * batchSize]
        optimizer.zero_grad()
        x, y, lengths, mask = padding(batchX, batchY, 4894, 4894)
        if torch.cuda.is_available():
            x, y, lengths, mask = x.cuda(), y.cuda(), lengths, mask.cuda()
        outputs, hidden, cost = model(x, y, lengths, mask)
        if torch.cuda.is_available():
            cost.cuda()
        cost.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
        loss_all.append(cost.item())
        iteration += 1
        if iteration % 10 == 0:
            # Calculate accuracy
            losses = []
            model.eval()
            val_loss = []
            for index in random.sample(range(n_batches_valid), n_batches_valid):
                validX = valid[0][:n_batches_valid * batchSize][index * batchSize:(index + 1) * batchSize]
                validY = valid[1][:n_batches_valid * batchSize][index * batchSize:(index + 1) * batchSize]
                x, y, lengths, mask = padding(validX, validY, 4894, 4894)
                if torch.cuda.is_available():
                    x, y, lengths, mask = x.cuda(), y.cuda(), lengths, mask.cuda()
                outputs, hidden_val, cost_val = model(x, y, lengths, mask)
                losses.append(cost_val)
            model.train()
            print("Epoch: {}/{}...".format(e + 1, max_epochs),
                  "Step: {}...".format(iteration),
                  "Training Loss: {:.4f}...".format(np.mean(loss_all)),
                  "Val Loss: {:.4f}".format(torch.mean(torch.tensor(losses))))
ERROR:
ValueError Traceback (most recent call last)
<ipython-input-76-9ca4916456a9> in <module>()
8
9 for e in range(max_epochs):
---> 10 for index in random.sample(range(n_batches), n_batches):
11 batchX = train[0][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
12 batchY = train[1][:n_batches*batchSize][index*batchSize:(index+1)*batchSize]
/usr/lib/python3.7/random.py in sample(self, population, k)
319 n = len(population)
320 if not 0 <= k <= n:
--> 321 raise ValueError("Sample larger than population or is negative")
322 result = [None] * k
323 setsize = 21 # size of a small set minus size of an empty list
ValueError: Sample larger than population or is negative
I tried many things but I couldn't solve the problem.
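A hedged note on the traceback (mine, not the poster's): random.sample(range(n_batches), n_batches) raises this exact ValueError only when n_batches is negative, and n_batches = ceil(len(train[0]) / batchSize) - 1 comes out as -1 precisely when the training split is empty, i.e. sequences/labels did not load as expected. A small guard makes the failure explicit:

n_train = len(train[0])
assert n_train > 0, "training split is empty: check how sequences/labels were loaded"
# the original -1 drops the final partial batch; keep at least one batch
n_batches = max(int(np.ceil(n_train / float(batchSize))) - 1, 1)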

What is hp_metric in TensorBoard and how to get rid of it?

I am new to Tensorboard.
I am using fairly simple code running an experiment, and this is the output:
I don't remember asking for a hp_metric graph, yet here it is.
What is it and how do I get rid of it?
Full code to reproduce, using Pytorch Lightning (not that I think anyone should have to reproduce this to answer):
Please notice the ONLY line dereferencing TensorBoard is
self.logger.experiment.add_scalars("losses", {"train_loss": loss}, global_step=self.current_epoch)
import torch
from torch import nn
import torch.nn.functional as F
from typing import List, Optional
from pytorch_lightning.core.lightning import LightningModule
from Testing.Research.toy_datasets.ClustersDataset import ClustersDataset
from torch.utils.data import DataLoader
from Testing.Research.config.ConfigProvider import ConfigProvider
from pytorch_lightning import Trainer, seed_everything
from torch import optim
import os
from pytorch_lightning.loggers import TensorBoardLogger

class VAEFC(LightningModule):
    # see https://towardsdatascience.com/understanding-variational-autoencoders-vaes-f70510919f73
    # for possible upgrades, see https://arxiv.org/pdf/1602.02282.pdf
    # https://stats.stackexchange.com/questions/332179/how-to-weight-kld-loss-vs-reconstruction-loss-in-variational-auto-encoder
    def __init__(self, encoder_layer_sizes: List, decoder_layer_sizes: List, config):
        super(VAEFC, self).__init__()
        self._config = config
        self.logger: Optional[TensorBoardLogger] = None

        assert len(encoder_layer_sizes) >= 3, "must have at least 3 layers (2 hidden)"
        # encoder layers
        self._encoder_layers = nn.ModuleList()
        for i in range(1, len(encoder_layer_sizes) - 1):
            enc_layer = nn.Linear(encoder_layer_sizes[i - 1], encoder_layer_sizes[i])
            self._encoder_layers.append(enc_layer)

        # predict mean and covariance vectors
        self._mean_layer = nn.Linear(encoder_layer_sizes[len(encoder_layer_sizes) - 2],
                                     encoder_layer_sizes[len(encoder_layer_sizes) - 1])
        self._logvar_layer = nn.Linear(encoder_layer_sizes[len(encoder_layer_sizes) - 2],
                                       encoder_layer_sizes[len(encoder_layer_sizes) - 1])

        # decoder layers
        self._decoder_layers = nn.ModuleList()
        for i in range(1, len(decoder_layer_sizes)):
            dec_layer = nn.Linear(decoder_layer_sizes[i - 1], decoder_layer_sizes[i])
            self._decoder_layers.append(dec_layer)

        self._recon_function = nn.MSELoss(reduction='mean')

    def _encode(self, x):
        for i in range(len(self._encoder_layers)):
            layer = self._encoder_layers[i]
            x = F.relu(layer(x))
        mean_output = self._mean_layer(x)
        logvar_output = self._logvar_layer(x)
        return mean_output, logvar_output

    def _reparametrize(self, mu, logvar):
        if not self.training:
            return mu
        std = logvar.mul(0.5).exp_()
        if std.is_cuda:
            eps = torch.cuda.FloatTensor(std.size()).normal_()
        else:
            eps = torch.FloatTensor(std.size()).normal_()
        reparameterized = eps.mul(std).add_(mu)
        return reparameterized

    def _decode(self, z):
        for i in range(len(self._decoder_layers) - 1):
            layer = self._decoder_layers[i]
            z = F.relu((layer(z)))
        decoded = self._decoder_layers[len(self._decoder_layers) - 1](z)
        # decoded = F.sigmoid(self._decoder_layers[len(self._decoder_layers) - 1](z))
        return decoded

    def _loss_function(self, recon_x, x, mu, logvar, reconstruction_function):
        """
        recon_x: generated images
        x: original images
        mu: latent mean
        logvar: latent log variance
        """
        binary_cross_entropy = reconstruction_function(recon_x, x)  # mse loss TODO see if mse or cross entropy
        # loss = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
        kld_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
        kld = torch.sum(kld_element).mul_(-0.5)
        # KL divergence (Kullback-Leibler divergence), the regularization term for a VAE.
        # It is a measure of how different two probability distributions are.
        # We are trying to force the distributions closer while keeping the reconstruction loss low.
        # see https://towardsdatascience.com/understanding-variational-autoencoders-vaes-f70510919f73
        # read on weighting the regularization term here:
        # https://stats.stackexchange.com/questions/332179/how-to-weight-kld-loss-vs-reconstruction-loss-in-variational-auto-encoder
        return binary_cross_entropy + kld * self._config.regularization_factor

    def training_step(self, batch, batch_index):
        orig_batch, noisy_batch, _ = batch
        noisy_batch = noisy_batch.view(noisy_batch.size(0), -1)
        recon_batch, mu, logvar = self.forward(noisy_batch)
        loss = self._loss_function(
            recon_batch,
            orig_batch, mu, logvar,
            reconstruction_function=self._recon_function
        )
        # self.logger.experiment.add_scalars("losses", {"train_loss": loss})
        self.logger.experiment.add_scalars("losses", {"train_loss": loss}, global_step=self.current_epoch)
        # self.logger.experiment.add_scalar("train_loss", loss, self.current_epoch)
        self.logger.experiment.flush()
        return loss

    def train_dataloader(self):
        default_dataset, train_dataset, test_dataset = ClustersDataset.clusters_dataset_by_config()
        train_dataloader = DataLoader(train_dataset, batch_size=self._config.batch_size, shuffle=True)
        return train_dataloader

    def test_dataloader(self):
        default_dataset, train_dataset, test_dataset = ClustersDataset.clusters_dataset_by_config()
        test_dataloader = DataLoader(test_dataset, batch_size=self._config.batch_size, shuffle=True)
        return test_dataloader

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=self._config.learning_rate)
        return optimizer

    def forward(self, x):
        mu, logvar = self._encode(x)
        z = self._reparametrize(mu, logvar)
        decoded = self._decode(z)
        return decoded, mu, logvar

if __name__ == "__main__":
    config = ConfigProvider.get_config()
    seed_everything(config.random_seed)
    latent_dim = config.latent_dim
    enc_layer_sizes = config.enc_layer_sizes + [latent_dim]
    dec_layer_sizes = [latent_dim] + config.dec_layer_sizes
    model = VAEFC(config=config, encoder_layer_sizes=enc_layer_sizes, decoder_layer_sizes=dec_layer_sizes)

    logger = TensorBoardLogger(save_dir='tb_logs', name='VAEFC')
    logger.hparams = config  # TODO only put here relevant stuff
    # trainer = Trainer(gpus=1)
    trainer = Trainer(deterministic=config.is_deterministic,
                      # auto_lr_find=config.auto_lr_find,
                      # log_gpu_memory='all',
                      # min_epochs=99999,
                      max_epochs=config.num_epochs,
                      default_root_dir=os.getcwd(),
                      logger=logger
                      )
    # trainer.tune(model)
    trainer.fit(model)
    print("done training vae with lightning")
ClustersDataset.py

from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import torch
import numpy as np
from Testing.Research.config.ConfigProvider import ConfigProvider

class ClustersDataset(Dataset):
    __default_dataset = None
    __default_dataset_train = None
    __default_dataset_test = None

    def __init__(self, cluster_size: int, noise_factor: float = 0, transform=None, n_clusters=2, centers_radius=4.0):
        super(ClustersDataset, self).__init__()
        self._cluster_size = cluster_size
        self._noise_factor = noise_factor
        self._n_clusters = n_clusters
        self._centers_radius = centers_radius
        # self._transform = transform
        self._size = self._cluster_size * self._n_clusters
        self._create_data_clusters()
        self._combine_clusters_to_array()
        self._normalize_data()
        self._add_noise()
        # self._plot()

    @staticmethod
    def clusters_dataset_by_config():
        if ClustersDataset.__default_dataset is not None:
            return \
                ClustersDataset.__default_dataset, \
                ClustersDataset.__default_dataset_train, \
                ClustersDataset.__default_dataset_test
        config = ConfigProvider.get_config()
        default_dataset = ClustersDataset(
            cluster_size=config.cluster_size,
            noise_factor=config.noise_factor,
            transform=None,
            n_clusters=config.n_clusters,
            centers_radius=config.centers_radius
        )
        train_size = int(config.train_size * len(default_dataset))
        test_size = len(default_dataset) - train_size
        train_dataset, test_dataset = torch.utils.data.random_split(default_dataset, [train_size, test_size])

        ClustersDataset.__default_dataset = default_dataset
        ClustersDataset.__default_dataset_train = train_dataset
        ClustersDataset.__default_dataset_test = test_dataset
        return default_dataset, train_dataset, test_dataset

    def _create_data_clusters(self):
        self._clusters = [torch.zeros((self._cluster_size, 2)) for _ in range(self._n_clusters)]
        centers_radius = self._centers_radius
        for i, c in enumerate(self._clusters):
            r, x, y = 3.0, centers_radius * np.cos(i * np.pi * 2 / self._n_clusters), centers_radius * np.sin(
                i * np.pi * 2 / self._n_clusters)
            cluster_length = 1.1
            cluster_start = i * 2 * np.pi / self._n_clusters
            cluster_end = cluster_length * (i + 1) * 2 * np.pi / self._n_clusters
            cluster_inds = torch.linspace(start=cluster_start, end=cluster_end, steps=self._cluster_size,
                                          dtype=torch.float)
            c[:, 0] = r * torch.sin(cluster_inds) + y
            c[:, 1] = r * torch.cos(cluster_inds) + x

    def _plot(self):
        plt.figure()
        plt.scatter(self._noisy_values[:, 0], self._noisy_values[:, 1], s=1, color='b', label="noisy_values")
        plt.scatter(self._values[:, 0], self._values[:, 1], s=1, color='r', label="values")
        plt.legend(loc="upper left")
        plt.show()

    def _combine_clusters_to_array(self):
        size = self._size
        self._values = torch.zeros(size, 2)
        self._labels = torch.zeros(size, dtype=torch.long)
        for i, c in enumerate(self._clusters):
            self._values[i * self._cluster_size: (i + 1) * self._cluster_size, :] = self._clusters[i]
            self._labels[i * self._cluster_size: (i + 1) * self._cluster_size] = i

    def _add_noise(self):
        size = self._size
        mean = torch.zeros(size, 2)
        std = torch.ones(size, 2)
        noise = torch.normal(mean, std)
        self._noisy_values = torch.zeros(size, 2)
        self._noisy_values[:] = self._values
        self._noisy_values = self._noisy_values + noise * self._noise_factor

    def _normalize_data(self):
        values_min, values_max = torch.min(self._values), torch.max(self._values)
        self._values = (self._values - values_min) / (values_max - values_min)
        self._values = self._values * 2 - 1

    def __len__(self):
        return self._size  # number of samples in the dataset

    def __getitem__(self, index):
        item = self._values[index, :]
        noisy_item = self._noisy_values[index, :]
        # if self._transform is not None:
        #     noisy_item = self._transform(item)
        return item, noisy_item, self._labels[index]

    @property
    def values(self):
        return self._values

    @property
    def noisy_values(self):
        return self._noisy_values
Config values (ConfigProvider just returns those as an object)
num_epochs: 15
batch_size: 128
learning_rate: 0.0001
auto_lr_find: False
noise_factor: 0.1
regularization_factor: 0.0
cluster_size: 5000
n_clusters: 5
centers_radius: 4.0
train_size: 0.8
latent_dim: 8
enc_layer_sizes: [2, 200, 200, 200]
dec_layer_sizes: [200, 200, 200, 2]
retrain_vae: False
random_seed: 11
is_deterministic: True
It's the default setting of TensorBoard in PyTorch Lightning. You can set default_hp_metric to False to get rid of this metric.
TensorBoardLogger(save_dir='tb_logs', name='VAEFC', default_hp_metric=False)
The hp_metric helps you track the model performance across different hyperparameters. You can check it under hparams in your TensorBoard.
hp_metric (hyperparameter metric) is there to help you tune your hyperparameters.
You can set this metric to whatever you like, as documented in the official docs.
Then you can look through your hyperparameters and see which come out best according to whichever metric you choose.
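For example (a sketch based on my reading of the Lightning TensorBoardLogger API; the metric name and values here are made up): log a value under the name "hp_metric" from a step, or pass a metrics dict when logging hyperparameters, and it shows up in the hparams tab:

# inside a LightningModule step, e.g. validation_step:
self.log("hp_metric", my_validation_metric)  # my_validation_metric is hypothetical

# or once, when creating the logger:
logger.log_hyperparams(params={"latent_dim": 8}, metrics={"hp_metric": 0.0})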
Alternatively, if you don't want it, you can disable it as suggested in @joe32140's answer:
You can set default_hp_metric to False to get rid of this metric.
TensorBoardLogger(save_dir='tb_logs', name='VAEFC', default_hp_metric=False)

VGG16 Tensorflow implementation does not learn on cifar-10

This VGGNet was implemented from scratch using the TensorFlow framework, with all of the layers defined in the code.
The main problem I am facing here is that the training accuracy, not to mention the validation accuracy, does not go up even though I wait it out for a decent amount of time. There are a few problems that I suspect are causing this. First, I think the network is too deep and wide for the cifar-10 dataset. Second, extracting data batches out of the whole dataset is not exhaustive, i.e. batches are sampled over and over again from the whole dataset without eliminating examples that were already selected in the ongoing epoch (see the sketch just below).
However, I still could not get this code to work after many hours and days of experiments.
I wish I could extract the problematic code section to ask a question, but since I cannot pinpoint the exact section, let me upload my whole code.
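On the non-exhaustive batch selection specifically, a common fix (my sketch, not from the original post) is to shuffle a permutation of indices once per epoch and slice it into batches, so every example is visited exactly once per epoch:

import numpy as np

def epoch_batches(num_examples, batch_size, rng=np.random):
    # yields index arrays that cover each example exactly once, in random order
    order = rng.permutation(num_examples)
    for start in range(0, num_examples, batch_size):
        yield order[start:start + batch_size]

# usage sketch: one full pass over 40000 training examples per epoch
# for idx in epoch_batches(40000, 128):
#     x_batch = [X[i].flatten() for i in idx]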
import os
import sys
import tensorflow as tf
import numpy as np
import scipy as sci
import math
import matplotlib.pyplot as plt
import time
import random
import imageio
import pickle
import cv2
import json
from pycocotools.coco import COCO
class SVGG:
    def __init__(self, num_output_classes):
        self.input_layer_size = 0
        self.num_output_classes = num_output_classes

        # Data
        self.X = []
        self.Y = []
        self.working_x = []
        self.working_y = []
        self.testX = []
        self.testY = []

        # hard coded for now. Have to change.
        self.input_data_size = 32         # 32 x 32
        self.input_data_size_flat = 3072  # 32 x 32 x 3 == 3072
        self.num_of_channels = 3          # 3 for colour image
        self.input_data_size = 32         # 32 x 32
        self.input_data_size_flat = self.input_data_size * self.input_data_size  # 32 x 32 x 3 == 3072
        self.num_of_channels = 3          # 3 for colour image

        self.convolution_layers = []
        self.convolution_weights = []
        self.fully_connected_layers = []
        self.fully_connected_weights = []

    def feed_examples(self, input_X, input_Y):
        """
        Feed examples to be learned
        :param input_X: Training dataset X
        :param input_Y: Training dataset label
        :return:
        """
        # Take first input and calculate its size
        # hard code size
        self.X = input_X
        self.Y = input_Y
        self.input_data_size_flat = len(self.X[0]) * len(self.X[0][0]) * len(self.X[0][0][0])

    def feed_test_data(self, test_X, test_Y):
        self.testX = test_X
        self.testY = test_Y
    def run(self):
        x = tf.placeholder(tf.float32, [None, self.input_data_size_flat], name='x')
        x_data = tf.reshape(x, [-1, self.input_data_size, self.input_data_size, 3])
        y_true = tf.placeholder(tf.float32, [None, self.num_output_classes], name='y_true')
        y_true_cls = tf.argmax(y_true, axis=1)

        """
        VGG layers
        """
        # Create layers
        ######################################## Input Layer ########################################
        input_layer, input_weight = self.create_convolution_layer(x_data, num_input_channels=3, filter_size=3,
                                                                  num_filters=64, use_pooling=True)  # False

        ######################################## Convolutional Layers ########################################
        ############### Conv Layer 1 #################
        conv_1_1, w_1_1 = self.create_convolution_layer(input=input_layer, num_input_channels=64, filter_size=3, num_filters=64, use_pooling=False)
        conv_1_2, w_1_2 = self.create_convolution_layer(input=conv_1_1, num_input_channels=64, filter_size=3, num_filters=128, use_pooling=True)

        ############### Conv Layer 2 #################
        conv_2_1, w_2_1 = self.create_convolution_layer(input=conv_1_2, num_input_channels=128, filter_size=3, num_filters=128, use_pooling=False)
        conv_2_2, w_2_2 = self.create_convolution_layer(input=conv_2_1, num_input_channels=128, filter_size=3, num_filters=256, use_pooling=True)

        ############### Conv Layer 3 #################
        conv_3_1, w_3_1 = self.create_convolution_layer(input=conv_2_2, num_input_channels=256, filter_size=3, num_filters=256, use_pooling=False)
        conv_3_2, w_3_2 = self.create_convolution_layer(input=conv_3_1, num_input_channels=256, filter_size=3, num_filters=256, use_pooling=False)
        conv_3_3, w_3_3 = self.create_convolution_layer(input=conv_3_2, num_input_channels=256, filter_size=3, num_filters=512, use_pooling=True)

        ############### Conv Layer 4 #################
        conv_4_1, w_4_1 = self.create_convolution_layer(input=conv_3_3, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
        conv_4_2, w_4_2 = self.create_convolution_layer(input=conv_4_1, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
        conv_4_3, w_4_3 = self.create_convolution_layer(input=conv_4_2, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=True)

        ############### Conv Layer 5 #################
        conv_5_1, w_5_1 = self.create_convolution_layer(input=conv_4_3, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
        conv_5_2, w_5_2 = self.create_convolution_layer(input=conv_5_1, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
        conv_5_3, w_5_3 = self.create_convolution_layer(input=conv_5_2, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=True)

        layer_flat, num_features = self.flatten_layer(conv_5_3)

        ######################################## Fully Connected Layers ########################################
        fc_1 = self.create_fully_connected_layer(input=layer_flat, num_inputs=num_features, num_outputs=4096)
        fc_2 = self.create_fully_connected_layer(input=fc_1, num_inputs=4096, num_outputs=4096)
        fc_3 = self.create_fully_connected_layer(input=fc_2, num_inputs=4096, num_outputs=self.num_output_classes, use_dropout=False)

        # Normalize prediction
        y_prediction = tf.nn.softmax(fc_3)
        # The class number is the index of the largest element
        y_prediction_class = tf.argmax(y_prediction, axis=1)

        # Cost function to be optimized
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=fc_3, labels=y_true)
        # => Now we have a measure of how well the model performs on each image individually. But in order to use the
        # cross entropy to guide the optimization of the model's variables we need a single value, so we simply take
        # the average of the cross entropy over all the image classifications
        cost = tf.reduce_mean(cross_entropy)

        # Optimizer
        optimizer_adam = tf.train.AdamOptimizer(learning_rate=0.002).minimize(cost)

        # Performance measure
        correct_prediction = tf.equal(y_prediction_class, y_true_cls)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        total_iterations = 0
        num_iterations = 100000

        start_time = time.time()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(num_iterations):
                x_batch, y_true_batch, _ = self.get_batch(X=self.X, Y=self.Y, low=0, high=40000, batch_size=128)
                feed_dict_train = {x: x_batch, y_true: y_true_batch}
                sess.run(optimizer_adam, feed_dict=feed_dict_train)
                if i % 100 == 99:
                    # Calculate the accuracy on the held-out part of the training set
                    x_batch, y_true_batch, _ = self.get_batch(X=self.X, Y=self.Y, low=40000, high=50000, batch_size=1000)
                    feed_dict_validate = {x: x_batch, y_true: y_true_batch}
                    acc = sess.run(accuracy, feed_dict=feed_dict_validate)
                    # Message for printing
                    msg = "Optimization Iteration: {0:>6}, Training Accuracy: {1:>6.1%}"
                    # print(sess.run(y_prediction, feed_dict=feed_dict_train))
                    # print(sess.run(y_prediction_class, feed_dict=feed_dict_train))
                    print(msg.format(i + 1, acc))
                if i % 10000 == 9999:
                    oSaver = tf.train.Saver()
                    oSess = sess
                    path = "./model/_" + "iteration_" + str(i) + ".ckpt"
                    oSaver.save(oSess, path)
                if i == num_iterations - 1:
                    x_batch, y_true_batch, _ = self.get_batch(X=self.testX, Y=self.testY, low=0, high=10000, batch_size=10000)
                    feed_dict_test = {x: x_batch, y_true: y_true_batch}
                    test_accuracy = sess.run(accuracy, feed_dict=feed_dict_test)
                    msg = "Test Accuracy: {0:>6.1%}"
                    print(msg.format(test_accuracy))
    def get_batch(self, X, Y, low=0, high=50000, batch_size=128):
        x_batch = []
        y_batch = np.ndarray(shape=(batch_size, self.num_output_classes))
        index = np.random.randint(low=low, high=high, size=batch_size)
        counter = 0
        for idx in index:
            x_batch.append(X[idx].flatten())
            y_batch[counter] = one_hot_encoded(Y[idx], self.num_output_classes)
            y_batch_cls = Y[idx]
            counter += 1
        return x_batch, y_batch, y_batch_cls

    def generate_new_weights(self, shape):
        w = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
        return w

    def generate_new_biases(self, shape):
        b = tf.Variable(tf.constant(0.05, shape=[shape]))
        return b

    def create_convolution_layer(self, input, num_input_channels, filter_size, num_filters, use_pooling):
        """
        :param input: The previous layer
        :param num_input_channels: Number of channels in previous layer
        :param filter_size: W and H of each filter
        :param num_filters: Number of filters
        :return:
        """
        shape = [filter_size, filter_size, num_input_channels, num_filters]
        weights = self.generate_new_weights(shape)
        biases = self.generate_new_biases(num_filters)

        layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
        layer += biases

        # Max pooling
        if use_pooling:
            layer = tf.nn.max_pool(layer, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

        # ReLU. Using elu for better performance
        layer = tf.nn.elu(layer)
        return layer, weights

    def create_fully_connected_layer(self, input, num_inputs, num_outputs, use_dropout=True):
        weights = self.generate_new_weights(shape=[num_inputs, num_outputs])
        biases = self.generate_new_biases(shape=num_outputs)

        layer = tf.matmul(input, weights) + biases
        layer = tf.nn.elu(layer)

        if use_dropout:
            keep_prob = 0.5
            layer = tf.nn.dropout(layer, keep_prob)
        return layer

    def flatten_layer(self, layer):
        """
        Flattens the output of a convolution layer.
        Flattening is needed to feed into a fully-connected layer.
        :param layer:
        :return:
        """
        # shape [num_images, img_height, img_width, num_channels]
        layer_shape = layer.get_shape()
        # Number of features: h x w x channels
        num_features = layer_shape[1:4].num_elements()
        # Reshape
        layer_flat = tf.reshape(layer, [-1, num_features])
        # Shape is now [num_images, img_height * img_width * num_channels]
        return layer_flat, num_features
def unpickle(file):
    with open(file, 'rb') as file:
        dict = pickle.load(file, encoding='bytes')
    return dict

def convert_to_individual_image(flat):
    img_R = flat[0:1024].reshape((32, 32))
    img_G = flat[1024:2048].reshape((32, 32))
    img_B = flat[2048:3072].reshape((32, 32))

    # B G R
    mean = [125.3, 123.0, 113.9]
    img = np.dstack((img_R - mean[0], img_G - mean[1], img_B - mean[2]))
    img = np.array(img)
    # img = cv2.resize(img, (224, 224), img)
    return img

def read_coco_data(img_path, annotation_path):
    coco = COCO(annotation_path)
    ids = list(coco.imgs.keys())
    ann_keys = list(coco.anns.keys())
    print(coco.imgs[ids[0]])
    print(coco.anns[ann_keys[0]])

def one_hot_encoded(class_numbers, num_classes=None):
    if num_classes is None:
        num_classes = np.max(class_numbers) + 1
    return np.eye(num_classes, dtype=float)[class_numbers]
if __name__ == '__main__':
    data = []
    labels = []
    val_data = []
    val_label = []

    # cifar-10
    counter = 0
    for i in range(1, 6):
        unpacked = unpickle("./cifar10/data_batch_" + str(i))
        tmp_data = unpacked[b'data']
        tmp_label = unpacked[b'labels']
        inner_counter = 0
        for flat in tmp_data:
            converted = convert_to_individual_image(flat)
            data.append(converted)
            labels.append(tmp_label[inner_counter])
            counter += 1
            inner_counter += 1
            cv2.imwrite("./img/" + str(counter) + ".jpg", converted)

    # Test data
    unpacked = unpickle("./cifar10/test_batch")
    test_data = []
    test_data_flat = unpacked[b'data']
    test_label = unpacked[b'labels']
    for flat in test_data_flat:
        test_data.append(convert_to_individual_image(flat))

    svgg = SVGG(10)
    svgg.feed_examples(input_X=data, input_Y=labels)
    svgg.feed_test_data(test_X=test_data, test_Y=test_label)
    svgg.run()

Pytorch: Custom Loss only works for batch_size == 1

I am currently trying to port my existing (working) Keras BNN code to PyTorch.
To this end, I have to write a custom NegativeLogLikelihood loss function. My unit test for this loss passes (e.g. for fixed network weights I get the same results and gradients as in my old (working) Keras code), but in a simple dummy example (fitting a sinc function) my loss only gives okay results for batch_size == 1, and my network fails to fit sinc properly (at any number of training iterations) for larger values. Using nn.MSELoss instead works perfectly fine, so I am assuming an issue with my loss computation.
import matplotlib.pyplot as plt
from itertools import islice
try:
    from tqdm import tqdm
except ImportError:
    tqdm = lambda x, total: x
import numpy as np
import torch
from torch.utils import data as data_utils
import torch.nn as nn

class NLLLoss(torch.nn.modules.loss._Loss):
    def __init__(self, parameters, num_datapoints, size_average=False, reduce=True):
        super().__init__(size_average, reduce)
        self.parameters = tuple(parameters)
        self.num_datapoints = num_datapoints

    def log_variance_prior(self, log_variance, mean=1e-6, variance=0.01):
        return torch.mean(
            torch.sum(
                ((-(log_variance - torch.log(torch.tensor(mean))) ** 2) /
                 (2. * variance)) - 0.5 * torch.log(torch.tensor(variance)),
                dim=1
            )
        )

    def weight_prior(self, parameters, wdecay=1.):
        num_parameters = torch.sum(torch.tensor([
            torch.prod(torch.tensor(parameter.size()))
            for parameter in parameters
        ]))
        log_likelihood = torch.sum(torch.tensor([
            torch.sum(-wdecay * 0.5 * (parameter ** 2))
            for parameter in parameters
        ]))
        return log_likelihood / (num_parameters.float() + 1e-16)

    def forward(self, input, target):
        torch.nn.modules.loss._assert_no_grad(target)
        batch_size, *_ = input.shape
        prediction_mean = input[:, 0].view(-1, 1)
        log_prediction_variance = input[:, 1].view(-1, 1)
        prediction_variance_inverse = 1. / (torch.exp(log_prediction_variance) + 1e-16)

        mean_squared_error = torch.pow(target - prediction_mean, 2)

        log_likelihood = (
            torch.sum(
                torch.sum(
                    -mean_squared_error * 0.5 * prediction_variance_inverse -
                    0.5 * log_prediction_variance,
                    dim=1
                )
            )
        )
        log_likelihood /= batch_size
        log_likelihood += (
            self.log_variance_prior(log_prediction_variance) / self.num_datapoints
        )
        log_likelihood += self.weight_prior(self.parameters) / self.num_datapoints
        return -log_likelihood
# Helper Functions {{{ #
def infinite_dataloader(dataloader):
    while True:
        yield from dataloader

def tanh_network(input_dimensionality: int):
    class AppendLayer(nn.Module):
        def __init__(self, bias=True, *args, **kwargs):
            super().__init__(*args, **kwargs)
            if bias:
                self.bias = nn.Parameter(torch.Tensor(1, 1))
            else:
                self.register_parameter('bias', None)

        def forward(self, x):
            return torch.cat((x, self.bias * torch.ones_like(x)), dim=1)

    def init_weights(module):
        if type(module) == AppendLayer:
            nn.init.constant_(module.bias, val=np.log(1e-3))
        elif type(module) == nn.Linear:
            nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="linear")
            nn.init.constant_(module.bias, val=0.0)

    return nn.Sequential(
        nn.Linear(input_dimensionality, 50), nn.Tanh(),
        nn.Linear(50, 50), nn.Tanh(),
        nn.Linear(50, 50), nn.Tanh(),
        nn.Linear(50, 1),
        AppendLayer()
    ).apply(init_weights)
# }}} Helper Functions #
input_dimensionality, num_datapoints = 1, 100
num_train_steps = 13000

# Set up data
x_train = np.array([
    np.random.uniform(np.zeros(1), np.ones(1), input_dimensionality)
    for _ in range(num_datapoints)
])
y_train = np.sinc(x_train * 10 - 5).sum(axis=1)

# Data normalization
x_train_, x_mean, x_std = (
    np.true_divide(x_train - np.mean(x_train), np.std(x_train)), np.mean(x_train), np.std(x_train)
)
y_train_, y_mean, y_std = (
    np.true_divide(y_train - np.mean(y_train), np.std(y_train)), np.mean(y_train), np.std(y_train)
)

model = tanh_network(input_dimensionality=input_dimensionality)

# TODO Why does setting batch_size to 1 work with NLL, but setting it to higher values fail?
batch_size = 20  # setting this to 1 gives okay results
loss_function = NLLLoss(model.parameters(), num_datapoints=num_datapoints)
# NOTE: Using MSE like this also works:
# loss_function = lambda input, target: nn.MSELoss()(input=input[:, 0], target=target)

train_loader = infinite_dataloader(
    data_utils.DataLoader(
        data_utils.TensorDataset(
            torch.from_numpy(x_train_).float(),
            torch.from_numpy(y_train_).float()
        ), batch_size=batch_size
    )
)

optimizer = torch.optim.Adam(model.parameters())

# Train loop
for epoch, (x_batch, y_batch) in tqdm(enumerate(islice(train_loader, num_train_steps)), total=num_train_steps):
    optimizer.zero_grad()
    y_pred = model(x_batch)
    loss = loss_function(input=y_pred, target=y_batch)
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0:
        mse_value = nn.MSELoss()(input=y_pred[:, 0], target=y_batch)
        print("Epoch: {}, Loss: {}, MSE: {}".format(epoch, loss, mse_value))

x_test = np.linspace(0, 1, 100)[:, None]
y_test = np.sinc(x_test * 10 - 5).sum(axis=1)

# Data normalization
x_test_ = np.true_divide(x_test - x_mean, x_std)
x_test_torch = torch.from_numpy(x_test_).float()
y_test_torch = torch.from_numpy(y_test).float()

# Unnormalize predictions
y_pred = model(x_test_torch).detach().numpy() * y_std + y_mean
plt.plot(x_test[:, 0], y_test, label="true", color="black")
plt.plot(x_train[:, 0], y_train, "ro")
plt.plot(x_test[:, 0], y_pred[:, 0], label="Adam", color="blue")
plt.legend()
plt.show()
Any help or suggestions on what I could be doing wrong are very appreciated!
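One thing worth checking (my observation, not something the poster confirmed): y_batch comes out of the DataLoader with shape [batch_size], while prediction_mean is reshaped to [batch_size, 1], so target - prediction_mean silently broadcasts to a [batch_size, batch_size] matrix instead of per-sample residuals. With batch_size == 1 the two shapes coincide, which would explain why only that case behaves. A quick way to see it:

import torch

target = torch.zeros(20)            # shape [20], as yielded by the DataLoader
pred_mean = torch.zeros(20, 1)      # shape [20, 1], as built in NLLLoss.forward
print((target - pred_mean).shape)               # torch.Size([20, 20]) -- silent broadcast
print((target.view(-1, 1) - pred_mean).shape)   # torch.Size([20, 1]) -- per-sample, as intended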
