When I start training my model, the loss values decrease but the accuracy values never change. I don't know why.
# -*- coding: utf-8 -*-
#Libraries
import torch
import torch.nn.functional as F
from torch import autograd, nn
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms, datasets
from torch.utils import data
"""
Olivetti face dataset
"""
from sklearn.datasets import fetch_olivetti_faces
# Olivetti dataset download
olivetti = fetch_olivetti_faces()
train = olivetti.images
label = olivetti.target
X = train
Y = label
print("Format for X:", X.shape)
print("Format for Y: ", Y.shape)
print("\nDownload Ok")
"""
Set for train
"""
train_rate = 0.8
X_train = np.zeros([int(train_rate * X.shape[0]),64,64], dtype=float)
Y_train = np.zeros([int(train_rate * X.shape[0])], dtype=int)
X_val = np.zeros([int((1-train_rate) * X.shape[0]+1),64,64], dtype=float)
Y_val = np.zeros([int((1-train_rate) * X.shape[0]+1)], dtype=int)
#Split data for train and validation
for i in range(X.shape[0]):
ie=0
iv=0
if (i%10)/10 >= train_rate:
X_train[ie] = X[i]
Y_train[ie] = Y[i]
ie += 1
else:
X_val[iv] = X[i]
Y_val[iv] = Y[i]
iv += 1
X_train = X_train.reshape(320,-1,64,64)
X_val = X_val.reshape(80,-1,64,64)
print(Y_train.shape)
X_train = torch.Tensor(X_train)
Y_train = torch.Tensor(Y_train)
X_val = torch.Tensor(X_val)
Y_val = torch.Tensor(Y_val)
batch_size = 20
train_loader = torch.utils.data.DataLoader(X_train,
batch_size=batch_size,
)
val_loader = torch.utils.data.DataLoader(X_val,
batch_size=batch_size,
)
class CNNModule(nn.Module):
def __init__(self):
super(CNNModule, self).__init__()
self.conv1 = nn.Conv2d(1, 6, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 13 * 13, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 40)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = x.view(-1, 16 * 13 * 13)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
def make_train(model,dataset,n_iters,gpu):
# Organize data
X_train,Y_train,X_val,Y_val = dataset
kriter = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),lr=0.01)
#Arrays to save loss and accuracy
tl=np.zeros(n_iters) #For train loss
ta=np.zeros(n_iters) #For train accuracy
vl=np.zeros(n_iters) #For validation loss
va=np.zeros(n_iters) #For validation accuracy
# Convert labels to long
Y_train = Y_train.long()
Y_val = Y_val.long()
# GPU control
if gpu:
X_train,Y_train = X_train.cuda(),Y_train.cuda()
X_val,Y_val = X_val.cuda(),Y_val.cuda()
model = model.cuda() # Parameters to GPU!
print("Using GPU")
else:
print("Using CPU")
# print(X_train.shape)
# print(Y_train.shape)
for i in range(n_iters):
# train forward
train_out = model.forward(X_train)
train_loss = kriter(train_out,Y_train)
# Backward and optimization
train_loss.backward()
optimizer.step()
optimizer.zero_grad()
# Compute train accuracy
train_predict = train_out.cpu().detach().argmax(dim=1)
train_accuracy = (train_predict.cpu().numpy()==Y_train.cpu().numpy()).mean()
# For validation
val_out = model.forward(X_val)
val_loss = kriter(val_out,Y_val)
# Compute validation accuracy
val_predict = val_out.cpu().detach().argmax(dim=1)
val_accuracy = (val_predict.cpu().numpy()==Y_val.cpu().numpy()).mean()
tl[i] = train_loss.cpu().detach().numpy()
ta[i] = train_accuracy
vl[i] = val_loss.cpu().detach().numpy()
va[i] = val_accuracy
# Show result each 5 loop
if i%5==0:
print("Loop --> ",i)
print("Train Loss :",train_loss.cpu().detach().numpy())
print("Train Accuracy :",train_accuracy)
print("Validation Loss :",val_loss.cpu().detach().numpy())
print("Validation Accuracy :",val_accuracy)
model = model.cpu()
#Print result
plt.subplot(2,2,1)
plt.plot(np.arange(n_iters), tl, 'r-')
plt.subplot(2,2,2)
plt.plot(np.arange(n_iters), ta, 'b--')
plt.subplot(2,2,3)
plt.plot(np.arange(n_iters), vl, 'r-')
plt.subplot(2,2,4)
plt.plot(np.arange(n_iters), va, 'b--')
dataset = X_train,Y_train,X_val,Y_val
gpu = True
gpu = gpu and torch.cuda.is_available()
model = CNNModule()
make_train(model,dataset,100,gpu)
OUTPUT:
Using CPU
Loop --> 0
Train Loss : 3.6302185
Train Accuracy : 0.0
Validation Loss : 3.6171098
Validation Accuracy : 0.0
Loop --> 5
Train Loss : 3.557933
Train Accuracy : 0.996875
Validation Loss : 3.545982
Validation Accuracy : 0.9875
.
.
.
Loop --> 95
Train Loss : 0.04211783
Train Accuracy : 0.996875
Validation Loss : 0.13397054
Validation Accuracy : 0.9875
From your code,
train_accuracy = (train_predict.cpu().numpy()==Y_train.cpu().numpy()).mean()
you are taking the mean of the correct values, which is why you get the same answer in every loop. Instead, you should divide the total number of correct predictions by the total number of examples to find the accuracy.
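A minimal sketch of that computation, assuming train_predict and Y_train are 1-D tensors of class indices as in the code above:
# Count correct predictions explicitly and divide by the number of examples
correct = (train_predict == Y_train.cpu()).sum().item()
total = Y_train.shape[0]
train_accuracy = correct / total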
Related
I have a classification problem. I am using PyTorch. My input is a sequence of length 341 and the output is one of three classes {0,1,2}. I want to train a linear regression model using PyTorch. I created the following class, but during training the loss values start out as numbers, then become inf, then NaN. I do not know how to fix that. I also tried initializing the weights of the linear model, but it made no difference. Any suggestions?
class regression(nn.Module):
def __init__(self, input_dim):
super().__init__()
self.input_dim = input_dim
# One layer
self.linear = nn.Linear(input_dim, 1)
def forward(self, x):
y_pred = self.linear(x)
return y_pred
criterion = torch.nn.MSELoss()
def fit(model, data_loader, optim, epochs):
for epoch in range(epochs):
for i, (X, y) in enumerate(data_loader):
X = X.float()
y = y.unsqueeze(1).float()
X = Variable(X, requires_grad=True)
y = Variable(y, requires_grad=True)
# Make a prediction for the input X
pred = model(X)
#loss = (y-pred).pow(2).mean()
loss = criterion(y, pred)
optim.zero_grad()
loss.backward()
optim.step()
print(loss)
print(type(loss))
# Give some feedback after each 5th pass through the data
if epoch % 5 == 0:
print("Epoch", epoch, f"loss: {loss}")
return None
regnet = regression(input_dim=341)
optim = SGD(regnet.parameters(), lr=0.01)
fit(regnet, data_loader, optim=optim, epochs=5)
pred = regnet(torch.Tensor(test_set.data_info).float())
pred = pred.detach().numpy()
I would additionally suggest replacing MSE with CrossEntropy loss, as it is better suited for multi-class classification problems.
import random
import torch
from torch import nn, optim
from matplotlib import pyplot as plt
# Generate random dataset with your shape to test
# Replace this with your own dataset
data = []
for label in [0, 1, 2]:
for i in range(1000):
data.append((torch.rand(341), label))
# train test split
random.shuffle(data)
train, val = data[:1500], data[1500:]
def run_gradient_descent(model, data_train, data_val, batch_size=64, learning_rate=0.01, weight_decay=0, num_epochs=10):
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
iters, losses = [], []
iters_sub, train_acc, val_acc = [], [] ,[]
train_loader = torch.utils.data.DataLoader(data_train, batch_size=batch_size, shuffle=True)
# training
n = 0 # the number of iterations
for epoch in range(num_epochs):
for xs, ts in iter(train_loader):
if len(ts) != batch_size:
continue
zs = model(xs)
loss = criterion(zs, ts) # compute the total loss
loss.backward() # compute updates for each parameter
optimizer.step() # make the updates for each parameter
optimizer.zero_grad() # a clean up step for PyTorch
# save the current training information
iters.append(n)
losses.append(float(loss)/batch_size) # compute *average* loss
if n % 10 == 0:
iters_sub.append(n)
train_acc.append(get_accuracy(model, data_train))
val_acc.append(get_accuracy(model, data_val))
# increment the iteration number
n += 1
# plotting
plt.title("Training Curve (batch_size={}, lr={})".format(batch_size, learning_rate))
plt.plot(iters, losses, label="Train")
plt.xlabel("Iterations")
plt.ylabel("Loss")
plt.show()
plt.title("Training Curve (batch_size={}, lr={})".format(batch_size, learning_rate))
plt.plot(iters_sub, train_acc, label="Train")
plt.plot(iters_sub, val_acc, label="Validation")
plt.xlabel("Iterations")
plt.ylabel("Accuracy")
plt.legend(loc='best')
plt.show()
return model
def get_accuracy(model, data):
loader = torch.utils.data.DataLoader(data, batch_size=500)
correct, total = 0, 0
for xs, ts in loader:
zs = model(xs)
pred = zs.max(1, keepdim=True)[1] # get the index of the max logit
correct += pred.eq(ts.view_as(pred)).sum().item()
total += int(ts.shape[0])
return correct / total
class MyRegression(nn.Module):
def __init__(self, input_dim, output_dim):
super(MyRegression, self).__init__()
# One layer
self.linear = nn.Linear(input_dim, output_dim)
def forward(self, x):
return self.linear(x)
model = MyRegression(341, 3)
run_gradient_descent(model, train, val, batch_size=64, learning_rate=0.01, num_epochs=10)
Because of my reputation score I can't comment, so: if I were you, I would build it like this. I think there is something wrong with the way you define your Module.
class regression(nn.Module):
def __init__(self,input_dim,output_dim):
super(regression,self).__init__()
#function
self.linear=nn.Linear(input_dim,output_dim)
def forward(self,x):
return self.linear(x)
#define the model
input_dim=341
output_dim=3
model=regression(input_dim,output_dim)
# Mean square error
mse=nn.MSELoss()
#Optimization
learning_rate=0.01
optimizer=torch.optim.SGD(model.parameters(),lr=learning_rate)
#train the model
loss_list=[]
iteration_number=X
for iteration in range(iteration_number):
#optimiziation
optimizer.zero_grad()
#forward to get output
results=model("input_datas_tensor")
#loss calculate
loss=mse(results,"outputs_datas_tensor")
#backward propagation
loss.backward()
#updating parameters
optimizer.step()
#store loss
loss_list.append(loss.data)
if(iteration %5==0):
print("epoch{} ,loss{}".format(iteration,loss.data))
I'm a real newbie with PyTorch and neural networks. I started working on these subjects this week, and my mentor gave me some code along with tasks to work on it.
But the code he gave me is not working. I have tried to fix it all day with no result. Because I don't know the background of neural networks and PyTorch, it is harder for me to understand the problem.
I need your help with this.
Thank you!
import torch
import numpy as np
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torchsummary import summary
#DEFINE YOUR DEVICE
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device) #if cpu, go Runtime-> Change runtime type-> Hardware accelerator GPU -> Save -> Redo previous steps
#DOWNLOAD DATASET
train_data = datasets.Omniglot('./data', background=True, download = True, transform = transforms.ToTensor())
test_data = datasets.Omniglot('./data',background = False, download = True, transform = transforms.ToTensor())
#DEFINE DATA GENERATOR
batch_size = 50
train_generator = torch.utils.data.DataLoader(train_data, batch_size = batch_size, shuffle = True)
test_generator = torch.utils.data.DataLoader(test_data, batch_size = batch_size, shuffle = False)
#DEFINE NEURAL NETWORK MODEL
class CNN(torch.nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.conv1 = torch.nn.Conv2d(1, 8, kernel_size = 4, stride = 1)
self.conv2 = torch.nn.Conv2d(8, 16, kernel_size = 4, stride = 1)
self.mpool = torch.nn.MaxPool2d(2)
self.fc1 = torch.nn.Linear(18432, 256)
self.fc2 = torch.nn.Linear(256, 64)
self.fc3 = torch.nn.Linear(64, 50)
self.relu = torch.nn.ReLU()
self.sigmoid = torch.nn.Sigmoid()
def forward(self, x):
hidden = self.mpool(self.relu(self.conv1(x)))
hidden = self.mpool(self.relu(self.conv2(hidden)))
hidden = hidden.view(-1,18432)
hidden = self.relu(self.fc1(hidden))
hidden = self.relu(self.fc2(hidden))
output = self.fc3(hidden)
return output
# CREATE MODEL
model = CNN()
model.to(device)
summary(model, (1, 105, 105))
# DEFINE LOSS FUNCTION AND OPTIMIZER
learning_rate = 0.001
loss_fun = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# TRAIN THE MODEL
model.train()
epoch = 10
num_of_batch = np.int(len(train_generator.dataset) / batch_size)
loss_values = np.zeros(epoch * num_of_batch)
for i in range(epoch):
for batch_idx, (x_train, y_train) in enumerate(train_generator):
x_train, y_train = x_train.to(device), y_train.to(device)
optimizer.zero_grad()
y_pred = model(x_train)
loss = loss_fun(y_pred, y_train)
loss_values[num_of_batch * i + batch_idx] = loss.item()
loss.backward()
optimizer.step()
if (batch_idx + 1) % batch_size == 0:
print('Epoch: {}/{} [Batch: {}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
i + 1, epoch, (batch_idx + 1) * len(x_train), len(train_generator.dataset),
100. * (batch_idx + 1) / len(train_generator), loss.item()))
#PLOT THE LEARNING CURVE
iterations = np.linspace(0,epoch,num_of_batch*epoch)
plt.plot(iterations, loss_values)
plt.title('Learning Curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid('on')
#TEST THE MODEL
model.eval()
correct=0
total=0
for x_val, y_val in test_generator:
x_val = x_val.to(device)
y_val = y_val.to(device)
output = model(x_val)
y_pred = output.argmax(dim=1)
for i in range(y_pred.shape[0]):
if y_val[i]==y_pred[i]:
correct += 1
total +=1
print('Validation accuracy: %.2f%%' %((100*correct)//(total)))
Here is the error that I receive.
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/loss.py:528: UserWarning: Using a target size (torch.Size([50])) that is different to the input size (torch.Size([25, 50])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-11-bffd863688df> in <module>()
13 loss = loss_fun(y_pred, y_train)
14 loss_values[num_of_batch*i+batch_idx] = loss.item()
---> 15 loss.backward()
16 optimizer.step()
17 if (batch_idx+1) % batch_size == 0:
1 frames
/usr/local/lib/python3.7/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
147 Variable._execution_engine.run_backward(
148 tensors, grad_tensors_, retain_graph, create_graph, inputs,
--> 149 allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag
150
151
RuntimeError: Found dtype Long but expected Float
Your dataset is returning integers for your labels; you should cast them to floating point. One way of solving it is to do:
loss = loss_fun(y_pred, y_train.float())
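For instance, in the training loop from the question, the only change would be where the loss is computed (a sketch of that one line in context, everything else unchanged):
for batch_idx, (x_train, y_train) in enumerate(train_generator):
    x_train, y_train = x_train.to(device), y_train.to(device)
    optimizer.zero_grad()
    y_pred = model(x_train)
    # cast the integer (Long) labels to float so MSELoss sees matching dtypes
    loss = loss_fun(y_pred, y_train.float())
    loss.backward()
    optimizer.step()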
I recently shifted to PyTorch from Keras and I am still trying to understand how all of this works. Below is the code I have implemented to classify the MNIST dataset using a simple MLP. Just as I used to do in Keras, I have flattened each 28x28 image into a vector of 784, and I have also created a one-hot representation of my labels.
In the model I was hoping that, given a vector of 784, the model would output a one-hot vector with probabilities, but as soon as my code reaches the loss computation I get the following error:
RuntimeError: 1D target tensor expected, multi-target not supported
Below is my code:
import numpy as np
import matplotlib.pyplot as plt
import torch
import time
from torch import nn, optim
from keras.datasets import mnist
from torch.utils.data import Dataset, DataLoader
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
# ----------------------------------------------------
class MnistDataset(Dataset):
def __init__(self, data_size=0):
(x, y), (_, _) = mnist.load_data()
x = [i.flatten() for i in x]
x = np.array(x, dtype=np.float32)
if data_size < 0 or data_size > len(y):
assert ("Data size should be between 0 to number of files in the dataset")
if data_size == 0:
data_size = len(y)
self.data_size = data_size
# picking 'data_size' random samples
self.x = x[:data_size]
self.y = y[:data_size]
# scaling between 0-1
self.x = (self.x / 255)
# Creating one-hot representation of target
y_encoded = []
for label in y:
encoded = np.zeros(10)
encoded[label] = 1
y_encoded.append(encoded)
self.y = np.array(y_encoded)
def __len__(self):
return self.data_size
def __getitem__(self, index):
x_sample = self.x[index]
label = self.y[index]
return x_sample, label
# ----------------------------------------------------
num_train_samples = 10000
num_test_samples = 2000
# Each generator returns a single
# sample & its label on each iteration.
mnist_train = MnistDataset(data_size=num_train_samples)
mnist_test = MnistDataset(data_size=num_test_samples)
# Each generator returns a batch of samples on each iteration.
train_loader = DataLoader(mnist_train, batch_size=128, shuffle=True) # 79 batches
test_loader = DataLoader(mnist_test, batch_size=128, shuffle=True) # 16 batches
# ----------------------------------------------------
# Defining the Model Architecture
class MLP(nn.Module):
def __init__(self):
super().__init__()
self.fc1 = nn.Linear(28 * 28, 100)
self.act1 = nn.ReLU()
self.fc2 = nn.Linear(100, 50)
self.act2 = nn.ReLU()
self.fc3 = nn.Linear(50, 10)
self.act3 = nn.Sigmoid()
def forward(self, x):
x = self.act1(self.fc1(x))
x = self.act2(self.fc2(x))
output = self.act3(self.fc3(x))
return output
# ----------------------------------------------------
model = MLP()
# Defining optimizer and loss function
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
# ----------------------------------------------------
# Training the model
epochs = 10
print("Training Started...")
for epoch in range(epochs):
for batch_index, (inputs, targets) in enumerate(train_loader):
optimizer.zero_grad() # Zero the gradients
outputs = model(inputs) # Forward pass
loss = criterion(outputs, targets) # Compute the Loss
loss.backward() # Compute the Gradients
optimizer.step() # Update the parameters
# Evaluating the model
total = 0
correct = 0
with torch.no_grad():
for batch_idx, (inputs, targets) in enumerate(test_loader):
outputs = model(inputs)
_, predicted = torch.max(outputs.data, 1)
total += targets.size(0)
correct += predicted.eq(targets.data).cpu().sum()
print('Epoch : {} Test Acc : {}'.format(epoch, (100. * correct / total)))
print("Training Completed Sucessfully")
# ----------------------------------------------------
I also read some other posts related to the same problem, and most of them said that for CrossEntropy loss the target has to be a single number, which goes completely over my head. Can someone please explain a solution? Thank you.
For nn.CrossEntropyLoss you don't need a one-hot representation of the labels; you just need to pass the prediction logits, whose shape is (batch_size, n_class), and a target vector of shape (batch_size,).
So just pass in the label index vector y instead of a one-hot vector.
Fixed version of your code:
class MnistDataset(Dataset):
def __init__(self, data_size=0):
(x, y), (_, _) = mnist.load_data()
x = [i.flatten() for i in x]
x = np.array(x, dtype=np.float32)
if data_size < 0 or data_size > len(y):
assert ("Data size should be between 0 to number of files in the dataset")
if data_size == 0:
data_size = len(y)
self.data_size = data_size
# picking 'data_size' random samples
self.x = x[:data_size]
self.y = y[:data_size]
# scaling between 0-1
self.x = (self.x / 255)
self.y = y # <--
def __len__(self):
return self.data_size
def __getitem__(self, index):
x_sample = self.x[index]
label = self.y[index]
return x_sample, label
Take a look at the PyTorch documentation for more detail:
https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
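As a minimal, self-contained sketch of the expected shapes (not part of the question's code):
import torch
from torch import nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(128, 10)            # (batch_size, n_class) raw scores from the model
targets = torch.randint(0, 10, (128,))   # (batch_size,) integer class indices, no one-hot needed
loss = criterion(logits, targets)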
Purely for learning, I'd like to get the following code to work without a DataLoader. I use Huggingface transformers regularly, yet I struggle with PyTorch dimensions all the time, so I have started with some simple projects from the book "Deep Learning with PyTorch." One of the problems in the book suggested using a wine quality dataset with a super simple linear model. I have toiled with the dimensions of the data, which I think is the source of my error:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (3919x1 and 11x100)
Data is available here
import csv
from collections import OrderedDict
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
wine_path = "winequality-white.csv"
wine_quality_numpy = np.loadtxt(wine_path, dtype=np.float32, delimiter=";",
skiprows=1)
col_list = next(csv.reader(open(wine_path), delimiter=';'))
wineq = torch.from_numpy(wine_quality_numpy)
# print(wineq.shape, wineq.dtype)
data = wineq[:, :-1]
target = wineq[:, -1]
target = target.unsqueeze(1)
n_samples = wine_quality_numpy.shape[0]
n_val = int(0.2 * n_samples)
shuffled_indices = torch.randperm(n_samples)
train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]
target_train = target[train_indices]
data_train = data[train_indices]
target_val = target[val_indices]
data_val = data[val_indices]
seq_model = nn.Sequential(OrderedDict([
('hidden_linear', nn.Linear(11, 100)),
('hidden_activation', nn.Tanh()),
('output_linear', nn.Linear(100, 7))
]))
def training_loop(n_epochs, optimizer, model, loss_fn, target_train, target_val,
data_train, data_val):
for epoch in range(1, n_epochs + 1):
t_p_train = model(target_train) # <1>
loss_train = loss_fn(t_p_train, data_train)
t_p_val = model(t_u_val) # <1>
loss_val = loss_fn(t_p_val, data_val)
optimizer.zero_grad()
loss_train.backward() # <2>
optimizer.step()
if epoch == 1 or epoch % 1000 == 0:
print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
f" Validation loss {loss_val.item():.4f}")
optimizer = optim.SGD(seq_model.parameters(), lr=1e-3) # <1>
training_loop(
n_epochs = 5000,
optimizer = optimizer,
model = seq_model,
loss_fn = nn.MSELoss(),
target_train = target_train,
target_val = target_val,
data_train = data_train,
data_val = data_val)
Thank you!
In my haste I had the training data and labels swapped. Here is the fixed section.
seq_model = nn.Sequential(OrderedDict([
('hidden_linear', nn.Linear(11, 100)),
('hidden_activation', nn.Tanh()),
('output_linear', nn.Linear(100, 7))
]))
def training_loop(n_epochs, optimizer, model, loss_fn, target_train, target_val,
data_train, data_val):
for epoch in range(1, n_epochs + 1):
t_p_train = model(data_train) # <1>
loss_train = loss_fn(t_p_train, target_train)
t_p_val = model(data_val) # <1>
loss_val = loss_fn(t_p_val, target_val)
optimizer.zero_grad()
loss_train.backward() # <2>
optimizer.step()
if epoch == 1 or epoch % 1000 == 0:
print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
f" Validation loss {loss_val.item():.4f}")
I am trying to replicate old code that I had in TensorFlow, but in Keras format. For some reason my loss is always NaN. I think the error is in the loss that I am using ('categorical_crossentropy' in Keras vs. 'tf.nn.softmax_cross_entropy_with_logits' in TensorFlow).
Keras code:
import keras
from keras.models import Sequential
from keras.layers import Dropout, Dense, Activation
from keras.regularizers import l2
from keras.layers.normalization import BatchNormalization
# Keras items
from keras.optimizers import Adam, Nadam
from keras.activations import relu, elu
from keras.losses import binary_crossentropy, categorical_crossentropy
from keras import metrics
import pandas as pd
import numpy as np
x_main = pd.read_csv("glioma DB X.csv")
y_main = pd.read_csv("glioma DB Y.csv")
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_main, y_main, test_size=0.3)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5)
# train shape
np.shape(x_train), np.shape(y_train)
((132, 47), (132, 1))
# Normalize training data; will want to have the same mu and sigma for test
def normalize_features(dataset):
mu = np.mean(dataset, axis = 0) # columns
sigma = np.std(dataset, axis = 0)
norm_parameters = {'mu': mu,
'sigma': sigma}
return (dataset-mu)/(sigma+1e-10), norm_parameters
# Normal X data; using same mu and sigma from test set;
x_train, norm_parameters = normalize_features(x_train)
x_val = (x_val-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
x_test = (x_test-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
params = {'lr': 0.001,
'batch_size': 30,
'epochs': 8000,
'dropout': 0.5,
'weight_regulizer':['l2'],
'optimizer': 'adam',
'losses': 'categorical_crossentropy',
'activation':'relu',
'last_activation': 'softmax'}
from keras.utils.np_utils import to_categorical
#categorical_labels = to_categorical(int_labels, num_classes=None)
if params['losses']=='categorical_crossentropy':
y_train = to_categorical(y_train,num_classes=4)
y_val = to_categorical(y_val,num_classes=4)
y_test = to_categorical(y_test,num_classes=4)
model = Sequential()
# layer 1
model.add(Dense(30, input_dim=x_train.shape[1],
W_regularizer=l2(0.01),
kernel_initializer='he_uniform'))
model.add(BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True))
model.add(Activation(params['activation']))
model.add(Dropout(params['dropout']))
# layer 2
model.add(Dense(20, W_regularizer=l2(0.01),
kernel_initializer='he_uniform'))
model.add(BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True))
model.add(Activation(params['activation']))
model.add(Dropout(params['dropout']))
# if we want to also test for number of layers and shapes, that's possible
#hidden_layers(model, params, 1)
# Last layer
model.add(Dense(4, activation=params['last_activation'],
kernel_initializer='he_uniform'))
model.compile(loss=params['losses'],
optimizer=keras.optimizers.adam(lr=params['lr']),
metrics=['categorical_accuracy'])
history = model.fit(x_train, y_train,
validation_data=[x_val, y_val],
batch_size=params['batch_size'],
epochs=params['epochs'],
verbose=1)
Working code using TensorFlow, which gives me a nice loss graph:
x_train, x_test, y_train, y_test = train_test_split(X_main, Y_main, test_size=0.3)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5)
# ANOTHER OPTION IS TO USE SKLEARN sklearn.model_selection.ShuffleSplit
# look into stratification
# Normalize training data; will want to have the same mu and sigma for test
def normalize_features(dataset):
mu = np.mean(dataset, axis = 0) # columns
sigma = np.std(dataset, axis = 0)
norm_parameters = {'mu': mu,
'sigma': sigma}
return (dataset-mu)/(sigma+1e-10), norm_parameters
# TRY LOG TRANSFORMATION LOG(1+X) to deal with outliers
# change ordinal to one hot vector
# to make label encoder
# for c in x_train.columns[x_train.dtype == 'object']:
# X[c] (which was copy of xtrain) X[c].factorize()[0]
# able to plot feature importance in random forest
# Normal X data; using same mu and sigma from test set; then transposed
x_train, norm_parameters = normalize_features(x_train)
x_val = (x_val-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
x_test = (x_test-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
x_train = np.transpose(x_train)
x_val = np.transpose(x_val)
x_test = np.transpose(x_test)
y_train = np.transpose(y_train)
y_val = np.transpose(y_val)
y_test = np.transpose(y_test)
# converting values from database to matrix
x_train = x_train.as_matrix()
x_val = x_val.as_matrix()
x_test = x_test.as_matrix()
y_train = y_train.as_matrix()
y_val = y_val.as_matrix()
y_test = y_test.as_matrix()
# testing shape
#print(y_train.shape)
#print(y_val.shape)
#print(y_test.shape)
#
#print(x_train.shape)
#print(x_val.shape)
#print(x_test.shape)
# convert y to array per value so 3 = [0 0 1]
def convert_to_one_hot(Y, C):
Y = np.eye(C)[Y.reshape(-1)].T
return Y
y_train = convert_to_one_hot(y_train, 4)
y_val = convert_to_one_hot(y_val, 4)
y_test = convert_to_one_hot(y_test, 4)
print ("number of training examples = " + str(x_train.shape[1]))
print ("number of test examples = " + str(x_test.shape[1]))
print ("X_train shape: " + str(x_train.shape))
print ("Y_train shape: " + str(y_train.shape))
print ("X_test shape: " + str(x_test.shape))
print ("Y_test shape: " + str(y_test.shape))
# minibatches for later
def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):
"""
Creates a list of random minibatches from (X, Y)
Arguments:
X -- input data, of shape (input size, number of examples)
Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples)
mini_batch_size - size of the mini-batches, integer
seed -- this is only for the purpose of grading, so that your "random" minibatches are the same as ours.
Returns:
mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y)
"""
m = X.shape[1] # number of training examples
mini_batches = []
# Step 1: Shuffle (X, Y)
permutation = list(np.random.permutation(m))
shuffled_X = X[:, permutation]
shuffled_Y = Y[:, permutation].reshape((Y.shape[0],m))
# Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
num_complete_minibatches = math.floor(m/mini_batch_size) # number of mini batches of size mini_batch_size in your partitioning
for k in range(0, num_complete_minibatches):
mini_batch_X = shuffled_X[:, k * mini_batch_size : k * mini_batch_size + mini_batch_size]
mini_batch_Y = shuffled_Y[:, k * mini_batch_size : k * mini_batch_size + mini_batch_size]
mini_batch = (mini_batch_X, mini_batch_Y)
mini_batches.append(mini_batch)
# Handling the end case (last mini-batch < mini_batch_size)
if m % mini_batch_size != 0:
mini_batch_X = shuffled_X[:, num_complete_minibatches * mini_batch_size : m]
mini_batch_Y = shuffled_Y[:, num_complete_minibatches * mini_batch_size : m]
mini_batch = (mini_batch_X, mini_batch_Y)
mini_batches.append(mini_batch)
return mini_batches
# starting TF graph
# Create X and Y placeholders
def create_xy_placeholder(n_x, n_y):
X = tf.placeholder(tf.float32, shape = [n_x, None], name = 'X')
Y = tf.placeholder(tf.float32, shape = [n_y, None], name = 'Y')
return X, Y
# initialize parameters hidden layers
def initialize_parameters(n_x, scale, hidden_units):
hidden_units= [n_x] + hidden_units
parameters = {}
regularizer = tf.contrib.layers.l2_regularizer(scale)
for i in range(0, len(hidden_units[1:])):
with tf.variable_scope('hidden_parameters_'+str(i+1)):
w = tf.get_variable("W"+str(i+1), [hidden_units[i+1], hidden_units[i]],
initializer=tf.contrib.layers.xavier_initializer(),
regularizer=regularizer)
b = tf.get_variable("b"+str(i+1), [hidden_units[i+1], 1],
initializer = tf.constant_initializer(0.1))
parameters.update({"W"+str(i+1): w})
parameters.update({"b"+str(i+1): b})
return parameters
# forward progression with batch norm and dropout
def forward_propagation(X, parameters, batch_norm=False, keep_prob=1):
a_new = X
for i in range(0, int(len(parameters)/2)-1):
with tf.name_scope('forward_pass_'+str(i+1)):
w = parameters['W'+str(i+1)]
b = parameters['b'+str(i+1)]
z = tf.matmul(w, a_new) + b
if batch_norm == True:
z = tf.layers.batch_normalization(z, momentum=0.99, axis=0)
a = tf.nn.relu(z)
if keep_prob < 1:
a = tf.nn.dropout(a, keep_prob)
a_new = a
tf.summary.histogram('act_'+str(i+1), a_new)
# calculating final Z before input into cost as logit
with tf.name_scope('forward_pass_'+str(int(len(parameters)/2))):
w = parameters['W'+str(int(len(parameters)/2))]
b = parameters['b'+str(int(len(parameters)/2))]
z = tf.matmul(w, a_new) + b
if batch_norm == True:
z = tf.layers.batch_normalization(z, momentum=0.99, axis=0)
return z
# compute cost with option for l2 regularizatoin
def compute_cost(z, Y, parameters, l2_reg=False):
with tf.name_scope('cost'):
logits = tf.transpose(z)
labels = tf.transpose(Y)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits,
labels = labels))
if l2_reg == True:
reg = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
cost = cost + tf.reduce_sum(reg)
with tf.name_scope('Pred/Accuracy'):
prediction=tf.argmax(z)
correct_prediction = tf.equal(tf.argmax(z), tf.argmax(Y))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
return cost, prediction, accuracy
# defining the model (need to add keep_prob for dropout)
def model(X_train, Y_train, X_test, Y_test,
hidden_units=[30, 20, 4], # hidden units/layers
learning_rate = 0.0001, # Learning rate
num_epochs = 10000, minibatch_size = 30, # minibatch/ number epochs
keep_prob=0.5, # dropout
batch_norm=True, # batch normalization
l2_reg=True, scale = 0.01, # L2 regularization/scale is lambda
print_cost = True):
ops.reset_default_graph() # to be able to rerun the model without overwriting tf variables
tf.set_random_seed(1) # to keep consistent results
seed = 3 # to keep consistent results
(n_x, m) = X_train.shape # (n_x: input size, m : number of examples in the train set)
n_y = Y_train.shape[0] # n_y : output size
costs = [] # To keep track of the cost
# Create Placeholders of shape (n_x, n_y)
X, Y = create_xy_placeholder(n_x, n_y)
# Initialize parameters
parameters = initialize_parameters(n_x, scale, hidden_units)
# Forward propagation: Build the forward propagation in the tensorflow graph
z = forward_propagation(X, parameters, keep_prob, batch_norm)
# Cost function: Add cost function to tensorflow graph
cost, prediction, accuracy = compute_cost(z, Y, parameters, l2_reg)
# Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
with tf.name_scope('optimizer'):
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
# Initialize all the variables
init = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# Start the session to compute the tensorflow graph
with tf.Session(config=config) as sess:
# Run the initialization
sess.run(init)
# Do the training loop
for epoch in range(num_epochs):
epoch_cost = 0. # Defines a cost related to an epoch
num_minibatches = int(m / minibatch_size) # number of minibatches of size minibatch_size in the train set
seed = seed + 1
minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)
for minibatch in minibatches:
# Select a minibatch
(minibatch_X, minibatch_Y) = minibatch
# IMPORTANT: The line that runs the graph on a minibatch.
# Run the session to execute the "optimizer" and the "cost", the feedict should contain a minibatch for (X,Y).
_ , minibatch_cost = sess.run([optimizer, cost],
feed_dict = {X: minibatch_X, Y: minibatch_Y})
epoch_cost += minibatch_cost / num_minibatches
# Print the cost every epoch
if print_cost == True and epoch % 100 == 0:
print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
prediction1=tf.argmax(z)
# print('Z5: ', Z5.eval(feed_dict={X: minibatch_X, Y: minibatch_Y}))
print('prediction: ', prediction1.eval(feed_dict={X: minibatch_X,
Y: minibatch_Y}))
correct1=tf.argmax(Y)
# print('Y: ', Y.eval(feed_dict={X: minibatch_X,
# Y: minibatch_Y}))
print('correct: ', correct1.eval(feed_dict={X: minibatch_X,
Y: minibatch_Y}))
if print_cost == True and epoch % 5 == 0:
costs.append(epoch_cost)
# plot the cost
plt.plot(np.squeeze(costs))
plt.ylabel('cost')
plt.xlabel('iterations (per tens)')
plt.title("Learning rate =" + str(learning_rate))
plt.show()
# lets save the parameters in a variable
parameters = sess.run(parameters)
print ("Parameters have been trained!")
# Calculate the correct predictions
correct_prediction = tf.equal(tf.argmax(z), tf.argmax(Y))
# Calculate accuracy on the test set
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print ("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
print ("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
return parameters
# run model on test data
parameters = model(x_train, y_train, x_test, y_test, keep_prob=1)