I'm trying to train on the MNIST dataset in PyTorch, but the loss doesn't decrease below 1.5.
I want the loss to decrease below 1.
What can I do about this problem?
Which part of the code should I fix?
This is my code:
BATCH_SIZE = 8
transform =torchvision.transforms.Compose([torchvision.transforms.ToTensor(),torchvision.transforms.Normalize((0.5,), (0.5,))])
trainset = torchvision.datasets.MNIST(root = "./data/train", train = True, download = True, transform = transform)
trainset, valset = torch.utils.data.random_split(trainset, [50000, 10000])
trainloader = torch.utils.data.DataLoader(trainset, batch_size = BATCH_SIZE, shuffle = True)
valloader = torch.utils.data.DataLoader(valset, batch_size = BATCH_SIZE, shuffle = True)
testset = torchvision.datasets.MNIST(root = "./data/test", train = False, download = True, transform = transform)
testloader = torch.utils.data.DataLoader(testset, batch_size = BATCH_SIZE, shuffle = False)
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.relu = nn.ReLU()
self.pool = nn.MaxPool2d(2, stride=2)
self.conv1 = nn.Conv2d(1,20,5)
self.conv2 = nn.Conv2d(20,50,5)
self.conv3 = nn.Conv2d(50,500,4)
self.conv4 = nn.Conv2d(500,10,1)
self.soft = nn.Softmax(dim=1)
def forward(self, x):
x = self.conv1(x)
x = self.pool(x)
x = self.conv2(x)
x = self.pool(x)
x = self.conv3(x)
x = self.relu(x)
x = self.conv4(x)
x = self.soft(x)
x = x.view(x.size()[0], -1)
return x
class EarlyStopping:
"""earlystopping class"""
def __init__(self, patience=5, verbose=False, path='checkpoint_model.pth'):
self.patience = patience
self.verbose = verbose
self.counter = 0
self.best_score = None
self.early_stop = False
self.val_loss_min = np.Inf
self.path = path
def __call__(self, valid_loss):
score = -valid_loss
if self.best_score is None:
self.best_score = score
self.checkpoint(valid_loss)
elif score - self.best_score < 0.0001:
self.counter += 1
if self.verbose:
print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
if self.counter >= self.patience:
self.early_stop = True
else:
self.best_score = score
self.checkpoint(valid_loss)
self.counter = 0
def checkpoint(self, valid_loss):
if self.verbose:
print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {valid_loss:.6f}).')
self.val_loss_min = valid_loss
earlystopping = EarlyStopping(patience=5, verbose=True)
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.00005, momentum=0.9, weight_decay=0.005)
train_loss=[]
train_acc=[]
val_loss=[]
val_acc=[]
if __name__ == '__main__':
BATCH_SIZE = 8
start = time.time()
for epoch in range(10000):
print('epoch', epoch+1)
sum_loss = 0.0
sum_correct = 0
sum_total = 0
#Training
net = net.train()
for (inputs, labels) in trainloader:
#inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, labels)
sum_loss += loss.item() #Add loss
_, predicted = outputs.max(1)
sum_total += labels.size(0)
sum_correct += (predicted == labels).sum().item()
loss.backward()
optimizer.step()
print("train mean loss={}, accuracy={}"
.format(sum_loss*BATCH_SIZE/len(trainloader.dataset), float(sum_correct/sum_total)))
train_loss.append(sum_loss*BATCH_SIZE/len(trainloader.dataset))
train_acc.append(float(sum_correct/sum_total))
sum_loss = 0.0
sum_correct = 0
sum_total = 0
#Validating
net = net.eval()
for (inputs, labels) in valloader:
#inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, labels)
sum_loss += loss.item()
_, predicted = outputs.max(1)
sum_total += labels.size(0)
sum_correct += (predicted == labels).sum().item()
loss.backward()
optimizer.step()
print("valid mean loss={}, accuracy={}"
.format(sum_loss*BATCH_SIZE/len(valloader.dataset), float(sum_correct/sum_total)))
val_loss.append(sum_loss*BATCH_SIZE/len(valloader.dataset))
val_acc.append(float(sum_correct/sum_total))
#Early stop
earlystopping(val_loss[-1])
if earlystopping.early_stop:
print("Early Stopping!")
break
finish_time = time.time() - start
#Test
net.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for (inputs, labels) in testloader:
output = net(inputs)
test_loss += criterion(output, labels).item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(labels.view_as(pred)).sum().item()
test_loss /= 10000
test_acc = correct / 10000
And this is the resulting image.
https://i.stack.imgur.com/eln4S.png
The loss converges without going below 1.5, and early stopping then terminates training.
Since I have done a similar task (with a different but comparable network), I know that the loss with cross-entropy goes well below 1.5; in my case accuracy was ~95% and the loss ~0.200. Inspecting your picture, I see that the test loss (named "Loss evaluate" in the picture) matches what one would expect for the train/val loss after some epochs. So there is most likely an error in how the loss is computed for the training and validation phases.
You compute the loss for a certain epoch like this:
train_loss.append(sum_loss*BATCH_SIZE/len(trainloader.dataset))
len(trainloader.dataset) does not return the number of batches but the number of samples, so multiplying by BATCH_SIZE when computing the mean loss is correct and necessary here.
Since CrossEntropyLoss by default returns the mean over the batch (reduction='mean'), using
sum_loss*BATCH_SIZE/len(trainloader.dataset)
is correct only if every batch produced by the trainloader really contains BATCH_SIZE samples, i.e. if the loader actually uses the batch size defined in the provided code; from the snippet alone I cannot be completely sure.
What points to a mismatch is that in the test part you don't use len(testloader.dataset) but a hardcoded 10000 to get the mean for the epoch. I suspect the dataloaders (for training, validation and testing) may not use a batch size equal to BATCH_SIZE, and it is most likely 1.
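If it helps, here is a minimal sketch (reusing net, criterion, optimizer and trainloader from the question) of a batch-size-independent way to compute the epoch mean, which also handles a smaller final batch:
sum_loss = 0.0
net.train()
for inputs, labels in trainloader:
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)          # CrossEntropyLoss returns the mean over this batch
    loss.backward()
    optimizer.step()
    sum_loss += loss.item() * inputs.size(0)   # turn the batch mean back into a per-sample sum
epoch_mean_loss = sum_loss / len(trainloader.dataset)
print("train mean loss={}".format(epoch_mean_loss))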
Related
Thanks for your time. After I run the code, the gradients are always zero and the loss is not updating. (I guess it's because the weights are initialized to all 0's, but I don't know how to fix it.) The code is a basic neural network:
class Model(torch.nn.Module): #class
def __init__(self):
super(Model, self).__init__()
self.linear1 = torch.nn.Linear(8,6)
self.linear2 = torch.nn.Linear(6,4)
self.linear3 = torch.nn.Linear(4,1)
self.sigmoid = torch.nn.Sigmoid()
def forward(self, x):
x = self.sigmoid(self.linear1(x))
x = self.sigmoid(self.linear2(x))
x = self.sigmoid(self.linear3(x))
x = F.softmax(x, dim=1)
return x
model = Model() #model
criterion = torch.nn.BCELoss(size_average = False)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for epoch in range(1000): # training
y_pred = model(X_train.float())
loss = criterion(y_pred, y_train.float())
print(epoch, loss.item())
print([x.grad for x in optimizer.param_groups[0]['params']])
optimizer.zero_grad()
loss.backward()
optimizer.step()
And I get all-zero gradients.
I think you forgot to apply backpropagation at that point. Adding loss.backward() just before your print statements will do the trick (it computes the gradients and stores them in each parameter's .grad). Note that by default your weights are not initialized to 0 here; the default initialization for nn.Linear layers is described in the PyTorch documentation.
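In other words, a minimal sketch of the corrected loop (reusing model, criterion, optimizer, X_train, and y_train from your snippet), with a single backward pass per step and the gradient print moved after it:
for epoch in range(1000):
    y_pred = model(X_train.float())
    loss = criterion(y_pred, y_train.float())
    optimizer.zero_grad()
    loss.backward()  # compute gradients and store them in each parameter's .grad
    print(epoch, loss.item())
    print([x.grad for x in optimizer.param_groups[0]['params']])  # populated after backward()
    optimizer.step()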
I have a dataset as shown in this picture: Dataset
I want to approximate a function that fits the data and make predictions on my test dataset. I have tried to use a neural network, as shown, but I get a huge training loss. What should I change? Is it just a problem of layers and neurons?
class NeuralNet(nn.Module):
def __init__(self,
input_dimension,
output_dimension,
n_hidden_layers,
neurons,
regularization_param,
regularization_exp,
retrain_seed,
activation_name):
super(NeuralNet, self).__init__()
# Number of input dimensions n
self.input_dimension = input_dimension
# Number of output dimensions m
self.output_dimension = output_dimension
# Number of neurons per layer
self.neurons = neurons
# Number of hidden layers
self.n_hidden_layers = n_hidden_layers
# Activation function
self.activation_name = activation_name
self.activation = self.get_activation(activation_name)
# Regularization parameter
self.regularization_param = regularization_param
# Regularization exponent
self.regularization_exp = regularization_exp
# Random seed for weight initialization
self.retrain_seed = retrain_seed
if self.n_hidden_layers != 0:
self.input_layer = nn.Linear(self.input_dimension, self.neurons)
self.hidden_layers = nn.ModuleList([nn.Linear(self.neurons, self.neurons) for _ in range(n_hidden_layers - 1)])
self.output_layer = nn.Linear(self.neurons, self.output_dimension)
else:
print("Simple linear regression")
self.linear_regression_layer = nn.Linear(self.input_dimension, self.output_dimension)
self.init_xavier()
def init_xavier(self):
torch.manual_seed(self.retrain_seed)
def init_weights(m):
if type(m) == nn.Linear and m.weight.requires_grad and m.bias.requires_grad:
if self.activation_name in ['tanh', 'relu']:
gain = nn.init.calculate_gain(self.activation_name)
else:
gain = 1
torch.nn.init.xavier_uniform_(m.weight, gain=gain)
m.bias.data.fill_(0)
self.apply(init_weights)
def regularization(self):
reg_loss = 0
for name, param in self.named_parameters():
if 'weight' in name:
reg_loss = reg_loss + torch.norm(param, self.regularization_exp)
return reg_loss
def get_activation(self, activation_name):
if activation_name in ['tanh']:
return nn.Tanh()
elif activation_name in ['relu']:
return nn.ReLU(inplace=True)
elif activation_name in ['lrelu']:
return nn.LeakyReLU(inplace=True)
elif activation_name in ['sigmoid']:
return nn.Sigmoid()
elif activation_name in ['softplus']:
return nn.Softplus(beta=4)
elif activation_name in ['celu']:
return nn.CELU()
else:
raise ValueError('Unknown activation function')
def forward(self, x):
# The forward function performs the set of affine and non-linear transformations defining the network
# (see equation above)
if self.n_hidden_layers != 0:
x = self.activation(self.input_layer(x))
for k, l in enumerate(self.hidden_layers):
x = self.activation(l(x))
return self.output_layer(x)
else:
return self.linear_regression_layer(x)
my_network = NeuralNet(input_dimension=train1.shape[1], output_dimension=test1.shape[1], n_hidden_layers=8, neurons=500,regularization_param= 1e-4,
regularization_exp=2,
retrain_seed=134,
activation_name="tanh")
my_network.double()
optimizer_ = optim.Adam(my_network.parameters(), lr=0.0001, weight_decay=0)
def fit(model, training_set, x_validation_, y_validation_, num_epochs, optimizer, p, verbose=True):
history = [[], []]
regularization_param = model.regularization_param
# Loop over epochs
for epoch in range(num_epochs):
if verbose: print("################################ ", epoch, " ################################")
running_loss = list([0])
# Loop over batches
for j, (x_train_, u_train_) in enumerate(training_set):
def closure():
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
u_pred_ = model(x_train_)
loss_u = torch.mean((u_pred_ - u_train_) ** p)
loss_reg = model.regularization()
loss = loss_u + regularization_param * loss_reg
loss.backward()
# Compute average training loss over batches for the current epoch
running_loss[0] += loss.item() / len(training_set)
return loss
optimizer.step(closure=closure)
y_validation_pred_ = model(x_validation_)
validation_loss = torch.mean((y_validation_pred_.reshape(-1, ) - y_validation_.reshape(-1, )) ** p).item()
history[0].append(running_loss[0])
history[1].append(validation_loss)
if verbose:
print('Training Loss: ', np.round(running_loss[0], 8))
print('Validation Loss: ', np.round(validation_loss, 8))
print('Final Training Loss: ', np.round(history[0][-1], 8))
print('Final Validation Loss: ', np.round(history[1][-1], 8))
return history
n_epochs = 2000
history = fit(my_network, training_set1, x_val1, y_val1, n_epochs, optimizer_, p=2, verbose=False )
Sorry for the long code; I thought it was important to include all of it to give you better information on how to help me.
I have CSI data with shape (5520, 90, 200), corresponding to samples, stacked antennas and subcarriers, and time frames. I am trying to build a variational autoencoder, but I keep getting stuck on the training part: it says that the matrices I am passing in cannot be multiplied.
class Encoder(nn.Module):
def __init__(self, z_dim):
super(Encoder, self).__init__()
x_dim = 90
# Layer 1
self.encoder = nn.Sequential()
self.encoder.add_module('fc1', nn.Linear(x_dim, 360))
self.encoder.add_module('Sigmoid1', nn.Sigmoid())
self.encoder.add_module('BN1', nn.BatchNorm1d(360))
self.encoder.add_module('DO1', nn.Dropout())
# Layer 2
self.encoder.add_module('fc2', nn.Linear(360, 50))
self.encoder.add_module('Sigmoid2', nn.Sigmoid())
self.encoder.add_module('BN2', nn.BatchNorm1d(50))
self.encoder.add_module('DO2', nn.Dropout())
# Latent
self.k_mu = nn.Linear(50, z_dim)
self.k_sigma = nn.Linear(50, z_dim)
def reparameterize(self, mean, logvar):
eps = np.random.normal(shape=mean.shape)
return eps * tf.exp(logvar * .5) + mean
def forward(self, x):
h = self.encoder(x)
k_mu = self.k_mu(h)
k_sigma = self.k_sigma(h)
z_dim = reparameterize(k_mu, log(k_sigma))
return z_dim
class Decoder(nn.Module):
def __init__(self, z_dim):
super(Decoder, self).__init__()
x_dim = 90
#1
self.decoder = nn.Sequential()
self.decoder.add_module('fc3', nn.Linear(z_dim, 50))
self.decoder.add_module('Sigmoid3', nn.Sigmoid())
self.decoder.add_module('BN3', nn.BatchNorm1d(50))
self.decoder.add_module('DO3', nn.Dropout())
#2
self.decoder.add_module('fc4', nn.Linear(50, 360))
self.decoder.add_module('Sigmoid4', nn.Sigmoid())
self.decoder.add_module('BN4', nn.BatchNorm1d(360))
self.decoder.add_module('DO4', nn.Dropout())
self.decoder.add_module('x_recon', nn.Linear(360, x_dim))
def forward(self, z):
x_ = self.decoder(z)
return x_
class VAE(nn.Module):
def __init__(self, z_dim):
super(VAE, self).__init__()
self.encoder = Encoder(z_dim)
self.decoder = Decoder(z_dim)
def sample(self, z_mu, z_log_var):
z_std = torch.exp(z_log_var)
eps = torch.randn_like(z_std)
z = z_mu + z_std*eps
return z
def forward(self, x):
# Encode
z_mu, z_log_var = self.encoder(x)
# Sample
if self.training:
# Sample if we are training
z = self.sample(z_mu, z_log_var)
else:
z = z_mu
# Decode
x_ = self.decoder(z)
return x_, z_mu, z_log_var
vae = VAE(z_dim=10).cuda()
optimizer = optim.Adam(vae.parameters())
epochs = 20 #Number of epochs
# Loop over epochs
for epoch in range(1, epochs + 1):
vae.train()
train_loss = 0
# Loop over batches. Note that the labels are ignored in this case
for batch_idx, (data) in enumerate(train_loader):
data = data.cuda()
optimizer.zero_grad()
data = data.view(data.size(1), -1)
x_recon, z_mu, z_log_var = vae(data)
# Calculate the loss
KL_loss, E_loss = loss_function(x_recon, data, z_mu, z_log_var)
loss = KL_loss + E_loss
loss.backward()
# Accumulate the loss to calculate the average
train_loss += loss.item()
# Step the optimizer
optimizer.step()
if batch_idx % 100 == 0:
print('Epoch: {} [{}/{} ({:.0f}%)]\tKL Loss: {:.6f}\tBC Loss: {:.6f}\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), KL_loss.item() / len(data), E_loss.item() / len(data), loss.item() / len(data)))
print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / len(train_loader.dataset)))
And I get the following error: mat1 and mat2 shapes cannot be multiplied (90x12800 and 90x360)
I cannot figure out why I am getting it.
I am training a simple variational autoencoder with a negative binomial likelihood for the decoder. I am using Python 3.7.1 and TensorFlow 2.0.0.
The model trained without any problems for tens of epochs, but then all weights, the loss, and the gradients suddenly became NaN during training. I modified the code to find which of these variables becomes NaN first and found that the gradients became NaN first, which then affected the other variables.
I have googled similar issues, but in most cases the NaN appeared in the loss, which is different from this case. I have tried a smaller learning rate and clipping the loss, but nothing resolved the problem.
Here is the model class for autoencoder model:
class Encoder(tf.keras.layers.Layer):
def __init__(self, hidden_dim, latent_dim):
super(Encoder, self).__init__()
self.encoder_fc_1 = tf.keras.layers.Dense(hidden_dim, activation=tf.nn.leaky_relu)
self.encoder_fc_2 = tf.keras.layers.Dense(hidden_dim, activation=tf.nn.leaky_relu)
self.encoder_latent = tf.keras.layers.Dense(latent_dim + latent_dim)
def call(self, input):
h = tf.math.l2_normalize(input, 1)
h = self.encoder_fc_1(h)
h = self.encoder_fc_2(h)
return tf.split(self.encoder_latent(h), num_or_size_splits=2, axis=1)
class Decoder(tf.keras.layers.Layer):
def __init__(self, hidden_dim, vocab_size):
super(Decoder, self).__init__()
self.decoder_fc_1 = tf.keras.layers.Dense(hidden_dim, activation=tf.nn.leaky_relu)
self.decoder_fc_2 = tf.keras.layers.Dense(hidden_dim, activation=tf.nn.leaky_relu)
self.decoder_fc_3 = tf.keras.layers.Dense(vocab_size + vocab_size)
def call(self, z):
h = self.decoder_fc_1(z)
h = self.decoder_fc_2(h)
return tf.split(self.decoder_fc_3(h), num_or_size_splits=2, axis=1)
class NBVAE(tf.keras.Model):
def __init__(self, config):
super(NBVAE, self).__init__()
self.optimizer = tf.keras.optimizers.Adam(config["learning_rate"])
self.encoder = Encoder(config["hidden_dim"], config["latent_dim"])
self.decoder = Decoder(config["hidden_dim"], config["vocab_size"])
def call(self, input):
mean, logvar = self.encoder(input)
z = reparameterize(mean, logvar)
h_r, h_p = self.decoder(z)
return mean, logvar, z, h_r, h_p
def reparameterize(mean, logvar):
eps = tf.random.normal(shape=mean.shape)
return tf.add(tf.multiply(eps, tf.math.exp( tf.math.divide(logvar, 2))), mean)
def log_normal_pdf(sample, mean, logvar, raxis=1):
log2pi = tf.math.log(2. * np.pi)
return tf.reduce_sum(-.5 * ((sample - mean) ** 2. * tf.exp(-logvar) + logvar + log2pi), axis=raxis)
def compute_logpx_z(input, h_r, h_p):
temp = tf.exp(-tf.multiply(tf.exp(h_r), tf.math.log(tf.exp(h_p) + 1)))
temp_cliped = tf.clip_by_value(temp, 1e-5, 1 - 1e-5)
ll = tf.multiply(input, tf.math.log(1 - temp_cliped)) + tf.multiply(1 - input, tf.math.log(temp_cliped))
#print("logpx_z: {}".format(tf.reduce_sum(ll, axis=-1)))
return tf.reduce_sum(ll, axis=-1), temp
def compute_loss(model, input):
mean, logvar, z, h_r, h_p = model(input)
logpx_z, temp = compute_logpx_z(input, h_r, h_p)
logpz = log_normal_pdf(z, 0., 0.)
logqz_x = log_normal_pdf(z, mean, logvar)
return tf.negative(tf.reduce_mean(logpx_z + logpz - logqz_x)), temp
And here is the code snippet for training the model.
I put some if statements in the middle of the code to check which variable becomes NaN first.
print("start training...")
num_batches = int(np.ceil(len(training_data) / batch_size))
epoch_loss = []
for epoch in range(epochs):
print("epoch: {}".format(epoch+1))
progbar = tf.keras.utils.Progbar(num_batches)
loss_record = []
for i in range(num_batches):
x_batch = training_data[i*batch_size:(i+1)*batch_size]
x_batch = one_hot(x_batch, depth=len(concept2id))
with tf.GradientTape() as tape:
loss, temp = compute_loss(nbvae, x_batch)
print("step{s} loss: {l}".format(s=i, l=loss.numpy()))
# checking the loss
if np.isnan(loss.numpy()):
print("nan loss is detected")
detect_nan = True
break
loss_record.append(loss.numpy())
gradients = tape.gradient(loss, nbvae.trainable_variables)
#gradients, global_norm = tf.clip_by_global_norm(tape.gradient(loss, nbvae.trainable_variables), 10)
print("checking gradients...")
gradient_nancount = 0
for _, grad in enumerate(gradients):
gradient_nancount += np.sum(np.isnan(grad))
if gradient_nancount != 0:
print("nan is detected in gradients")
print("saving the current gradients and weights...")
save_data(os.path.join(output_path, "error_gradients.pkl"), gradients)
save_data(os.path.join(output_path, "error_tvariables.pkl"), nbvae.trainable_variables)
detect_nan = True
break
nbvae.optimizer.apply_gradients(zip(gradients, nbvae.trainable_variables))
print("checking the updated weights...")
weight_nancount = 0
for _, weight in enumerate(nbvae.weights):
weight_nancount += np.sum(np.isnan(weight))
if weight_nancount != 0:
print("nan is detected in weights")
print("saving the current gradients and weights...")
save_data(os.path.join(output_path, "error_gradients.pkl"), gradients)
save_data(os.path.join(output_path, "error_tvariables.pkl"), nbvae.trainable_variables)
detect_nan = True
break
progbar.add(1)
if detect_nan:
epoch_loss.append(np.nan)
nbvae.save_weights(os.path.join(output_path, "nbvae_error{}".format(epoch+1)))
break
print("average epoch loss: {}".format(np.mean(loss_record)))
epoch_loss.append(np.mean(loss_record))
Does anyone know a way to resolve this problem, or possible reasons for it? Thank you in advance for your time.
I tried to add a dropout rate to my neural network (NN) using torch, and I got a strange error at the end. How can I fix it?
The idea is that I wrote the NN inside a function to make it easier to call. The function is the following:
(I personally think the problem lies inside the NN class, but for the sake of having a working example I'm including everything.)
def train_neural_network(data_train_X, data_train_Y, batch_size, learning_rate, graph = True, dropout = 0.0 ):
input_size = len(data_test_X.columns)
hidden_size = 200
num_classes = 4
num_epochs = 120
batch_size = batch_size
learning_rate = learning_rate
# The class of NN
class NeuralNet(nn.Module):
def __init__(self, input_size, hidden_size, num_classes, p = dropout):
super(NeuralNet, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.fc2 = nn.Linear(hidden_size, hidden_size)
self.fc3 = nn.Linear(hidden_size, num_classes)
def forward(self, x, p = dropout):
out = F.relu(self.fc1(x))
out = F.relu(self.fc2(out))
out = nn.Dropout(out, p) #drop
out = self.fc3(out)
return out
# Prepare data
X_train = torch.from_numpy(data_train_X.values).float()
Y_train = torch.from_numpy(data_train_Y.values).float()
# Loading data
train = torch.utils.data.TensorDataset(X_train, Y_train)
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size)
net = NeuralNet(input_size, hidden_size, num_classes)
# Loss
criterion = nn.CrossEntropyLoss()
# Optimiser
optimiser = torch.optim.SGD(net.parameters(), lr=learning_rate)
# Proper training
total_step = len(train_loader)
loss_values = []
for epoch in range(num_epochs+1):
net.train()
train_loss = 0.0
for i, (predictors, results) in enumerate(train_loader, 0):
# Forward pass
outputs = net(predictors)
results = results.long()
results = results.squeeze_()
loss = criterion(outputs, results)
# Backward and optimise
optimiser.zero_grad()
loss.backward()
optimiser.step()
# Update loss
train_loss += loss.item()
loss_values.append(train_loss / batch_size )
print('Finished Training')
return net
And when I call the function:
net = train_neural_network(data_train_X = data_train_X, data_train_Y = data_train_Y, batch_size = batch_size, learning_rate = learning_rate, dropout = 0.1)
The error is the following:
net = train_neural_network(data_train_X = data_train_X, data_train_Y = data_train_Y, batch_size = batch_size, learning_rate = learning_rate, dropout = 0.1)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/dropout.py in __init__(self, p, inplace)
8 def __init__(self, p=0.5, inplace=False):
9 super(_DropoutNd, self).__init__()
---> 10 if p < 0 or p > 1:
11 raise ValueError("dropout probability has to be between 0 and 1, "
12 "but got {}".format(p))
RuntimeError: bool value of Tensor with more than one value is ambiguous
Why do you think there is an error?
Before adding the dropout rate, everything was working. Bonus points if you know how to implement a bias inside my network, for example on the hidden layer; I can't find any example online.
The error comes from the forward pass: nn.Dropout(out, p) constructs a Dropout module and passes your activation tensor out as the probability argument, and comparing that tensor against 0 and 1 inside Dropout's __init__ is what raises the ambiguous-bool error. Create the Dropout module once in __init__ and apply it in forward. Change your architecture to this:
class NeuralNet(nn.Module):
def __init__(self, input_size, hidden_size, num_classes, p=dropout):
super(NeuralNet, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.fc2 = nn.Linear(hidden_size, hidden_size)
self.fc3 = nn.Linear(hidden_size, num_classes)
self.dropout = nn.Dropout(p=p)
def forward(self, x):
out = F.relu(self.fc1(x))
out = F.relu(self.fc2(out))
out = self.dropout(out)  # apply dropout before the output layer, as in your original forward
out = self.fc3(out)
return out
Let me know if it works.
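On the bias part of your question: nn.Linear creates a learnable bias by default (bias=True), so the layers above already have one; you only pass bias=False if you want to remove it. For illustration only (hypothetical layer names):
hidden = nn.Linear(hidden_size, hidden_size)                       # has a bias term by default
hidden_no_bias = nn.Linear(hidden_size, hidden_size, bias=False)   # bias disabled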