I state that I am new on PyTorch. I wrote this simple program for binary classification. I also created the CSV with two columns of random values, with the "ok" column whose value is 1 only if the other two values are included between two values I decided at the same time. Example:
diam_int,diam_est,ok
37.782,125.507,0
41.278,115.15,1
42.248,115.489,1
29.582,113.141,0
37.428,107.247,0
32.947,123.233,0
37.146,121.537,0
38.537,110.032,0
26.553,113.752,0
27.369,121.144,0
41.632,108.178,0
27.655,111.279,0
29.779,109.268,0
43.695,115.649,1
44.587,116.126,0
It seems to me all done correctly, loss actually lowers (it comes back up slightly after many epochs but I don't think it's a problem), but when I try to test my Net after the training, with a sample batch of the trainset, what I got is always a prediction below 0.5 (so always 0 as estimated output) with a completely random trend.
with torch.no_grad():
pred = net(trainSet[10])
trueVal = ySet[10]
for i in range(len(trueVal)):
print(trueVal[i], pred[i])
Here is my Net class:
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self) :
super().__init__()
self.fc1 = nn.Linear(2, 32)
self.fc2 = nn.Linear(32, 64)
self.fc3 = nn.Linear(64, 1)
def forward(self, x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return torch.sigmoid(x)
Here is my Main class:
import torch
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
from net import Net
df = pd.read_csv("test.csv")
y = torch.Tensor(df["ok"])
ySet = torch.split(y, 32)
df.drop(["ok"], axis=1, inplace=True)
data = F.normalize(torch.Tensor(df.values), dim=1)
trainSet = torch.split(data, 32)
net = Net()
optimizer = optim.Adam(net.parameters(), lr=0.001)
lossFunction = torch.nn.BCELoss()
EPOCHS = 300
for epoch in range(EPOCHS):
for i, X in enumerate(trainSet):
optimizer.zero_grad()
output = net(X)
target = ySet[i].reshape(-1, 1)
loss = lossFunction(output, target)
loss.backward()
optimizer.step()
if epoch % 20 == 0:
print(loss)
What am I doing wrong? Thanks in advance for the help
Your model is underfit. Increasing the number of epochs to (say) 3000 makes the model predict perfectly on the examples you showed.
However after this many epochs the model may be overfit. A good practice is to use validation data (separate the generated data into train and validation sets), and check the validation loss in each epoch. When the validation loss starts increasing you start overfitting and stop the training.
Related
As a trial, I'm implementing Xception to classify images without using pretrained weight in Tensorflow.
However, the accuracy are too low compared to the original paper.
Could somebody share any advice to address this problem?
I prepared 500 out of 1000 classes from ImageNet and train ready-Xception model with this data from scrach .
I tried the same learning rate and optimizer as used in the original paper.
– Optimizer: SGD
– Momentum: 0.9
– Initial learning rate: 0.045
– Learning rate decay: decay of rate 0.94 every 2 epochs
However, this did not work so well.
I know it is better to use all of 1000 classes rather than only 500, however, I couldn't prepare storage for it.
Did it affect the performance of my code?
Here is my code.
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers, losses, models, optimizers, callbacks, applications, preprocessing
# scheduler
def scheduler(epoch, lr):
return 0.045*0.94**(epoch/2.0)
lr_decay = callbacks.LearningRateScheduler(scheduler)
# early stopping
EarlyStopping = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=500, verbose=0, mode='auto', restore_best_weights=True)
# build xception
inputs = tf.keras.Input(shape=(224, 224, 3))
x = tf.cast(inputs, tf.float32)
x = tf.keras.applications.xception.preprocess_input(x) #preprocess image
x = applications.xception.Xception(weights=None, include_top=False,)(x, training=True)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(nb_class)(x)
outputs = layers.Softmax()(x)
model = tf.keras.Model(inputs, outputs)
model.compile(optimizer=optimizers.SGD(momentum=0.9, nesterov=True),
loss = 'categorical_crossentropy',
metrics= ['accuracy'])
# fitting data
history = model.fit(image_gen(df_train_chunk, 224, 224, ), #feed images with a generator
batch_size = 32,
steps_per_epoch = 64,
epochs=1000000000,
validation_data = image_gen(df_valid_chunk, 224, 224, ), #feed images with a generator
validation_steps = 64,
callbacks = [lr_decay, EarlyStopping],
)
My results are below. In the original paper, its accuracy reached around 0.8.
In contrast, the performance of my code is too poor.
P.S.
Some might wonder if my generator got wrong, so I put my generator code and result below.
from PIL import Image, ImageEnhance, ImageOps
def image_gen(df_data, h, w, shuffle=True):
nb_class = len(np.unique(df_data['Class']))
while True:
if shuffle:
df_data = df_data.sample(frac=1)
for i in range(len(df_data)):
X = Image.open((df_data.iloc[i]).loc['Path'])
X = X.convert('RGB')
X = X.resize((w,h))
X = preprocessing.image.img_to_array(X)
X = np.expand_dims(X, axis=0)
klass = (df_data.iloc[i]).loc['Class']
y = np.zeros(nb_class)
y[klass] = 1
y = np.expand_dims(y, axis=0)
yield X, y
train_gen = image_gen(df_train_chunk, 224, 224, )
for i in range(5):
X, y = next(train_gen)
print('\n\n class: ', y.argmax(-1))
display(Image.fromarray(X.squeeze(0).astype(np.uint8)))
the result is below.
When you chose only 500 labels, do you choose the first 500?
softmax output starting from 0, so make sure your labels staring from 0 to 499 either.
This is my first time posting in stack overflow so forgive me if I do any sort of mistake.
I have 10000 data, and each data has a label of 0 and 1. I want to perform classification using LSTM as this is time series data.
input_dim = 1
hidden_dim = 32
num_layers = 2
output_dim = 1
# Here we define our model as a class
class LSTM(nn.Module):
def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
super(LSTM, self).__init__()
self.hidden_dim = hidden_dim
self.num_layers = num_layers
self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
self.fc = nn.Linear(hidden_dim, output_dim)
def forward(self, x):
#Initialize hidden layer and cell state
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
# We need to detach as we are doing truncated backpropagation through time (BPTT)
# If we don't, we'll backprop all the way to the start even after going through another batch
out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
# Index hidden state of last time step
# out.size() --> 100, 32, 100
# out[:, -1, :] --> 100, 100 --> just want last time step hidden states!
out = self.fc(out[:, -1, :])
# For binomial Classification
m = torch.sigmoid(out)
return m
model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers)
loss = nn.BCELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.00001, weight_decay=0.00006)
num_epochs = 100
# Number of steps to unroll
seq_dim =look_back-1
for t in range(num_epochs):
y_train_class = model(x_train)
output = loss(y_train_class, y_train)
# Zero out gradient, else they will accumulate between epochs
optimiser.zero_grad(set_to_none=True)
# Backward pass
output.backward()
# Update parameters
optimiser.step()
This is an example of what the result looks like
This code is initially from kaggle, I edited them for classification. Please, can you tell me what I am doing wrong?
EDIT 1:
Add dataloader
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
x_train = torch.from_numpy(x_train).type(torch.Tensor)
y_train = torch.from_numpy(y_train).type(torch.Tensor)
x_test = torch.from_numpy(x_test).type(torch.Tensor)
y_test = torch.from_numpy(y_test).type(torch.Tensor)
train_dataloader = DataLoader(TensorDataset(x_train, y_train), batch_size=128, shuffle=True)
test_dataloader = DataLoader(TensorDataset(x_test, y_test), batch_size=128, shuffle=True)
I realized I had forgotten to inverse the transformation before checking the result. When I did that, I got different values from classification, however all values are in the scale of 0.001-0.009, so when I round them, the result is same. Label 0 for all classification.
A common phenomenon in NN training is that they will initially converge to a very naive solution to the problem where they output a constant prediction that minimizes the error on the training data. My guess is that in your training data, the ratio between 0 and 1 classes is close to 0.5423. Depending on whether your model is of sufficient complexity, it might learn to make more specific predictions based on the input when given more learning steps.
While increasing the number of epochs could help, there is something better you can do with your current setup. Currently, you are only performing a single optimizer step per epoch. Typically, you would want a step per batch and loop over your data in (mini)batches of, say, 32 inputs for example. To do this, it would be best to use a DataLoader where you can define a batch size, and loop over the dataloader inside your epoch loop similar to this example.
Note: Long Post. Please bear with me
I have implemented a convolution neural network in PyTorch on KMNIST dataset. I need to use SMAC to optimize the learning rate and the momentum of Stochastic Gradient Descent of the CNN. I am new in hyperparameter optimization and what I learnt from the smac documentation is,
SMAC evaluates the algorithm to be optimized by invoking it through a Target Algorithm Evaluator (TAE).
We need a Scenario-object to configure the optimization process.
run_obj parameter in Scenario object specifies what SMAC is supposed to optimize.
My Ultimate goal is to get a good accuracy or low loss
This is what I have done so far:
Convolution Neural Network
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.autograd import Variable
from datasets import *
import torch.utils.data
import torch.nn.functional as F
import matplotlib.pyplot as plt
# Create the model class
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__() # to inherent the features of nn.Module
self.cnn1 = nn.Conv2d(in_channels = 1, out_channels = 8, kernel_size = 3, stride = 1, padding =1)
# in_channels =1 because of grey scale image
# kernel_size = feature_size
# padding = 1 because for same padding = [(filter_size -1)/2]
# the output size of the 8 feature maps is [(input_size - filter_size +2(padding)/stride)+1]
#Batch Normalization
self.batchnorm1 = nn.BatchNorm2d(8)
# RELU
self.relu = nn.ReLU()
self.maxpool1 = nn.MaxPool2d(kernel_size =2)
# After maxpooling, the output of each feature map is 28/2 =14
self.cnn2 = nn.Conv2d(in_channels = 8, out_channels = 32, kernel_size = 5, stride = 1, padding =2)
#Batch Normalization
self.batchnorm2 = nn.BatchNorm2d(32)
# RELU
#self.relu = nn.ReLU()
self.maxpool2 = nn.MaxPool2d(kernel_size =2)
# After maxpooling , the output of each feature map is 14/2 =7of them is of size 7x7 --> 32*7*7=1568
# Flatten the feature maps. You have 32 feature maps, each
self.fc1 = nn.Linear(in_features=1568, out_features = 600)
self.dropout = nn.Dropout(p=0.5)
self.fc2 = nn.Linear(in_features=600, out_features = 10)
def forward(self,x):
out = self.cnn1(x)
#out = F.relu(self.cnn1(x))
out = self.batchnorm1(out)
out = self.relu(out)
out = self.maxpool1(out)
out = self.cnn2(out)
out = self.batchnorm2(out)
out = self.relu(out)
out = self.maxpool2(out)
#Now we have to flatten the output. This is where we apply the feed forward neural network as learned
#before!
#It will the take the shape (batch_size, 1568) = (100, 1568)
out = out.view(-1, 1568)
#Then we forward through our fully connected layer
out = self.fc1(out)
out = self.relu(out)
out = self.dropout(out)
out = self.fc2(out)
return out
def train(model, train_loader, optimizer, epoch, CUDA, loss_fn):
model.train()
cum_loss=0
iter_count = 0
for i, (images, labels) in enumerate(train_load):
if CUDA:
images = Variable(images.cuda())
images = images.unsqueeze(1)
images = images.type(torch.FloatTensor)
images = images.cuda()
labels = Variable(labels.cuda())
labels = labels.type(torch.LongTensor)
labels = labels.cuda()
else:
images = Variable(images)
images = images.unsqueeze(1)
images = images.type(torch.DoubleTensor)
labels = Variable(labels)
labels = labels.type(torch.DoubleTensor)
optimizer.zero_grad()
outputs = model(images)
loss = loss_fn(outputs, labels)
loss.backward()
optimizer.step()
cum_loss += loss
if (i+1) % batch_size == 0:
correct = 0
total = 0
acc = 0
_, predicted = torch.max(outputs.data,1)
total += labels.size(0)
if CUDA:
correct += (predicted.cpu()==labels.cpu()).sum()
else:
correct += (predicted==labels).sum()
accuracy = 100*correct/total
if i % len(train_load) == 0:
iter_count += 1
ave_loss = cum_loss/batch_size
return ave_loss
batch_size = 100
epochs = 5
e = range(epochs)
#print(e)
#Load datasets
variable_name=KMNIST()
train_images = variable_name.images
train_images = torch.from_numpy(train_images)
#print(train_images.shape)
#print(type(train_images))
train_labels = variable_name.labels
train_labels = torch.from_numpy(train_labels)
#print(train_labels.shape)
#print(type(train_labels))
train_dataset = torch.utils.data.TensorDataset(train_images, train_labels)
# Make the dataset iterable
train_load = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)
print('There are {} images in the training set' .format(len(train_dataset)))
print('There are {} images in the loaded training set' .format(len(train_load)))
def net(learning_rate, Momentum):
model = CNN()
CUDA = torch.cuda.is_available()
if CUDA:
model = model.cuda()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate,momentum = Momentum, nesterov= True)
iteration = 0
total_loss=[]
for epoch in range(epochs):
ave_loss = train(model, train_load, optimizer, epoch, CUDA, loss_fn)
total_loss.append(ave_loss)
return optimizer, loss_fn, model, total_loss
optimizer, loss_fn, model, total_loss = net(learning_rate= 0.01, Momentum = 0.09)
# Print model's state_dict
print("---------------")
print("Model's state_dict:")
for param_tensor in model.state_dict():
print(param_tensor, "\t", model.state_dict()[param_tensor].size())
print("---------------")
#print("Optimizer's state_dict:")
#for var_name in optimizer.state_dict():
# print(var_name, "\t", optimizer.state_dict()[var_name])
torch.save(model.state_dict(), "kmnist_cnn.pt")
plt.plot(e, (np.array(total_loss)))
plt.xlabel("# Epoch")
plt.ylabel("Loss")
plt.show()
print('Done!')
smac hyperparameter optimization:
from smac.configspace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
UniformFloatHyperparameter, UniformIntegerHyperparameter
from smac.configspace.util import convert_configurations_to_array
#from ConfigSpace.conditions import InCondition
# Import SMAC-utilities
from smac.tae.execute_func import ExecuteTAFuncDict
from smac.scenario.scenario import Scenario
from smac.facade.smac_facade import SMAC
# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()
# We define a few possible types of SVM-kernels and add them as "kernel" to our cs
lr = UniformFloatHyperparameter('learning_rate', 1e-4, 1e-1, default_value='1e-2')
momentum = UniformFloatHyperparameter('Momentum', 0.01, 0.1, default_value='0.09')
cs.add_hyperparameters([lr, momentum])
def kmnist_from_cfg(cfg):
cfg = {k : cfg[k] for k in cfg if cfg[k]}
print('Config is', cfg)
#optimizer, loss_fn, model, total_loss = net(**cfg)
#optimizer, loss_fn, model, total_loss = net(learning_rate= cfg["learning_rate"], Momentum= cfg["Momentum"])
optimizer, loss_fn, model, total_loss = net(learning_rate= 0.02, Momentum= 0.05)
return optimizer, loss_fn, model, total_loss
# Scenario object
scenario = Scenario({"run_obj": "quality", # we optimize quality (alternatively runtime)
"runcount-limit": 200, # maximum function evaluations
"cs": cs, # configuration space
"deterministic": "true"
})
#def_value = kmnist_from_cfg(cs.get_default_configuration())
#print("Default Value: %.2f" % (def_value))
# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(scenario=scenario,tae_runner=kmnist_from_cfg) #rng=np.random.RandomState(42)
smac.solver.intensifier.tae_runner.use_pynisher = False
print("SMAC", smac)
incumbent = smac.optimize()
inc_value = kmnist_from_cfg(incumbent)
print("Optimized Value: %.2f" % (inc_value))
When I give loss as the run_obj parameter, I get the error message
ArgumentError: argument --run-obj/--run_obj: invalid choice: 'total_loss' (choose from 'runtime', 'quality')
To be honest, I do not know what does "quality" means. Anyways, when I give quality as the run_obj parameter, I get the error message
TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
If I understood it correctly, the above error message is obtained when an int is expected but str is given. To check whether the problem was with configuration space, I tried
optimizer, loss_fn, model, total_loss = net(learning_rate= 0.02, Momentum= 0.05)
instead of these:
optimizer, loss_fn, model, total_loss = net(**cfg)
optimizer, loss_fn, model, total_loss = net(learning_rate= cfg["learning_rate"], Momentum= cfg["Momentum"])
the error remains the same.
Any ideas on how to use smac to optimize hyperparameters of CNN and why do I get this error message? I tried looking for similar problems online. This post was a little helpful. Unfortunately, since there is no implementation of smac on NN (at least I did not find it), I cannot figure out the solution. I ran out of all ideas.
Any help, ideas or useful link is appreciated.
Thank you!
I believe the tae_runner (kmnist_from_cfg in your case) has to be a callable that takes a configuration space point, which you correctly provide, and outputs a single number. You output a tuple of things. Perhaps only return the total_loss on the validation set? I am basing this on the svm example in the smac github at https://github.com/automl/SMAC3/blob/master/examples/svm.py.
Let's say I have the following constraints and the network:
The architecture is fixed (see this image) (note that there are no biases)
Activation function for the hidden layer is ReLU
There's no activation function for the output layer (should just return the sum of the inputs it receive).
I tried to implement this in pytorch with various initialization schemes and different data sets but I failed (the code is at the bottom).
My questions are:
Is there anything wrong with my NN training process?
Is this a feasible problem? If yes, how?
If this is doable, can we still achieve that by constraining the weights to be in the set {-1, 0, 1}
Code:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data_utils
import numpy as np
class Network(nn.Module):
def __init__(self):
super(Network, self).__init__()
self.fc1 = nn.Linear(2,2,bias=False)
self.fc2 = nn.Linear(2,1, bias=False)
self.rl = nn.ReLU()
def forward(self, x):
x = self.fc1(x)
x = self.rl(x)
x = self.fc2(x)
return x
#create an XOR data set to train
rng = np.random.RandomState(0)
X = rng.randn(200, 2)
y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0).astype('int32')
# test data set
X_test = np.array([[0,0],[0,1], [1,0], [1,1]])
train = data_utils.TensorDataset(torch.from_numpy(X).float(), \
torch.from_numpy(y).float())
train_loader = data_utils.DataLoader(train, batch_size=50, shuffle=True)
test = torch.from_numpy(X_test).float()
# training the network
num_epoch = 10000
net = Network()
net.fc1.weight.data.clamp_(min=-1, max=1)
net.fc2.weight.data.clamp_(min=-1, max=1)
# define loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters())
for epoch in range(num_epoch):
running_loss = 0 # loss per epoch
for (X, y)in train_loader:
# make the grads zero
optimizer.zero_grad()
# forward propagate
out = net(X)
# calculate loss and update
loss = criterion(out, y)
loss.backward()
optimizer.step()
running_loss += loss.data
if epoch%500== 0:
print("Epoch: {0} Loss: {1}".format(epoch, running_loss))
The loss doesn't improve. It gets stuck in some value after a few epochs ( i'm not sure how to make this reproducible as I'm getting different values every time)
net(test) returns a set of predictions that are no way close to XOR output.
You need to use a nonlinear activation function such as sigmoid in your hidden and output layers . because xor is not linearly separable.Also biases are required.
I am trying to understand deep-learning with pytorch. I read the pytorch tutorial: https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html, and its written:
''torch.nn only supports mini-batches. The entire torch.nn package only supports inputs that are a mini-batch of samples, and not a single sample.
For example, nn.Conv2d will take in a 4D Tensor of nSamples x nChannels x Height x Width.
If you have a single sample, just use input.unsqueeze(0) to add a fake batch dimension.''
I am not sure to understand what it means.
Indeed, I have made a simple feed-forward neural network (cf below code), on which I used a really small dataset (the idea is to learn how it works without mini-batches first, not really to have anything useful), and hence don't need to use mini-batches. Hence, I introduce directly all the sample at each epochs. If I understand it correctly, I should add 'train_data = train_data.unsqueeze(0) '. But I am not sure where, as it seems to change the data size to 1. Also, it works without adding this line so why should I really use it?
Any help would be highly appreciated!
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
import os
import numpy as np
# Download data
#...
# Construct network
len_input = len(data[0])
len_output = nbr_class
print('There is %d classes used for classification'%nbr_class)
#defining a new class: Net, that extended nn.Module
#setup the “skeleton” of our network architecture
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
#creation of fully connected. A fully connected neural network layer is represented by the nn.Linear object,
#with the first argument in the definition being the number of nodes in layer l and the next argument being
#the number of nodes in layer l+1
self.fc1 = nn.Linear(len_input, 200)
self.fc2 = nn.Linear(200, 200)
self.fc3 = nn.Linear(200, len_output)
def forward(self, x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return F.log_softmax(x)
# Create initial network
epochs = 3000
model = Net()
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# Train & test network
def train(data, target, epoch):
model.train() # set the model in "training mode"
# run the main training loop
#no need batch size as small number of sample! like this the process is more exact
#zero our gradients before calling .backward() which is necessary for new sum of gradients
optimizer.zero_grad()
target_pred = model(data)
loss = criterion(target_pred, target)
#Propagate the gradients back through the network
loss.backward()
#update the weights
#tell our optimizer to “step”, meaning that the weights will be updated using the calculated gradients
#according to our rule. perform model parameter update (update weights)
optimizer.step()
# for graphing puposes
loss_array.append(loss.data[0])
if epoch in list(range(0,20000,100)):
print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.data[0]))
def test(epoch, test_data, test_target):
#eval mode to turn Dropout and BatchNorm off
model.eval()
test_loss = 0
correct = 0
test_target_pred = model(test_data)
criterion = nn.NLLLoss()
# sum up batch loss
test_loss += criterion(test_target_pred, test_target).data[0]
pred = test_target_pred.data.max(1)[1] # get the index of the max log-probability
correct += pred.eq(test_target.data).sum()
if epoch in list(range(0,20000,100)):
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_target), 100. * correct / len(test_target)))
if __name__ == '__main__':
for epoch in range(epochs):
train(data = train_data, target=train_target, epoch=epoch)
test(epoch, test_data, test_target)