Python: tqdm progress bar stuck at 0%

I have written the following code to train a BERT model on my dataset. For tqdm I used the import from tqdm.notebook import tqdm and wrapped my loops with it, but when I run the program the bar stays at 0% even after the entire code has run. How do I fix this?
Code
Model
TRANSFORMERS = {
    "bert-multi-cased": (BertModel, BertTokenizer, "bert-base-uncased"),
}

class Transformer(nn.Module):
    def __init__(self, model, num_classes=1):
        """
        Constructor
        Arguments:
            model {string} -- Transformer to build the model on. Expects "camembert-base".
            num_classes {int} -- Number of classes (default: {1})
        """
        super().__init__()
        self.name = model
        model_class, tokenizer_class, pretrained_weights = TRANSFORMERS[model]
        bert_config = BertConfig.from_json_file(MODEL_PATHS[model] + 'bert_config.json')
        bert_config.output_hidden_states = True
        self.transformer = BertModel(bert_config)
        self.nb_features = self.transformer.pooler.dense.out_features
        self.pooler = nn.Sequential(
            nn.Linear(self.nb_features, self.nb_features),
            nn.Tanh(),
        )
        self.logit = nn.Linear(self.nb_features, num_classes)

    def forward(self, tokens):
        """
        Usual torch forward function
        Arguments:
            tokens {torch tensor} -- Sentence tokens
        Returns:
            torch tensor -- Class logits
        """
        _, _, hidden_states = self.transformer(
            tokens, attention_mask=(tokens > 0).long()
        )
        hidden_states = hidden_states[-1][:, 0]  # Use the representation of the first token of the last layer
        ft = self.pooler(hidden_states)
        return self.logit(ft)
Training
def fit(model, train_dataset, val_dataset, epochs=1, batch_size=8, warmup_prop=0, lr=5e-4):
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    optimizer = AdamW(model.parameters(), lr=lr)
    num_warmup_steps = int(warmup_prop * epochs * len(train_loader))
    num_training_steps = epochs * len(train_loader)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps)

    loss_fct = nn.BCEWithLogitsLoss(reduction='mean').cuda()

    for epoch in range(epochs):
        model.train()
        start_time = time.time()
        optimizer.zero_grad()
        avg_loss = 0

        for step, (x, y_batch) in tqdm(enumerate(train_loader), total=len(train_loader)):
            y_pred = model(x.to(device))
            loss = loss_fct(y_pred.view(-1).float(), y_batch.float().to(device))
            loss.backward()
            avg_loss += loss.item() / len(train_loader)

            xm.optimizer_step(optimizer, barrier=True)
            # optimizer.step()
            scheduler.step()
            model.zero_grad()
            optimizer.zero_grad()

        model.eval()
        preds = []
        truths = []
        avg_val_loss = 0.

        with torch.no_grad():
            for x, y_batch in tqdm(val_loader):
                y_pred = model(x.to(device))
                loss = loss_fct(y_pred.detach().view(-1).float(), y_batch.float().to(device))
                avg_val_loss += loss.item() / len(val_loader)

                probs = torch.sigmoid(y_pred).detach().cpu().numpy()
                preds += list(probs.flatten())
                truths += list(y_batch.numpy().flatten())

        score = roc_auc_score(truths, preds)
        dt = time.time() - start_time
        lr = scheduler.get_last_lr()[0]
        print(f'Epoch {epoch + 1}/{epochs} \t lr={lr:.1e} \t t={dt:.0f}s \t loss={avg_loss:.4f} \t val_loss={avg_val_loss:.4f} \t val_auc={score:.4f}')

model = Transformer("bert-multi-cased")
device = torch.device('cuda:2')
model = model.to(device)

epochs = 3
batch_size = 32
warmup_prop = 0.1
lr = 1e-4

train_dataset = JigsawDataset(df_train)
val_dataset = JigsawDataset(df_val)
test_dataset = JigsawDataset(df_test)

fit(model, train_dataset, val_dataset, epochs=epochs, batch_size=batch_size, warmup_prop=warmup_prop, lr=lr)
Output
0%| | 0/6986 [00:00<?, ?it/s]
How to fix this?

The import should be:
from tqdm import tqdm
The error is in the training function; correct this loop:
for x, y_batch in tqdm(val_loader, total=len(val_loader)):

Contrary to Ishan Dutta's answer, tqdm.notebook.tqdm (and not tqdm.tqdm) is the correct function to use in both Jupyter Notebook and JupyterLab.
This problem can happen if you don't have ipywidgets installed, or if you installed ipywidgets before installing JupyterLab.
What fixed it for me was reinstalling ipywidgets:
pip3 uninstall ipywidgets --yes
pip3 install --upgrade ipywidgets
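If the widget frontend still won't render, tqdm.auto is a convenient fallback: it picks the notebook widget when one is available and degrades to the plain text bar otherwise. A minimal sketch, applied to the validation loop from the question:

from tqdm.auto import tqdm

# tqdm.auto selects the right frontend (widget or text) automatically
for x, y_batch in tqdm(val_loader, total=len(val_loader)):
    ...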

Related

Bidirectional LSTM regularization: how to increase accuracy

This is my model:
class BiLSTM(nn.Module):
    def __init__(self):
        super(BiLSTM, self).__init__()
        self.hidden_size = 128
        drp = 0.2
        n_classes = len(le.classes_)
        self.embedding = nn.Embedding(max_features, embed_size)
        self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = False
        self.lstm = nn.LSTM(embed_size, self.hidden_size, bidirectional=True, batch_first=True)
        self.linear = nn.Linear(self.hidden_size * 4, 128)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(drp)
        self.out = nn.Linear(128, n_classes)

    def forward(self, x):
        h_embedding = self.embedding(x)
        _embedding = torch.squeeze(torch.unsqueeze(h_embedding, 0))
        h_lstm, _ = self.lstm(h_embedding)
        avg_pool = torch.mean(h_lstm, 1)
        max_pool, _ = torch.max(h_lstm, 1)
        conc = torch.cat((avg_pool, max_pool), 1)
        conc = self.relu(self.linear(conc))
        conc = self.dropout(conc)
        out = self.out(conc)
        return out

n_epochs = 87
model = BiLSTM()
loss_fn = nn.CrossEntropyLoss(reduction='mean')
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.002)
model.cuda()

# Load train and test in CUDA memory
x_train = torch.tensor(train_X, dtype=torch.long).cuda()
y_train = torch.tensor(train_y, dtype=torch.long).cuda()
x_cv = torch.tensor(test_X, dtype=torch.long).cuda()
y_cv = torch.tensor(test_y, dtype=torch.long).cuda()

# Create Torch datasets
train = torch.utils.data.TensorDataset(x_train, y_train)
valid = torch.utils.data.TensorDataset(x_cv, y_cv)

# Create DataLoaders
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=True)

train_loss = []
valid_loss = []

for epoch in range(n_epochs):
    start_time = time.time()

    # Set model to train configuration
    model.train()
    avg_loss = 0.
    for i, (x_batch, y_batch) in enumerate(train_loader):
        # Predict/forward pass
        y_pred = model(x_batch)
        # Compute loss
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss += loss.item() / len(train_loader)

    # Set model to validation configuration
    model.eval()
    avg_val_loss = 0.
    val_preds = np.zeros((len(x_cv), len(le.classes_)))
    for i, (x_batch, y_batch) in enumerate(valid_loader):
        y_pred = model(x_batch).detach()
        avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
        # Keep/store predictions
        val_preds[i * batch_size:(i + 1) * batch_size] = F.softmax(y_pred).cpu().numpy()

    val_preds = val_preds * 5000
    # Check accuracy
    val_accuracy = sum(val_preds.argmax(axis=1) == test_y) / len(test_y)
    train_loss.append(avg_loss)
    valid_loss.append(avg_val_loss)
    elapsed_time = time.time() - start_time
    print('Epoch {}/{} \t Train_loss={:.4f} \t val_loss={:.4f} \t val_acc={:.4f} \t time={:.2f}s'.format(
        epoch + 1, n_epochs, avg_loss, avg_val_loss, val_accuracy, elapsed_time))
This is the output I am receiving. I have used the BiLSTM model. I tried changing the loss function and the dropout value, but it didn't work. I guess the model is overfitting; how can I increase the accuracy? The size of the dataset is 3000.
Output looks like this: https://i.stack.imgur.com/NbK92.png
Data looks like this: https://i.stack.imgur.com/ll12J.png

BERT text classification using PyTorch

I am trying to build a BERT model for text classification with the help of this tutorial: https://towardsdatascience.com/bert-text-classification-using-pytorch-723dfb8b6b5b. My dataset contains two columns (label, text).
The labels can take three values (0, 1, 2). The code runs without any error, but all values of the confusion matrix are 0. Is there something wrong with my code?
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torchtext.data import Field, TabularDataset, BucketIterator, Iterator
import torch.nn as nn
from transformers import BertTokenizer, BertForSequenceClassification
import torch.optim as optim
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns

torch.manual_seed(42)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
MAX_SEQ_LEN = 128
PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)

label_field = Field(sequential=False, use_vocab=False, batch_first=True, dtype=torch.float)
text_field = Field(use_vocab=False, tokenize=tokenizer.encode, lower=False, include_lengths=False,
                   batch_first=True, fix_length=MAX_SEQ_LEN, pad_token=PAD_INDEX, unk_token=UNK_INDEX)
fields = [('label', label_field), ('text', text_field)]

CLASSIFICATION_REPORT = "classification_report.jsonl"

train, valid, test = TabularDataset.splits(path='', train='train.csv', validation='validate.csv',
                                           test='test.csv', format='CSV', fields=fields, skip_header=True)

train_iter = BucketIterator(train, batch_size=16, sort_key=lambda x: len(x.text), device=device,
                            train=True, sort=True, sort_within_batch=True)
valid_iter = BucketIterator(valid, batch_size=16, sort_key=lambda x: len(x.text), device=device,
                            train=True, sort=True, sort_within_batch=True)
test_iter = Iterator(test, batch_size=16, device=device, train=False, shuffle=False, sort=False)

class BERT(nn.Module):
    def __init__(self):
        super(BERT, self).__init__()
        options_name = "bert-base-uncased"
        self.encoder = BertForSequenceClassification.from_pretrained(options_name, num_labels=3)

    def forward(self, text, label):
        loss, text_fea = self.encoder(text, labels=label)[:2]
        return loss, text_fea

def train(model, optimizer, criterion=nn.BCELoss(), train_loader=train_iter, valid_loader=valid_iter,
          num_epochs=5, eval_every=len(train_iter) // 2,
          file_path=None,  # default value truncated in the original post; unused in this excerpt
          best_valid_loss=float("Inf")):
    running_loss = 0.0
    valid_running_loss = 0.0
    global_step = 0
    train_loss_list = []
    valid_loss_list = []
    global_steps_list = []

    model.train()
    for epoch in range(num_epochs):
        for (label, text), _ in train_loader:
            label = label.type(torch.LongTensor)
            label = label.to(device)
            text = text.type(torch.LongTensor)
            text = text.to(device)
            output = model(text, label)
            loss, _ = output

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            global_step += 1

            if global_step % eval_every == 0:
                model.eval()
                with torch.no_grad():
                    for (label, text), _ in valid_loader:
                        label = label.type(torch.LongTensor)
                        label = label.to(device)
                        text = text.type(torch.LongTensor)
                        text = text.to(device)
                        output = model(text, label)
                        loss, _ = output
                        valid_running_loss += loss.item()

                average_train_loss = running_loss / eval_every
                average_valid_loss = valid_running_loss / len(valid_loader)
                train_loss_list.append(average_train_loss)
                valid_loss_list.append(average_valid_loss)
                global_steps_list.append(global_step)

                # Resetting running values
                running_loss = 0.0
                valid_running_loss = 0.0
                model.train()

                # Print progress
                print('Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, global_step, num_epochs * len(train_loader),
                              average_train_loss, average_valid_loss))

                if best_valid_loss > average_valid_loss:
                    best_valid_loss = average_valid_loss

    print('Finished Training!')

model = BERT().to(device)
optimizer = optim.Adam(model.parameters(), lr=2e-5)
train(model=model, optimizer=optimizer)

def evaluate(model, test_loader):
    y_pred = []
    y_true = []
    model.eval()
    with torch.no_grad():
        for (label, text), _ in test_loader:
            label = label.type(torch.LongTensor)
            label = label.to(device)
            text = text.type(torch.LongTensor)
            text = text.to(device)
            output = model(text, label)
            _, output = output
            y_pred.extend(torch.argmax(output, 2).tolist())
            y_true.extend(label.tolist())
    print('Classification Report:')
    print(classification_report(y_true, y_pred, labels=[0, 1, 2], digits=4))

best_model = BERT().to(device)
evaluate(best_model, test_iter)
You are using criterion = nn.BCELoss(), a binary cross-entropy loss, for a multi-class classification problem ("the labels can have three values of (0, 1, 2)"). Use a loss function suitable for multi-class classification.
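Note that in the excerpt above the criterion argument is never actually used: BertForSequenceClassification computes its own loss internally when labels are passed, and for integer class labels that is a cross-entropy loss. If you do want an explicit criterion for three classes, nn.CrossEntropyLoss is the standard choice; a minimal sketch with made-up tensors:

import torch
import torch.nn as nn

# CrossEntropyLoss expects raw logits of shape (batch, num_classes)
# and integer class targets of shape (batch,) with dtype long
criterion = nn.CrossEntropyLoss()

logits = torch.randn(4, 3)            # batch of 4, classes 0/1/2
targets = torch.tensor([0, 2, 1, 2])  # class indices
loss = criterion(logits, targets)
print(loss.item())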

Implementing Early Stopping in Pytorch without Torchsample

As a PyTorch newbie (coming from TensorFlow), I am unsure of how to implement early stopping. My research has led me to discover that PyTorch does not have a native way to do this. I have also discovered torchsample, but am unable to install it in my conda environment for whatever reason. Is there a simple way to apply early stopping without it? Here is my current setup:
class RegressionDataset(Dataset):
    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        return len(self.X_data)

train_dataset = RegressionDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float())
val_dataset = RegressionDataset(torch.from_numpy(X_val).float(), torch.from_numpy(y_val).float())
test_dataset = RegressionDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float())

# Model params
EPOCHS = 100
BATCH_SIZE = 1000
LEARNING_RATE = 0.001
NUM_FEATURES = np.shape(X_test)[1]

# Initialize DataLoaders
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

# Define neural network architecture
class MultipleRegression(nn.Module):
    def __init__(self, num_features):
        super(MultipleRegression, self).__init__()
        # Define architecture
        self.layer_1 = nn.Linear(num_features, 16)
        self.layer_2 = nn.Linear(16, 32)
        self.layer_3 = nn.Linear(32, 25)
        self.layer_4 = nn.Linear(25, 20)
        self.layer_5 = nn.Linear(20, 16)
        self.layer_out = nn.Linear(16, 1)
        self.relu = nn.ReLU()  # ReLU applied to all layers

        # Initialize weights and biases
        nn.init.xavier_uniform_(self.layer_1.weight)
        nn.init.zeros_(self.layer_1.bias)
        nn.init.xavier_uniform_(self.layer_2.weight)
        nn.init.zeros_(self.layer_2.bias)
        nn.init.xavier_uniform_(self.layer_3.weight)
        nn.init.zeros_(self.layer_3.bias)
        nn.init.xavier_uniform_(self.layer_4.weight)
        nn.init.zeros_(self.layer_4.bias)
        nn.init.xavier_uniform_(self.layer_5.weight)
        nn.init.zeros_(self.layer_5.bias)
        nn.init.xavier_uniform_(self.layer_out.weight)
        nn.init.zeros_(self.layer_out.bias)

    def forward(self, inputs):
        x = self.relu(self.layer_1(inputs))
        x = self.relu(self.layer_2(x))
        x = self.relu(self.layer_3(x))
        x = self.relu(self.layer_4(x))
        x = self.relu(self.layer_5(x))
        x = self.layer_out(x)
        return x

    def predict(self, test_inputs):
        x = self.relu(self.layer_1(test_inputs))
        x = self.relu(self.layer_2(x))
        x = self.relu(self.layer_3(x))
        x = self.relu(self.layer_4(x))
        x = self.relu(self.layer_5(x))
        x = self.layer_out(x)
        return x

# Check for GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model = MultipleRegression(NUM_FEATURES)
model.to(device)
print(model)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Dictionary to store loss per epoch for training and validation
loss_stats = {
    "train": [],
    "val": []
}

# Begin training
print("Begin Training")
for e in tqdm(range(1, EPOCHS + 1)):
    # Training
    train_epoch_loss = 0
    model.train()
    for X_train_batch, y_train_batch in train_loader:
        X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
        optimizer.zero_grad()
        y_train_pred = model(X_train_batch)
        train_loss = criterion(y_train_pred, y_train_batch.unsqueeze(1))
        train_loss.backward()
        optimizer.step()
        train_epoch_loss += train_loss.item()

    # Validation
    with torch.no_grad():
        val_epoch_loss = 0
        model.eval()
        for X_val_batch, y_val_batch in val_loader:
            X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
            y_val_pred = model(X_val_batch)
            val_loss = criterion(y_val_pred, y_val_batch.unsqueeze(1))
            val_epoch_loss += val_loss.item()

    loss_stats["train"].append(train_epoch_loss / len(train_loader))
    loss_stats["val"].append(val_epoch_loss / len(val_loader))
    print(f"Epoch {e}: \ Train loss: {train_epoch_loss/len(train_loader):.5f} \ Val loss: {val_epoch_loss/len(val_loader):.5f}")

# Visualize loss
train_val_loss_df = pd.DataFrame.from_dict(loss_stats).reset_index().melt(id_vars=["index"]).rename(columns={"index": "epochs"})
plt.figure()
sns.lineplot(data=train_val_loss_df, x="epochs", y="value", hue="variable").set_title("Train-Val Loss/Epoch")

# Test model
y_pred_list = []
with torch.no_grad():
    model.eval()
    for X_batch, _ in test_loader:
        X_batch = X_batch.to(device)
        y_test_pred = model(X_batch)
        y_pred_list.append(y_test_pred.cpu().numpy())

y_pred_list = [a.squeeze().tolist() for a in y_pred_list]
y_pred_list = [item for sublist in y_pred_list for item in sublist]
y_pred_list = np.array(y_pred_list)

mse = mean_squared_error(y_test, y_pred_list)
r_square = r2_score(y_test, y_pred_list)
print("Mean Squared Error :", mse)
print("R^2 :", r_square)
A basic way to do this is to keep track of the best validation loss obtained so far.
You can have a variable best_loss = float('inf') initialized before your loop over epochs (or you could track other things, like the best loss per epoch). Since lower loss is better, save whenever the new validation loss beats the best one.
After each validation pass, do:
if val_loss < best_loss:
    best_loss = val_loss
    # At this point also save a snapshot of the current model
    torch.save(model, 'my_model_best_loss.pth')
Then, if best_loss does not improve significantly after some number of training steps, or by the end of the epoch, or if val_loss keeps getting worse, break out of the loop and terminate the training there.
For implementing algorithms like early stopping (and your training loop in general) you may find it easier to give PyTorch Lightning a try (no affiliation, but it's much easier than trying to roll everything by hand).
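Putting that together, a minimal hand-rolled sketch with a patience counter (patience and the file name best_model.pth are made-up choices; EPOCHS, model, device, criterion, and val_loader come from the setup above):

best_loss = float('inf')
patience = 5              # assumed: epochs to wait for an improvement before stopping
epochs_no_improve = 0

for e in range(1, EPOCHS + 1):
    # ... training pass over train_loader as in the setup above ...

    # Validation pass
    model.eval()
    val_epoch_loss = 0
    with torch.no_grad():
        for X_val_batch, y_val_batch in val_loader:
            X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
            val_epoch_loss += criterion(model(X_val_batch), y_val_batch.unsqueeze(1)).item()
    val_loss = val_epoch_loss / len(val_loader)

    if val_loss < best_loss:
        best_loss = val_loss
        epochs_no_improve = 0
        torch.save(model.state_dict(), 'best_model.pth')  # snapshot the best model so far
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f'Early stopping at epoch {e}')
            break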

Load a saved NN model in a different Python file

I am trying to implement the code from a PyTorch beginner's tutorial, but I have written the code for loading the saved model in another Python file.
The FashionClassify file contains the code exactly as it is in the tutorial.
Below is the code:
from FashionClassify import NeuralNetwork
from FashionClassify import test_data
import torch

model = NeuralNetwork()
model.load_state_dict(torch.load("model.pth"))

classes = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
]

model.eval()
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')
However, when I run this, the entire training process starts again. Why is that so? Or is it expected behavior?
(I have gone through a couple of web pages and Stack Overflow answers but couldn't find my problem.)
FashionClassify file code:
import torch
from torch import nn
from torch.utils.data import DataLoader  # wraps an iterable around the dataset
from torchvision import datasets  # stores samples and their labels
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib as plt

training_data = datasets.FashionMNIST(root='data', train=True, download=True, transform=ToTensor())
test_data = datasets.FashionMNIST(root='data', train=False, download=True, transform=ToTensor())

batch_size = 64
train_dataLoader = DataLoader(training_data, batch_size=batch_size)
test_dataLoader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataLoader:
    print('Shape of X [N,C,H,W]:', X.size())
    print('Shape of y:', y.shape, y.dtype)
    break

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

# To define a NN, we inherit a class from nn.Module
class NeuralNetwork(nn.Module):
    def __init__(self):
        # Specifies how data will proceed in the forward pass
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

def test(dataloader, model):
    size = len(dataloader.dataset)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataLoader, model, loss_fn, optimizer)
    test(test_dataLoader, model)
print("Done!")

torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")
That's what happens when you import another file: all of its top-level code gets rerun. Instead, in your training file, guard the training call:

class FancyNetwork(nn.Module):
    [...]

def train():
    [train code]

if __name__ == "__main__":
    train()

Now when you run this file directly, train() gets called, but when you import this file from another one, train() won't run automatically.
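Applied to the FashionClassify file above, that means keeping the dataset, model, and function definitions at module level (so they can be imported) and moving only the epoch loop and the save under the guard. A minimal sketch, where run_training is a made-up name:

# FashionClassify.py (sketch): imports, datasets, NeuralNetwork, model,
# loss_fn, optimizer, train() and test() stay at module level as above

def run_training():
    for t in range(5):
        print(f"Epoch {t+1}\n-------------------------------")
        train(train_dataLoader, model, loss_fn, optimizer)
        test(test_dataLoader, model)
    print("Done!")
    torch.save(model.state_dict(), "model.pth")
    print("Saved PyTorch Model State to model.pth")

if __name__ == "__main__":
    run_training()  # runs only when this file is executed directly, not on import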

Pytorch: No training effect after deepcopy

I tried to make a copy of a neural network in pytorch and subsequently train the copied network, but training does not seem to change the weights in the network after copying. This post suggests that deepcopy is a convenient way to make a copy of a neural network, so I tried using that in my code.
The code below works just fine and shows that the weights and accuracy of the network differ after training from before training. However, when I toggle the comments so that network_cp = deepcopy(network) and optimizer_cp = deepcopy(optimizer) are used instead, the accuracy and weights before and after training are exactly the same.
# torch settings
torch.backends.cudnn.enabled = True
device = torch.device("cpu")
# training settings
learning_rate = 0.01
momentum = 0.5
batch_size_train = 64
batch_size_test = 1000
# get MNIST data set
train_loader, test_loader = load_mnist(batch_size_train=batch_size_train,
                                       batch_size_test=batch_size_test)
# make a network
network = Net()
optimizer = optim.SGD(network.parameters(), lr=learning_rate,
                      momentum=momentum)
network.to(device)
# train network
train(network, optimizer, train_loader, device)
# copy network
network_cp = network
#network_cp = deepcopy(network)
optimizer_cp = optimizer
#optimizer_cp = deepcopy(optimizer)
# get edge weights and accuracy of the copied network
acc1 = float(test(network_cp, optimizer_cp, test_loader, device))
weights1 = np.array(get_edge_weights(network_cp))
# train copied network
train(network_cp, optimizer_cp, train_loader, device)
# get edge weights and accuracy of the copied network after training
acc2 = float(test(network_cp, optimizer_cp, test_loader, device))
weights2 = np.array(get_edge_weights(network_cp))
# compare edge weights and accuracy of copied network before and after training
print('accuracy', acc1, acc2)
print('abs diff of weights for net1 and net2', np.sum(np.abs(weights1-weights2)))
To run the code above, include these imports and function definitions:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as tnn
import torch.nn.functional as tnf
from copy import deepcopy
import numpy as np

def load_mnist(batch_size_train=64, batch_size_test=1000):
    train_loader = torch.utils.data.DataLoader(
        torchvision.datasets.MNIST('temp/',  # '/data/users/alice/pytorch_training_files/',
                                   train=True, download=True,
                                   transform=torchvision.transforms.Compose([
                                       torchvision.transforms.ToTensor(),
                                       torchvision.transforms.Normalize((0.1307,), (0.3081,))
                                   ])),
        batch_size=batch_size_train, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        torchvision.datasets.MNIST('temp/',  # '/data/users/alice/pytorch_training_files/',
                                   train=False, download=True,
                                   transform=torchvision.transforms.Compose([
                                       torchvision.transforms.ToTensor(),
                                       torchvision.transforms.Normalize((0.1307,), (0.3081,))
                                   ])),
        batch_size=batch_size_test, shuffle=True)
    return train_loader, test_loader

def train(network, optimizer, train_loader, device, n_epochs=5):
    network.train()
    for epoch in range(1, n_epochs + 1):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = network(data)
            loss = tnf.nll_loss(output, target)
            loss.backward()
            optimizer.step()

def test(network, optimizer, test_loader, device):
    network.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = network(data)
            test_loss += tnf.nll_loss(output, target, size_average=False).item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return float(correct) / float(len(test_loader.dataset))

def get_edge_weights(network):
    layers = [module for module in network.modules()][1:]
    output = np.zeros(1)
    for j, layer in enumerate(layers):
        weights = list(layer.parameters())[0]
        weights_arr = weights.detach().numpy()
        weights_arr = weights_arr.flatten()
        output = np.concatenate((output, weights_arr))
    return output[1:]

class Net(tnn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = tnn.Linear(784, 264)
        self.fc2 = tnn.Linear(264, 10)

    def forward(self, x):
        x = tnf.relu(self.fc1(x.view(-1, 784)))
        x = tnf.relu(self.fc2(x))
        return tnf.log_softmax(x)
After optimizer_cp = deepcopy(optimizer), the optimizer_cp still wants to optimize the old model's parameters (as defined by optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)).
After deep copying the model, the optimizer needs to be told to optimize this new model's parameters:
optimizer_cp = optim.SGD(network_cp.parameters(), lr=learning_rate, momentum=momentum)
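In full, the copy block from the question then reads:

# copy network
network_cp = deepcopy(network)

# build a fresh optimizer over the copy's parameters instead of deep-copying the old one
optimizer_cp = optim.SGD(network_cp.parameters(), lr=learning_rate, momentum=momentum)

After this change, training network_cp updates the copy's weights while leaving the original network untouched.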
