I am very new to porting code from Keras/TensorFlow to PyTorch. I am trying to retrain my TF model in PyTorch, but my dataset has some particularities that make it difficult for me to get it running in PyTorch.
To understand my issues, recall that I have a custom dataset initialized this way:
class MyDataSet(torch.utils.data.Dataset):
def __init__(self, x, y, transform=None):
super(MyDataSet, self).__init__()
# store the raw tensors
self._x = np.load(x)
self._y = np.load(y)
self._x=np.swapaxes(self._x,3,2)
self._x=np.swapaxes(self._x,2,1)
self.transform = transform
def __len__(self):
# a DataSet must know it size
return self._x.shape[0]
def __getitem__(self, index):
x = self._x[index, :]
y = self._y[index]
return x, y
The shape of self._x is (12000, 3, 224, 224) and the shape of self._y is (12000,).
I am fine-tuning a pre-trained ResNet-50 on this data, and the training happens the following way:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet50
import time
import copy
def set_parameter_requires_grad(model, feature_extracting):
if feature_extracting:
for param in model.parameters():
param.requires_grad = False
#Transform dataset
print("Loading Data")
transform = transforms.Compose([transforms.ToTensor()])
dataset = MyDataSet("me/train1-features.npy","/me/train1-classes.npy",transform=transform)
dataloader = DataLoader(dataset, batch_size=4)
print("Configuring network")
feature_extract = True
num_epochs = 15
num_classes=12
model_ft = resnet50(pretrained=True)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, num_classes)
if torch.cuda.is_available():
model_ft.cuda()
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
params_to_update = []
for name,param in model_ft.named_parameters():
if param.requires_grad == True:
params_to_update.append(param)
print("\t",name)
else:
for name,param in model_ft.named_parameters():
if param.requires_grad == True:
print("\t",name)
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
# Setup the loss fxn
criterion = nn.CrossEntropyLoss()
#Train (how to validate?)
for epoch in range(num_epochs): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(dataloader, 0):
# get the inputs; data is a list of [inputs, labels]
inputs, labels = data
#transfer labels and inputs to cuda()
inputs,labels=inputs.cuda(), labels.cuda()
# zero the parameter gradients
optimizer_ft.zero_grad()
# forward + backward + optimize
outputs = model_ft(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer_ft.step()
# print statistics
running_loss += loss.item()
if i % 2000 == 1999: # print every 2000 mini-batches
print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
running_loss = 0.0
However, whenever I run this code, I receive the following error
Traceback (most recent call last):
File "train_my_data_example.py", line 114, in <module>
outputs = model_ft(inputs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torchvision/models/resnet.py", line 249, in forward
return self._forward_impl(x)
File "/usr/local/lib/python3.8/dist-packages/torchvision/models/resnet.py", line 232, in _forward_impl
x = self.conv1(x)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/conv.py", line 399, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/conv.py", line 395, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.cuda.FloatTensor) should be the same
I can run the train and validation procedures normally in TF/Keras, but I don't know how to do that with my custom Dataset in PyTorch.
How can I solve my problem and also run a train/validation loop with PyTorch on my custom data?
It seems that np.load is loading the images as uint8 (byte) data, and since the transform is never applied in __getitem__, the DataLoader's default collation keeps that dtype and produces a ByteTensor while the model's weights are FloatTensor. You can fix this by making a small change in __getitem__:
def __getitem__(self, index):
x = self._x[index, :]
y = self._y[index]
return x.astype(np.float32), y
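To address the train/validation part of the question as well, here is a minimal sketch, assuming the MyDataSet, model_ft, criterion and optimizer_ft defined above (the 80/20 split ratio is an arbitrary choice for illustration). It holds out a validation subset with torch.utils.data.random_split and runs an evaluation pass after each epoch:

from torch.utils.data import DataLoader, random_split

# split the dataset into train/validation subsets (80/20 is arbitrary)
n_val = int(0.2 * len(dataset))
n_train = len(dataset) - n_val
train_set, val_set = random_split(dataset, [n_train, n_val])

train_loader = DataLoader(train_set, batch_size=4, shuffle=True)
val_loader = DataLoader(val_set, batch_size=4)

for epoch in range(num_epochs):
    model_ft.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.cuda(), labels.cuda()
        optimizer_ft.zero_grad()
        loss = criterion(model_ft(inputs), labels)
        loss.backward()
        optimizer_ft.step()

    # validation pass: no gradients, model in eval mode
    model_ft.eval()
    correct, total, val_loss = 0, 0, 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.cuda(), labels.cuda()
            outputs = model_ft(inputs)
            val_loss += criterion(outputs, labels).item()
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    print(f"epoch {epoch + 1}: val loss {val_loss / len(val_loader):.3f}, "
          f"val acc {correct / total:.3f}")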
Related
I am using a BERT pretrained model for multi-class text classification and got this error:
RuntimeError: stack expects each tensor to be equal size, but got [36, 3] at entry 0 and [48, 3] at entry 1
As per the PyTorch docs on torch.stack(), the input tensors all need to have the same shape to be stacked, but I can't find where this error comes from.
import config
import torch
class BERTDataset:
def __init__(self,text, targets):
self.tokenizer = config.TOKENIZER
self.max_len = config.MAX_LEN
self.text = text
self.targets = targets
def __len__(self):
return len(self.text)
def __getitem__(self, item):
text = str(self.text[item])
text = " ".join(text.split())
inputs = self.tokenizer.batch_encode_plus(
text,
add_special_tokens=True,
return_attention_mask=True,
padding=True,
max_length=config.MAX_LEN,
return_tensors='pt',
truncation = True
)
ids = inputs["input_ids"]
mask = inputs["attention_mask"]
token_type_ids = inputs["token_type_ids"]
return {
"ids" : torch.tensor(ids,dtype=torch.long),
"mask" : torch.tensor(mask,dtype=torch.long),
"token_type_ids" : torch.tensor(token_type_ids,dtype=torch.long),
"targets" : torch.tensor(self.targets[item],dtype = torch.long)
}
from tqdm import tqdm
import config
import torch
import numpy as np
def train_fn(model, dataloader, optimizer, scheduler, device):
progress_bar = tqdm(dataloader, desc='Epoch {:1d}'.format(config.EPOCHS), leave=False, disable=False)
for batch in progress_bar:
loss_train_total = 0
model.zero_grad()
batch = tuple(b.to(device) for b in batch)
inputs = {'input_ids': batch[0],
'attention_mask': batch[1],
'token_type_ids': batch[2],
'targets': batch[3],
}
outputs = model(**inputs)
loss = outputs[0]
loss_train_total += loss.item()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
scheduler.step()
progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item()/len(batch))})
return loss_train_total
def evaluate(dataloader,model,device):
model.eval()
loss_val_total = 0
predictions, true_vals = [], []
for batch in dataloader:
batch = tuple(b.to(device) for b in batch)
inputs = {'input_ids': batch[0],
'attention_mask': batch[1],
'token_type_ids': batch[2],
'targets': batch[3],
}
with torch.no_grad():
outputs = model(**inputs)
loss = outputs[0]
logits = outputs[1]
loss_val_total += loss.item()
logits = logits.detach().cpu().numpy()
label_ids = inputs['targets'].cpu().numpy()
predictions.append(logits)
true_vals.append(label_ids)
loss_val_avg = loss_val_total/len(dataloader)
predictions = np.concatenate(predictions, axis=0)
true_vals = np.concatenate(true_vals, axis=0)
return loss_val_avg, predictions, true_vals
import config
import transformers
import torch.nn as nn
class BertBaseUncased(nn.Module):
def __init__(self,dropout = 0.25,num_classes = 21):
super(BertBaseUncased,self).__init__()
self.bert = transformers.BertModel.from_pretrained(
config.BERT_PATH,return_dict=False
)
self.dropout = nn.Dropout(dropout)
self.linear = nn.Linear(768,num_classes)
self.relu = nn.ReLU()
def forward(self,ids, mask, token_type_ids):
_, pooled_output = self.bert(ids,attention_mask = mask,token_type_ids = token_type_ids)
dropout_output = self.dropout(pooled_output)
linear_output = self.linear(dropout_output)
final_layer = self.relu(linear_output)
return final_layer
import pandas as pd
from torch.utils.data import DataLoader
import torch
import config
from sklearn.model_selection import train_test_split
from dataset import BERTDataset
from model import BertBaseUncased
from transformers import AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm
import engine
import utils
def run():
df =pd.read_csv(config.TRAINING_FILE).fillna("none")
df.label = pd.factorize(df.label)[0]
df_train, df_valid = train_test_split(
df,
test_size=0.2,
random_state=42,
stratify=df.label.values
)
df_train = df_train.reset_index(drop = True)
df_valid = df_valid.reset_index(drop = True)
train_dataset = BERTDataset(text = df_train.text.values, targets =df_train.label.values)
test_dataset = BERTDataset(text = df_valid.text.values, targets = df_valid.label.values)
train_dataloader = DataLoader(train_dataset,
batch_size=config.TRAIN_BATCH_SIZE,
num_workers=4,
shuffle=True)
val_dataloader = DataLoader(test_dataset,
batch_size=config.VALID_BATCH_SIZE,
num_workers=4,
shuffle=False)
device = config.DEVICE
model = BertBaseUncased()
model.to(device)
optimizer = AdamW(model.parameters(),lr=config.LEARNING_RATE, eps=config.ESP)
scheduler = get_linear_schedule_with_warmup(optimizer,
num_warmup_steps=0,
num_training_steps=len(train_dataloader)*config.EPOCHS)
for _ in tqdm(range(1, config.EPOCHS+1)):
model.train()
torch.save(model.state_dict(), config.MODEL_PATH)
#torch.save(model.save_pretrained(config.MODEL_PATH))
tqdm.write(f'\nEpoch {config.EPOCHS}')
loss_train_total = engine.train_fn(model, train_dataloader, optimizer, scheduler, device)
loss_train_avg = loss_train_total/len(train_dataloader)
tqdm.write(f'Training loss: {loss_train_avg}')
val_loss, predictions, true_vals = engine.evaluate(val_dataloader,model,device)
val_f1 = utils.f1_score_func(predictions, true_vals)
tqdm.write(f'Validation loss: {val_loss}')
tqdm.write(f'F1 Score (Weighted): {val_f1}')
if __name__=="__main__":
run()
File "C:\Users\IDRBT-18\anaconda3\lib\site-packages\torch\utils\data\_utils\worker.py", line
302, in _worker_loop
data = fetcher.fetch(index)
File "C:\Users\IDRBT-18\anaconda3\lib\site-packages\torch\utils\data\_utils\fetch.py", line
61, in fetch
return self.collate_fn(data)
File "C:\Users\IDRBT-18\anaconda3\lib\site-packages\torch\utils\data\_utils\collate.py", line
265, in default_collate
return collate(batch, collate_fn_map=default_collate_fn_map)
File "C:\Users\IDRBT-18\anaconda3\lib\site-packages\torch\utils\data\_utils\collate.py", line
128, in collate
return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for
key in elem})
File "C:\Users\IDRBT-18\anaconda3\lib\site-packages\torch\utils\data\_utils\collate.py",
line 128, in <dictcomp>
return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for
key in elem})
File "C:\Users\IDRBT-18\anaconda3\lib\site-packages\torch\utils\data\_utils\collate.py",
line 120, in collate
return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
File "C:\Users\IDRBT-18\anaconda3\lib\site-packages\torch\utils\data\_utils\collate.py",
line 163, in collate_tensor_fn
return torch.stack(batch, 0, out=out)
RuntimeError: stack expects each tensor to be equal size, but got [36, 3] at entry 0 and
[48, 3] at entry 1
I am trying to find the reason behind this but have not gotten anywhere; if someone could reply, I would appreciate it.
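A likely cause, assuming the standard Hugging Face tokenizer API (and a version recent enough to accept padding='max_length'): batch_encode_plus expects a list of texts, and padding=True only pads within a single call, so the tensors returned for different items have different lengths and the DataLoader's default_collate cannot stack them. A minimal sketch of a __getitem__ that encodes one text and pads it to the fixed config.MAX_LEN so every item has the same shape:

def __getitem__(self, item):
    text = " ".join(str(self.text[item]).split())
    # encode a single example and pad it to a fixed length so that
    # default_collate can stack tensors of identical size
    inputs = self.tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=config.MAX_LEN,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
    )
    return {
        "ids": torch.tensor(inputs["input_ids"], dtype=torch.long),
        "mask": torch.tensor(inputs["attention_mask"], dtype=torch.long),
        "token_type_ids": torch.tensor(inputs["token_type_ids"], dtype=torch.long),
        "targets": torch.tensor(self.targets[item], dtype=torch.long),
    }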
I am a beginner in machine learning and I am trying to train a model to count how many numbers in a 1D vector of length 10 are below 0.5. The input vectors contain numbers between 0 and 1. I generate the input data and the labels in my script instead of loading them from a separate file, because the data is so simple.
This is the Code:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class MyNet(nn.Module):
def __init__(self):
super(MyNet, self).__init__()
self.lin1 = nn.Linear(10,10)
self.lin2 = nn.Linear(10,1)
def forward(self,x):
x = self.lin1(x)
x = F.relu(x)
x = self.lin2(x)
return x
net = MyNet()
net.to(device)
def train():
criterion = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.1)
for epochs in range(100):
target = 0
data = torch.rand(10)
for entry in data:
if entry < 0.5:
target += 1
# print(target)
# print(data)
data = data.to(device)
out = net(data)
# print(out)
target = torch.Tensor(target)
target = target.to(device)
loss = criterion(out, target)
print(loss)
net.zero_grad()
loss.backward()
optimizer.step()
def test():
acc_error = 0
for i in range(100):
test_data = torch.rand(10)
test_data.to(device)
test_target = 0
for entry in test_data:
if entry < 0.5:
test_target += 1
out = net(test_data)
error = test_target - out
if error < 0:
error *= -1
acc_error += error
overall_error = acc_error / 100
print(overall_error)
train()
test()
This is the error:
Traceback (most recent call last):
File "test1.py", line 70, in <module>
test()
File "test1.py", line 59, in test
out = net(test_data)
File "/vol/fob-vol7/mi18/radtklau/SP/sem_project/lib64/python3.6/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "test1.py", line 15, in forward
x = self.lin1(x)
File "/vol/fob-vol7/mi18/radtklau/SP/sem_project/lib64/python3.6/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/vol/fob-vol7/mi18/radtklau/SP/sem_project/lib64/python3.6/site-packages/torch/nn/modules/linear.py", line 94, in forward
return F.linear(input, self.weight, self.bias)
File "/vol/fob-vol7/mi18/radtklau/SP/sem_project/lib64/python3.6/site-packages/torch/nn/functional.py", line 1753, in linear
return torch._C._nn.linear(input, weight, bias)
RuntimeError: Tensor for 'out' is on CPU, Tensor for argument #1 'self' is on CPU, but expected them to be on GPU (while checking arguments for addmm)
The other posts regarding the topic have not solved my problem. Maybe somebody can help. Thanks!
Notice how your error message traces back to test, while train works fine.
You've transferred your data correctly in train:
data = data.to(device)
But not in test:
test_data.to(device)
Instead, the result should be reassigned to test_data, since torch.Tensor.to returns a new tensor rather than modifying the original in place:
test_data = test_data.to(device)
import torch
import torch.nn as nn
import torchvision.datasets as dsets
from skimage import transform
import torchvision.transforms as transforms
from torch.autograd import Variable
import pandas as pd;
import numpy as np;
from torch.utils.data import Dataset, DataLoader
import statistics
import random
import math
class FashionMNISTDataset(Dataset):
'''Fashion MNIST Dataset'''
def __init__(self, csv_file, transform=None):
"""
Args:
csv_file (string): Path to the csv file
transform (callable): Optional transform to apply to sample
"""
data = pd.read_csv(csv_file)
self.X = np.array(data.iloc[:, 1:]).reshape(-1, 1, 28, 28)
self.Y = np.array(data.iloc[:, 0])
del data
self.transform = transform
def __len__(self):
return len(self.X)
def __getitem__(self, idx):
item = self.X[idx]
label = self.Y[idx]
if self.transform:
item = self.transform(item)
return (item, label)
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.layer1 = nn.Sequential(
nn.Linear(616,300),
nn.ReLU())
self.layer2 = nn.Sequential(
nn.Linear(300,100),
nn.ReLU())
self.fc = nn.Linear(100, 10)
def forward(self, x):
print("x shape",x.shape)
out = self.layer1(x)
out = self.layer2(out)
out = self.fc(out)
return out
def run():
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 15
batch_size = 100
learning_rate = 0.0001
train_dataset = FashionMNISTDataset(csv_file='fashion-mnist_train.csv')
test_dataset = FashionMNISTDataset(csv_file='fashion-mnist_test.csv')
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=True)
#instance of the Conv Net
cnn = CNN()
cnn.to(device)
#loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)
losses = []
for epoch in range(num_epochs):
l = 0
for i, (images, labels) in enumerate(train_loader):
images = Variable(images.float())
labels = Variable(labels)
#print(images[0])
images = images.to(device)
labels = labels.to(device)
print("img shape=",images.shape, "label shape=",labels.shape)
images = images.resize_((100,616))
print("img shape=",images.shape, "label shape=",labels.shape)
# Forward + Backward + Optimize
optimizer.zero_grad()
outputs = cnn(images)
loss = criterion(outputs, labels)
#print(loss)
loss.backward()
optimizer.step()
#print(loss.item())
losses.append(loss.item())
l = loss.item()
cnn.eval()
with torch.no_grad():
val_loss = []
for images, labels in test_loader:
images = Variable(images.float()).to(device)
labels = labels.to(device)
outputs = cnn.forward(images)
batch_loss = criterion(outputs, labels)
val_loss.append(batch_loss.item())
avgloss = statistics.mean(val_loss)
if avgloss < min(losses):
torch.save(cnn.state_dict(), 'model')
cnn.train()
if (i+1) % 100 == 0:
print ('Epoch : %d/%d, Iter : %d/%d, Loss: %.4f'
%(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.item()))
print(l)
final_model = CNN()
final_model.load_state_dict(torch.load('model'))
final_model.eval()
correct = 0
total = 0
for images, labels in test_loader:
images = Variable(images.float()).to(device)
outputs = final_model(images).to(device)
labels.to(device)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum()
print('Test Accuracy of the model on the 10000 test images: %.4f %%' % (100 * correct / total))
if __name__ == '__main__':
run()
I have enclosed all the code for testing purposes. Here is the error I get:
img shape= torch.Size([100, 1, 28, 28]) label shape= torch.Size([100])
img shape= torch.Size([100, 616]) label shape= torch.Size([100])
x shape torch.Size([100, 616])
x shape torch.Size([100, 1, 28, 28])
Traceback (most recent call last):
  File "test.py", line 145, in <module>
    run()
  File "test.py", line 115, in run
    outputs = cnn.forward(images)
  File "test.py", line 56, in forward
    out = self.layer1(x)
  File "/usr/share/anaconda3/envs/DL/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/share/anaconda3/envs/DL/lib/python3.6/site-packages/torch/nn/modules/container.py", line 92, in forward
    input = module(input)
  File "/usr/share/anaconda3/envs/DL/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/share/anaconda3/envs/DL/lib/python3.6/site-packages/torch/nn/modules/linear.py", line 67, in forward
    return F.linear(input, self.weight, self.bias)
  File "/usr/share/anaconda3/envs/DL/lib/python3.6/site-packages/torch/nn/functional.py", line 1354, in linear
    output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [2800 x 28], m2: [616 x 300] at /opt/conda/conda-bld/pytorch_1549630534704/work/aten/src/THC/generic/THCTensorMathBlas.cu:266
The problem here is that I want all 616 pixels to be fed into the neural network as input, but I don't know how to do so. I tried to reshape the input to solve the problem, but then model.forward ran twice, once with the correct shape and then with the wrong shape.
You are calling forward twice in run:
Once for the training data
Once for the validation data
However, you do not appear to have applied the following transformation to your validation data:
images = images.resize_((100,616))
Maybe consider doing the resize in the forward function.
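A minimal sketch of that suggestion, using the imports from the question and flattening each image inside forward so both training and validation batches get the same treatment. Note that a 28x28 image actually flattens to 784 values, not 616, so the first Linear layer's in_features is changed accordingly in this sketch:

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # 28 * 28 = 784 input features once each image is flattened
        self.layer1 = nn.Sequential(nn.Linear(784, 300), nn.ReLU())
        self.layer2 = nn.Sequential(nn.Linear(300, 100), nn.ReLU())
        self.fc = nn.Linear(100, 10)

    def forward(self, x):
        # flatten (batch, 1, 28, 28) -> (batch, 784) for every batch that comes in
        x = x.view(x.size(0), -1)
        out = self.layer1(x)
        out = self.layer2(out)
        return self.fc(out)

With this, the explicit images.resize_((100, 616)) call in the training loop can be removed, and the validation pass no longer needs its own reshape.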
I'm trying to train a simple GAN on GPU with Keras. I verified that the code runs on CPU on my laptop. Then I added multi_gpu_model as shown below to enable it to run on a set of GPUs. However, I am getting the following error:
RuntimeError: ('The name "Discriminator" is used 2 times in the model. All layer names should be unique. Layer names: ', ['input_3', 'input_4', 'lambda_3', 'lambda_4', 'lambda_5', 'lambda_6', 'model_3', 'Discriminator', 'Discriminator'])
The error seems to suggest that because I'm calling the Discriminator model multiple times, the layer names overlap. However, it is unclear to me how to get around this problem. For your convenience, the full code is provided below:
from __future__ import print_function, division
from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.merge import _Merge
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D, Convolution2D, Conv2DTranspose
from keras.models import Sequential, Model
from keras.optimizers import Adam, RMSprop
from keras.utils import multi_gpu_model
import keras.backend as K
from optimizer import optimAdam
from functools import partial
# import inception_score
import _pickle as cPickle
import matplotlib.pyplot as plt
import os
import sys
import tensorflow as tf
import numpy as np
from PIL import Image
BATCH_SIZE = 128
GRADIENT_PENALTY_WEIGHT = 10
class RandomWeightedAverage(_Merge):
"""Takes a randomly-weighted average of two tensors. In geometric terms, this outputs a random point on the line
between each pair of input points.
Inheriting from _Merge is a little messy but it was the quickest solution I could think of.
Improvements appreciated."""
def _merge_function(self, inputs):
weights = K.random_uniform((BATCH_SIZE, 1, 1, 1))
return (weights * inputs[0]) + ((1 - weights) * inputs[1])
class GANGPU():
def __init__(self,gan_type):
assert gan_type in ['gan','wgan','improved_wgan','optim']
print("GAN Type: " + gan_type)
self.type = gan_type
self.noise_shape = (100,)
self.img_shape = (28, 28, 1)
self.clip_value = 0.0001 # threshold for weight cliping (-c,c)
self.d_losses = []
real_img = Input(shape=self.img_shape)
# set gan type specific parameters
optimizer = self.select_optimizer()
loss = self.select_loss()
self.n_critic = self.select_n_critic()
# Now we initialize the generator and discriminator.
generator = self.make_generator()
discriminator = self.make_discriminator()
# The parallel_generator_model is used when we want to train the generator layers.
# As such, we ensure that the discriminator layers are not trainable.
for layer in discriminator.layers:
layer.trainable = False
discriminator.trainable = False
generator_input = Input(shape=(100,))
generator_layers = generator(generator_input)
discriminator_layers_for_generator = discriminator(generator_layers)
generator_model = Model(inputs=[generator_input], outputs=[discriminator_layers_for_generator])
# We use the Adam paramaters from Gulrajani et al.
parallel_generator_model = multi_gpu_model(generator_model, gpus=2)
parallel_generator_model.compile(optimizer=optimizer, loss=loss)
# Now that the parallel_generator_model is compiled, we can make the discriminator layers trainable.
for layer in discriminator.layers:
layer.trainable = True
for layer in generator.layers:
layer.trainable = False
discriminator.trainable = True
generator.trainable = False
# The parallel_discriminator_model is more complex. It takes both real image samples and random noise seeds as input.
# The noise seed is run through the generator model to get generated images. Both real and generated images
# are then run through the discriminator.
real_samples = Input(shape=self.img_shape)
generator_input_for_discriminator = Input(shape=self.noise_shape)
generated_samples_for_discriminator = generator(generator_input_for_discriminator)
discriminator_output_from_generator = discriminator(generated_samples_for_discriminator)
discriminator_output_from_real_samples = discriminator(real_samples)
if self.type in ['gan','wgan']:
discriminator_model = Model(inputs=[real_samples, generator_input_for_discriminator],
outputs=[discriminator_output_from_real_samples,
discriminator_output_from_generator])
parallel_discriminator_model = multi_gpu_model(discriminator_model, gpus=2)
parallel_discriminator_model.compile(optimizer=optimizer,
loss=[loss,
loss])
elif self.type in ['improved_wgan','optim']:
print("Gradient Penalty Applied")
# We also need to generate weighted-averages of real and generated samples, to use for the gradient norm penalty.
averaged_samples = RandomWeightedAverage()([real_samples, generated_samples_for_discriminator])
# We then run these samples through the discriminator as well. Note that we never really use the discriminator
# output for these samples - we're only running them to get the gradient norm for the gradient penalty loss.
averaged_samples_out = discriminator(averaged_samples)
# The gradient penalty loss function requires the input averaged samples to get gradients. However,
# Keras loss functions can only have two arguments, y_true and y_pred. We get around this by making a partial()
# of the function with the averaged samples here.
partial_gp_loss = partial(self.gradient_penalty_loss,
averaged_samples=averaged_samples,
gradient_penalty_weight=GRADIENT_PENALTY_WEIGHT)
partial_gp_loss.__name__ = 'gradient_penalty' # Functions need names or Keras will throw an error
discriminator_model = Model(inputs=[real_samples, generator_input_for_discriminator],
outputs=[discriminator_output_from_real_samples,
discriminator_output_from_generator,
averaged_samples_out])
parallel_discriminator_model = multi_gpu_model(discriminator_model, gpus=2)
parallel_discriminator_model.compile(optimizer=optimizer,
loss=[loss,
loss,
partial_gp_loss])
self.parallel_generator_model, self.parallel_discriminator_model = parallel_generator_model, parallel_discriminator_model
self.generator, self.discriminator = generator, discriminator
def select_optimizer(self):
if self.type == 'gan':
print("Optimizer: Adam")
return Adam(lr=0.0002, beta_1=0.5)
elif self.type == 'wgan':
print("Optimizer: RMSProp")
return RMSprop(lr=0.00005)
elif self.type == 'improved_wgan':
print("Optimizer: Adam")
return Adam(lr=0.0001, beta_1=0.5, beta_2=0.9)
elif self.type == 'optim':
print("Optimizer: OptimAdam")
return optimAdam(lr=0.0001, beta_1=0.5, beta_2=0.9)
def select_loss(self):
if self.type == 'gan':
print("Loss: Binary Cross Entropy")
return 'binary_crossentropy'
elif self.type in ['wgan','improved_wgan','optim']:
print("Loss: Wasserstein")
return self.wasserstein_loss
def select_n_critic(self):
if self.type == 'gan':
print("Critics Ratio: 1")
return 1
elif self.type in ['wgan','improved_wgan','optim']:
print("Critics Ratio: 5")
return 5
# for WGAN, Improved WGAN, Optim
def wasserstein_loss(self, y_true, y_pred):
return K.mean(y_true * y_pred)
# for Improved WGAN, Optim
def gradient_penalty_loss(self, y_true, y_pred, averaged_samples, gradient_penalty_weight):
gradients = K.gradients(K.sum(y_pred), averaged_samples)
gradient_l2_norm = K.sqrt(K.sum(K.square(gradients)))
gradient_penalty = gradient_penalty_weight * K.square(1 - gradient_l2_norm)
return gradient_penalty
def make_generator(self):
# 2-layer fully connected NN: 100 x 512 x 784
model = Sequential(name='Generator')
model.add(Dense(256, activation="relu", input_dim=100))
model.add(Dense(np.prod(self.img_shape), activation='tanh'))
model.add(Reshape(self.img_shape))
return model
def make_discriminator(self):
# 2-layer fully connected NN: 784 x 512 x 1
model = Sequential(name='Discriminator')
model.add(Flatten(input_shape=self.img_shape))
model.add(Dense(512, activation="relu"))
model.add(Dense(1, activation='sigmoid'))
return model
def generate_images(self, output_dir, epoch):
"""Feeds random seeds into the generator and tiles and saves the output to a PNG file."""
def tile_images(image_stack):
"""Given a stacked tensor of images, reshapes them into a horizontal tiling for display."""
assert len(image_stack.shape) == 3
image_list = [image_stack[i, :, :] for i in range(image_stack.shape[0])]
tiled_images = np.concatenate(image_list, axis=1)
return tiled_images
test_image_stack = self.generator.predict(np.random.rand(100, 100))
test_image_stack = (test_image_stack * 127.5) + 127.5
test_image_stack = np.squeeze(np.round(test_image_stack).astype(np.uint8))
tiled_output = tile_images(test_image_stack)
tiled_output = Image.fromarray(tiled_output, mode='L') # L specifies greyscale
outfile = os.path.join(output_dir, 'epoch_{}.png'.format(epoch))
tiled_output.save(outfile)
outfile = os.path.join(output_dir, 'epoch_{}.pkl'.format(epoch))
with open(outfile, 'wb') as f:
cPickle.dump(test_image_stack, f)
def train(self, epochs, batch_size=128, save_interval=50):
# First we load the image data, reshape it and normalize it to the range [-1, 1]
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = np.concatenate((X_train, X_test), axis=0)
if K.image_data_format() == 'channels_first':
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1], X_train.shape[2]))
else:
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2], 1))
X_train = (X_train.astype(np.float32) - 127.5) / 127.5
# We make three label vectors for training. positive_y is the label vector for real samples, with value 1.
# negative_y is the label vector for generated samples, with value -1. The dummy_y vector is passed to the
# gradient_penalty loss function and is not used.
positive_y = np.ones((batch_size, 1), dtype=np.float32)
negative_y = -positive_y
if self.type in ['improved_wgan','optim']:
dummy_y = np.zeros((batch_size, 1), dtype=np.float32)
self.discriminator_losses = []
self.generator_losses = []
output_dir = '../log_'+self.type
for epoch in range(epochs):
np.random.shuffle(X_train)
print("Epoch: ", epoch)
print("Number of batches: ", int(X_train.shape[0] // batch_size))
discriminator_loss = []
generator_loss = []
minibatches_size = batch_size * self.n_critic
for i in range(int(X_train.shape[0] // (batch_size * self.n_critic))):
# ---------------------
# Train Discriminator
# ---------------------
discriminator_minibatches = X_train[i * minibatches_size:(i + 1) * minibatches_size]
for j in range(self.n_critic):
image_batch = discriminator_minibatches[j * batch_size:(j + 1) * batch_size]
noise = np.random.rand(batch_size, 100).astype(np.float32)
if self.type in ['gan','wgan']:
discriminator_loss.append(self.parallel_discriminator_model.train_on_batch([image_batch, noise],
[positive_y, negative_y]))
elif self.type in ['improved_wgan','optim']:
discriminator_loss.append(self.parallel_discriminator_model.train_on_batch([image_batch, noise],
[positive_y, negative_y, dummy_y]))
if self.type == 'wgan':
# Clip discriminator weights
for l in self.parallel_discriminator_model.layers:
weights = l.get_weights()
weights = [np.clip(w, -self.clip_value, self.clip_value) for w in weights]
l.set_weights(weights)
# ---------------------
# Train Generator
# ---------------------
noise = np.random.normal(0, 1, (batch_size, 100))
generator_loss.append(self.parallel_generator_model.train_on_batch(noise, positive_y))
# If at save interval => save generated image samples
if epoch % save_interval == 0:
self.generate_images(output_dir, epoch)
self.generator.save_weights(os.path.join(output_dir, 'epoch_{}_g.h5'.format(epoch)))
self.discriminator.save_weights(os.path.join(output_dir, 'epoch_{}_d.h5'.format(epoch)))
self.discriminator_losses.append(discriminator_loss)
self.generator_losses.append(generator_loss)
if __name__ == '__main__':
gan = GANGPU('gan')
gan.train(100, batch_size=BATCH_SIZE, save_interval=1)
Here's the full traceback:
Traceback (most recent call last):
File "gangpu.py", line 278, in <module>
gan = GANGPU('gan')
File "gangpu.py", line 96, in __init__
parallel_discriminator_model = multi_gpu_model(discriminator_model, gpus=2)
File "/n/home06/koshiba/.conda/envs/Keras7/lib/python3.6/site-packages/keras/utils/multi_gpu_utils.py", line 189, in multi_gpu_model
return Model(model.inputs, merged)
File "/n/home06/koshiba/.conda/envs/Keras7/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/n/home06/koshiba/.conda/envs/Keras7/lib/python3.6/site-packages/keras/engine/topology.py", line 1829, in __init__
'Layer names: ', all_names)
RuntimeError: ('The name "Discriminator" is used 2 times in the model. All layer names should be unique. Layer names: ', ['input_3', 'input_4', 'lambda_3', 'lambda_4', 'lambda_5', 'lambda_6', 'model_3', 'Discriminator', 'Discriminator'])
It's only a guess and I'm not able to test it myself, but it seems strange to me that you build your models with tf.device on the CPU and then call the multi_gpu_model convenience function. You could try it without the device placement.
This is a piece of code I got from GitHub for a hierarchical attention network. The code was originally written for Keras 1.2.2; now I have to make it compile with Keras 2.0.5, but it produces error messages that I cannot resolve.
The original code is the following
MAX_SENT_LENGTH = 100
MAX_SENTS = 20
MAX_NB_WORDS = 276176
EMBEDDING_DIM = 128
VALIDATION_SPLIT = 0.1
# Feed the data
# Here you have source data
x_train = np.load('./data/X_full_train_data.npy')
y_train = np.load('./data/X_full_train_labels.npy')
x_val = np.load('./data/X_full_test_data.npy')
y_val = np.load('./data/X_full_test_labels.npy')
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y_train)))
x_train = x_train[shuffle_indices]
y_train = y_train[shuffle_indices]
shuffle_indices = np.random.permutation(np.arange(len(y_val)))
x_val = x_train[shuffle_indices]
y_val = y_train[shuffle_indices]
with open("./data/W.npy", "rb") as fp:
embedding_weights = np.load(fp)
# here you feed embeding matrix
embedding_layer = Embedding(MAX_NB_WORDS,
EMBEDDING_DIM,
weights=[embedding_weights],
input_length=MAX_SENT_LENGTH,
trainable=True)
# building Hierachical Attention network
class AttLayer(Layer):
def __init__(self, **kwargs):
self.init = initializers.get('normal')
super(AttLayer, self).__init__(**kwargs)
def build(self, input_shape):
assert len(input_shape)==3
self.W = self.init((input_shape[-1],))
self.trainable_weights = [self.W]
super(AttLayer, self).build(input_shape)
def call(self, x, mask=None):
eij = K.tanh(K.dot(x, self.W))
ai = K.exp(eij)
weights = ai/K.sum(ai, axis=1).dimshuffle(0,'x')
weighted_input = x*weights.dimshuffle(0,1,'x')
ret = weighted_input.sum(axis=1)
return ret
#def get_output_shape_for(self, input_shape):
def compute_output_shape(self,input_shape):
return (input_shape[0], input_shape[-1])
sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)
l_lstm = Bidirectional(GRU(100, return_sequences=True))(embedded_sequences)
l_dense = TimeDistributed(Dense(200))(l_lstm)
l_att = AttLayer()(l_lstm)
sentEncoder = Model(sentence_input, l_att)
review_input = Input(shape=(MAX_SENTS,MAX_SENT_LENGTH), dtype='int32')
review_encoder = TimeDistributed(sentEncoder)(review_input)
l_lstm_sent = Bidirectional(GRU(100, return_sequences=True))(review_encoder)
l_dense_sent = TimeDistributed(Dense(200))(l_lstm_sent)
l_att_sent = AttLayer()(l_lstm_sent)
preds = Dense(3, activation='softmax')(l_att_sent)
model = Model(input=review_input, output=preds)
model.compile(loss='binary_crossentropy',
optimizer='rmsprop',
metrics=['categorical_accuracy'])
print("model fitting - Hierachical attention network")
print(model.summary())
model.fit(x_train, y_train, nb_epoch=10, batch_size=32, validation_data=(x_val,y_val))
predictions = model.predict(x_val)
score, acc = model.evaluate(x_val, y_val,batch_size=32)
Then I have the following error
textClassifierHATT.py:235: UserWarning: The `nb_epoch` argument in `fit` has been renamed `epochs`.
model.fit(x_train, y_train, nb_epoch=10, batch_size=32, validation_data=(x_val,y_val))
Traceback (most recent call last):
File "textClassifierHATT.py", line 235, in <module>
model.fit(x_train, y_train, nb_epoch=10, batch_size=32, validation_data=(x_val,y_val))
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/engine/training.py", line 1575, in fit
self._make_train_function()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/engine/training.py", line 960, in _make_train_function
loss=self.total_loss)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 87, in wrapper
return func(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/optimizers.py", line 226, in get_updates
accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/optimizers.py", line 226, in <listcomp>
accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/backend/theano_backend.py", line 275, in int_shape
raise TypeError('Not a Keras tensor:', x)
TypeError: ('Not a Keras tensor:', Elemwise{add,no_inplace}.0)
The Keras model compiles successfully in model.compile(), but the error occurs in model.fit(), and I don't understand why. Can anyone tell me how to modify the code so that it runs with Keras 2.0? Thanks a lot.
The problem is in the build method of your custom layer: according to Keras' documentation, you need to create the weights with the self.add_weight function:
def build(self, input_shape):
assert len(input_shape)==3
self.W = self.add_weight(name='kernel',
shape=(input_shape[-1],),
initializer='normal',
trainable=True)
super(AttLayer, self).build(input_shape)
That, and a few API changes:
The input and output arguments of Model are now called inputs and outputs: Model(inputs=..., outputs=...)
The nb_epoch argument of fit is now called epochs
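Applied to the code in the question, those two changes would look like this (same variable names as above):

model = Model(inputs=review_input, outputs=preds)
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['categorical_accuracy'])
model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_val, y_val))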
The data provided for training is not a tensor but is fed as a NumPy array. Try converting the NumPy arrays to tensors using:
import tensorflow as tf
tf.convert_to_tensor(
value, dtype=None, dtype_hint=None, name=None
)
Then pass them to the model for training.