I built a GAN that predicts an output of shape (40,40,6) from two inputs of shapes [(40,40,4),(20,20,6)].
The model is actually working and already delivers results but I "only" get a GPU utilization between 60 and 70% (displayed by nvidia-smi).
My question is whether this is intrinsic to such a model, since it has to do work between the calls to train_on_batch, or whether there is a way to speed this process up.
A minimalist working example on random data would look like:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import UpSampling3D
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Add
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import concatenate
from tensorflow.keras.layers import Lambda
from tensorflow.keras.optimizers import Adam
# Turn on memory growth for every physical GPU that TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # The memory-growth setting has to be the same on all GPUs.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Raised when memory growth is set after the GPUs were initialized.
        print(e)
# =============================================================================
# define the model
# =============================================================================
def resBlock(X_in, num_of_features, kernel_size, scale):
    """Residual block: conv -> ReLU -> conv, scaled by ``scale``, added to ``X_in``.

    Both convolutions use 'same' padding and He-uniform initialisation, so
    ``X_in`` must already have ``num_of_features`` channels for the final
    addition to be valid.
    """
    conv_options = dict(kernel_initializer='he_uniform', padding='same')
    branch = Conv2D(num_of_features, kernel_size, **conv_options)(X_in)
    branch = Activation('relu')(branch)
    branch = Conv2D(num_of_features, kernel_size, **conv_options)(branch)
    # Damp the residual branch before it is merged with the shortcut.
    branch = Lambda(lambda t: t * scale)(branch)
    return Add()([X_in, branch])
class Generator(object):
    """Factory for the residual generator network.

    The network takes two inputs, ``input_A`` (4 channels) and ``input_B``
    (6 channels, half the spatial size of A), and produces a 6-channel output.
    """

    def __init__(self, noise_shape):
        # noise_shape is stored but not read when the model is assembled.
        self.noise_shape = noise_shape
        self.num_of_features = 128
        self.kernel_size = (3, 3)
        self.scale = 0.1
        self.padding = 8
        self.hp = int(self.padding / 2)  # half padding

    def generator(self):
        """Assemble and return the generator as a Keras ``Model``."""
        size_a = 32 + self.padding
        size_b = 16 + self.hp
        inputs_channels_A = Input((size_a, size_a, 4), name='input_A')
        inputs_channels_B = Input((size_b, size_b, 6), name='input_B')

        # Upsample B with size (2, 2, 1) so it can be stacked with A.
        # NOTE(review): UpSampling3D is applied to a 4-D (batch, h, w, c)
        # tensor here -- confirm the Keras version in use accepts that.
        upsampled_B = UpSampling3D(size=(2, 2, 1))(inputs_channels_B)

        # Concatenate A and the upsampled B along axis 3.
        stacked = concatenate([inputs_channels_A, upsampled_B], axis=3)

        # Entry convolution followed by six residual blocks.
        features = Conv2D(
            self.num_of_features,
            self.kernel_size,
            activation='relu',
            padding='same',
            kernel_initializer='he_normal',
        )(stacked)
        for _ in range(6):
            features = resBlock(features, self.num_of_features,
                                self.kernel_size, self.scale)

        # Final convolution maps back to 6 channels; the upsampled B is then
        # added as a last skip connection.
        residual = Conv2D(6, (3, 3), kernel_initializer='he_uniform',
                          padding='same')(features)
        output = Add()([residual, upsampled_B])

        return Model(inputs=[inputs_channels_A, inputs_channels_B],
                     outputs=output)
def discriminator_block(model, filters, kernel_size, strides):
    """Append conv ('same' padding) -> batch norm -> LeakyReLU(0.2) to ``model``.

    ``model`` is the tensor to extend; the extended tensor is returned.
    """
    conv = Conv2D(filters=filters, kernel_size=kernel_size,
                  strides=strides, padding="same")(model)
    normed = BatchNormalization(momentum=0.5)(conv)
    return LeakyReLU(alpha=0.2)(normed)
class Discriminator(object):
    """Factory for the convolutional discriminator network."""

    # (filters, strides) of the discriminator blocks, in the order applied.
    _BLOCKS = ((64, 2), (128, 1), (128, 2), (256, 1),
               (256, 2), (512, 1), (512, 2))

    def __init__(self, image_shape):
        self.image_shape = image_shape

    def discriminator(self):
        """Assemble and return the discriminator as a Keras ``Model``.

        The model maps an image of ``self.image_shape`` to a single sigmoid
        output.
        """
        dis_input = Input(shape=(self.image_shape))

        # Stem: one convolution without batch normalisation.
        x = Conv2D(filters=64, kernel_size=3, strides=1, padding="same")(dis_input)
        x = LeakyReLU(alpha=0.2)(x)

        for filters, strides in self._BLOCKS:
            x = discriminator_block(x, filters, 3, strides)

        # Dense head ending in one sigmoid unit.
        x = Flatten()(x)
        x = Dense(1024)(x)
        x = LeakyReLU(alpha=0.2)(x)
        x = Dense(1)(x)
        x = Activation('sigmoid')(x)

        return Model(inputs=dis_input, outputs=x)
def get_gan_network(discriminator, shape_list_AB, generator, optimizer, loss):
    """Chain generator and discriminator into one compiled model.

    The combined model takes inputs of shapes ``shape_list_AB[0]`` and
    ``shape_list_AB[1]`` and returns ``[generated image, discriminator score]``.
    The image output is trained with ``loss`` (weight 1) and the score with
    binary cross-entropy (weight 1e-3).
    """
    # Set before the combined model is built and compiled.
    discriminator.trainable = False

    gan_inputs = [Input(shape=shape_list_AB[k]) for k in (0, 1)]
    generated = generator(gan_inputs)
    validity = discriminator(generated)

    gan = Model(inputs=gan_inputs, outputs=[generated, validity])
    gan.compile(loss=[loss, "binary_crossentropy"],
                loss_weights=[1., 1e-3],
                optimizer=optimizer)
    return gan
def get_optimizer():
    """Return a new Adam optimizer with learning rate 1e-4."""
    return Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
# =============================================================================
# choose some parameters and compile the model
# =============================================================================
# Batch size and per-sample tensor shapes used by the example.
batch_size = 128
shape_input_A = (40, 40, 4)
shape_input_B = (20, 20, 6)
shape_output = (40, 40, 6)

# Build both networks; one Adam instance is passed to every compile call.
generator = Generator(shape_input_B).generator()  # todo shape
discriminator = Discriminator(shape_output).discriminator()  # todo shape
optimizer = get_optimizer()
generator.compile(optimizer=optimizer, loss="mse")
discriminator.compile(optimizer=optimizer, loss="binary_crossentropy")
gan = get_gan_network(
    discriminator,
    [shape_input_A, shape_input_B],
    generator,
    optimizer,
    "mse",
)
# =============================================================================
# training
# =============================================================================
def get_random_data(mod):
    """Return one batch of uniform random data.

    ``mod == 0`` yields the two network inputs ``[A, B]``; any other value
    yields a target batch with the network's output shape.
    """
    if mod != 0:
        # network output
        return np.random.rand(batch_size, 40, 40, 6)
    # network inputs
    input_a = np.random.rand(batch_size, 40, 40, 4)
    input_b = np.random.rand(batch_size, 20, 20, 6)
    return [input_a, input_b]
# Initialize the buffers that are reused across iterations.
# The `np.int` alias was removed from NumPy; the builtin `int` is equivalent.
rand_nums = np.empty(batch_size, dtype=int)
image_batch_lr = np.empty((batch_size,) + shape_input_B)
image_batch_hr = np.empty((batch_size,) + shape_output)
generated_images_sr = np.empty_like(image_batch_hr)
real_data_Y = np.empty(batch_size)
fake_data_Y = np.empty(batch_size)

for e in range(1, 10):
    print("epoch:", e)
    for batchindex in range(200):
        # Generate a batch of fake images with the current generator.
        generated_images_sr[:] = generator.predict(get_random_data(0))

        # Noisy labels: real in [0.8, 1.0], fake in [0.0, 0.2].
        real_data_Y[:] = np.ones(batch_size) - np.random.random_sample(batch_size) * 0.2
        fake_data_Y[:] = np.random.random_sample(batch_size) * 0.2

        # Discriminator step on one real and one generated batch.
        discriminator.trainable = True
        d_loss_real = discriminator.train_on_batch(get_random_data(1), real_data_Y)
        d_loss_fake = discriminator.train_on_batch(generated_images_sr, fake_data_Y)
        discriminator_loss = 0.5 * np.add(d_loss_fake, d_loss_real)

        # Generator step through the combined model.
        gan_Y = np.ones(batch_size) - np.random.random_sample(batch_size) * 0.2
        discriminator.trainable = False
        gan_loss = gan.train_on_batch(get_random_data(0), [get_random_data(1), gan_Y])

    # Report the losses of the last batch of the epoch.
    print("discriminator_loss : %f" % discriminator_loss)
    print("gan_loss :", gan_loss)
I run this code on my GTX2080 within a docker container tensorflow/tensorflow:2.0.0-gpu-py3.
Training a GAN implies some overhead that will not be executed on the GPU. In your case, obtaining real_data_Y and fake_data_Y, executing get_random_data() and computing the loss will result in GPU idle time.
You can try profiling your program with python -mcProfile -o performance.prof xxx.py and see if there are bottlenecks that can be improved, but 60 to 70% already seems not too bad.
Related
I am training a model for text sentiment classification with CNN. In it, the validation accuracy is initially more than training accuracy and then it decreases. Is this behavior acceptable? If not then what can be the reason and how to solve it?
My model:
class hyper():
    """Plain container for the model and training hyperparameters."""

    def __init__(self, embedding_dim, filter_sizes, num_filters, dropout_prob,
                 hidden_dims, batch_size, num_epochs):
        # Training parameters
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        # Model hyperparameters
        self.hidden_dims = hidden_dims
        self.dropout_prob = dropout_prob
        self.num_filters = num_filters
        self.filter_sizes = filter_sizes
        self.embedding_dim = embedding_dim
class prep_hyper():
    """Plain container for the preprocessing parameters."""

    def __init__(self, sequenceLength, max_words):
        self.max_words = max_words
        self.sequenceLength = sequenceLength
# Hyperparameter bundles for the model and for preprocessing.
m_hyper = hyper(
    embedding_dim=embed_dim,
    filter_sizes=(3, 4, 5, 6, 8),
    num_filters=80,
    dropout_prob=(0.2, 0.5),
    hidden_dims=50,
    batch_size=128,
    num_epochs=30,
)
pr_hyper = prep_hyper(sequenceLength=sequence_length, max_words=vocab_size)
model architecture:
def build_model(pr_hyper, m_hyper):
    """Build and compile the multi-kernel 1-D CNN text classifier.

    Args:
        pr_hyper: object providing ``sequenceLength`` and ``max_words``.
        m_hyper: object providing ``embedding_dim``, ``filter_sizes``,
            ``num_filters``, ``dropout_prob`` and ``hidden_dims``.

    Returns:
        The compiled Keras model with a 3-way softmax output.
    """
    # `shape` must be a tuple; "(n)" without the trailing comma is a plain int.
    model_input = Input(shape=(pr_hyper.sequenceLength,))

    # Frozen embedding initialised from the module-level `emb` matrix.
    x = Embedding(pr_hyper.max_words, m_hyper.embedding_dim,
                  weights=[emb], trainable=False)(model_input)

    conv_kern_reg = regularizers.l2(0.0001)
    conv_bias_reg = regularizers.l2(0.0001)

    # One convolution + global max pooling branch per kernel size.
    conv_blocks = []
    for sz in m_hyper.filter_sizes:
        conv = Convolution1D(filters=m_hyper.num_filters,
                             kernel_size=sz,
                             activation="relu",
                             strides=1,
                             kernel_regularizer=conv_kern_reg,
                             bias_regularizer=conv_bias_reg)(x)
        conv = GlobalMaxPooling1D()(conv)
        conv_blocks.append(conv)

    # Merge the branches (Concatenate needs at least two inputs).
    x = Concatenate()(conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0]

    x = Dense(m_hyper.hidden_dims, activation="relu")(x)
    x = Dropout(m_hyper.dropout_prob[1])(x)
    x = Dense(100, activation="relu")(x)
    x = Dropout(m_hyper.dropout_prob[1])(x)
    model_output = Dense(3, activation="softmax")(x)

    model = Model(model_input, model_output)
    model.compile(loss="categorical_crossentropy",
                  optimizer=keras.optimizers.Adam(learning_rate=0.00005),
                  metrics=["accuracy"])

    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    tf.keras.utils.plot_model(model, show_shapes=True)
    return model
INITIAL EPOCHS:
There are several possible reasons for this; for example, dropout layers are disabled during validation. For more information, I suggest reading a reference
that describes several possible reasons for this behavior.
I just found some code on GitHub and want to test it and learn from it.
However, when I run the code, it fails with "Failed to convert object of type <class 'tensorflow.python.data.ops.dataset_ops._VariantDataset'> to Tensor."
Could someone help me?
Thanks!
(https://github.com/tensorflow/tensorflow/issues/33487)
here is the code:
from time import time
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LSTM, Bidirectional, Conv1D
from tensorflow.keras.layers import Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
# to disable Eager in 2.0: tf.enable_eager_execution()
# to enable Eager in 1.14: tf.compat.v1.disable_eager_execution()
def timeit(func, iterations, *args):
    """Call ``func(*args)`` ``iterations`` times and print the mean time per call.

    Args:
        func: callable to benchmark.
        iterations: number of calls; must be non-zero.
        *args: positional arguments forwarded to ``func``.
    """
    # Local import: perf_counter is a monotonic, high-resolution clock, so the
    # measurement cannot be skewed by wall-clock adjustments as time.time() can.
    from time import perf_counter

    t0 = perf_counter()
    for _ in range(iterations):
        func(*args)
    print("Time/iter: %.4f sec" % ((perf_counter() - t0) / iterations))
def make_small_model(batch_shape):
    """Return a compiled Conv1D -> Dense binary classifier for ``batch_shape``."""
    ipt = Input(batch_shape=batch_shape)
    features = Conv1D(128, 400, strides=4, padding='same')(ipt)
    features = Flatten()(features)
    features = Dropout(0.5)(features)
    features = Dense(64, activation='relu')(features)
    out = Dense(1, activation='sigmoid')(features)

    model = Model(ipt, out)
    model.compile(Adam(lr=1e-4), 'binary_crossentropy')
    return model
def make_medium_model(batch_shape):
    """Return a compiled (Bi)LSTM -> Conv1D -> Dense binary classifier."""
    ipt = Input(batch_shape=batch_shape)

    # Recurrent front end: one bidirectional and one plain LSTM layer.
    seq = Bidirectional(LSTM(512, activation='relu', return_sequences=True))(ipt)
    seq = LSTM(512, activation='relu', return_sequences=True)(seq)

    # Convolution, then a dense head with a single dropout layer.
    h = Conv1D(128, 400, strides=4, padding='same')(seq)
    h = Flatten()(h)
    h = Dense(256, activation='relu')(h)
    h = Dropout(0.5)(h)
    h = Dense(128, activation='relu')(h)
    h = Dense(64, activation='relu')(h)
    out = Dense(1, activation='sigmoid')(h)

    model = Model(ipt, out)
    model.compile(Adam(lr=1e-4), 'binary_crossentropy')
    return model
def make_data(batch_shape, n_batches):
    """Return a ``tf.data.Dataset`` of ``n_batches`` random (data, target) batches.

    Data is standard-normal with shape ``batch_shape`` per element; targets are
    random 0/1 integers with shape ``(batch_shape[0], 1)`` per element.
    """
    # No trailing comma here: the original one turned `data` into a 1-tuple, so
    # every dataset element was ((x,), y) instead of (x, y).
    data = np.random.randn(n_batches, *batch_shape)
    trgt = np.random.randint(0, 2, (n_batches, batch_shape[0], 1))
    return tf.data.Dataset.from_tensor_slices((data, trgt))
# Driver: build the data set and a model, run one batch, then time model.fit.
# batch_shape is passed both to make_data and to Input(batch_shape=...).
batch_shape = (32, 400, 16)
data = make_data(batch_shape, n_batches=10)
model = make_medium_model(batch_shape) # OR change to make_small_model
# NOTE(review): a Dataset object (data.take(1)) is passed to train_on_batch
# here; this is presumably the call that raises the "Failed to convert object
# of type ..._VariantDataset to Tensor" error quoted above -- confirm.
model.train_on_batch(data.take(1))
# Time 10 runs of model.fit, each with steps_per_epoch=10.
timeit(lambda: model.fit(data, steps_per_epoch=10), 10)
I had a basic keras network predicting one value and it worked fine. I tried adding support for predicting 4 values, but when I do that, the output instantly trends to 0. Right now, the network is the simplistic model just for testing.
The input for the network is an array of shape (90,) and the output should have 4 values. I tried having an output layer with 4 nodes as well, but that showed the same behavior of this one. I've also tried some various loss functions.
def runNN(training_data, training_labels, test_data, test_labels, model=None):
    """Train the four-output regression network for 10 epochs.

    Args:
        training_data: network inputs with 90 features per sample.
        training_labels: indexable with 0..3, one label array per output
            ('preretirement', 'cola', 'initialNC', 'finalNC').
        test_data: not used by the visible part of this function.
        test_labels: not used by the visible part of this function.
        model: an already compiled model to continue training; when ``None``
            a new model is built and compiled.
    """
    if model is None:
        inp = Input(shape=(90,), name='input')
        # The parentheses of this Dense call were unbalanced in the original.
        hidden = Dense(units=90, activation='relu')(inp)
        hidden = Dropout(0.5)(hidden)
        hidden = Dense(units=180, activation='relu')(hidden)
        hidden = Dropout(0.5)(hidden)

        # NOTE(review): the 'relu' output activations are kept because they are
        # the subject of the question (all outputs collapse to 0).
        output1 = Dense(1, activation='relu', name='preretirement')(hidden)
        output2 = Dense(1, activation='relu', name='cola')(hidden)
        output3 = Dense(1, activation='relu', name='initialNC')(hidden)
        output4 = Dense(1, activation='relu', name='finalNC')(hidden)

        complete_model = Model(inputs=[inp],
                               outputs=[output1, output2, output3, output4])
        optimizer = Adam(lr=.0003)
        complete_model.compile(loss='mean_absolute_error',
                               optimizer=optimizer,
                               metrics=['mean_absolute_error'])
    else:
        # Reuse the caller's model; previously this path left complete_model
        # undefined.
        complete_model = model

    complete_model.fit(training_data,
                       {'preretirement': training_labels[0],
                        'cola': training_labels[1],
                        'initialNC': training_labels[2],
                        'finalNC': training_labels[3]},
                       epochs=10, batch_size=128)
The output after 1 epoch, and anything afterwards, is [0,0,0,0] for each test point. It should be a 4 item list with values between 0 and 1 such as [.34,.56,.12,.87]
Probably you're doing a prediction task or function fitting task. Two suggestions might help you:
sigmoid usually works better than relu in prediction task.
Do not use activation function at the final output layer.
The code below is modified from yours and it works fine.
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras.optimizers import Adam
# Settings for the example network.
dropout_rate = .5
activate_function = 'sigmoid'
num_iteration = 20

# Two hidden Dense layers, each followed by dropout.
inp = Input(shape=(90,), name='input')
model = Dense(units=90, activation=activate_function)(inp)
model = Dropout(rate=dropout_rate)(model)
model = Dense(units=180, activation=activate_function)(model)
model = Dropout(rate=dropout_rate)(model)

# Four single-unit output layers without an activation function (the
# question's version passed an activation to each of them).
output_names = ('preretirement', 'cola', 'initialNC', 'finalNC')
output1, output2, output3, output4 = [
    Dense(units=1, name=output_name)(model) for output_name in output_names
]

complete_model = Model(inputs=[inp],
                       outputs=[output1, output2, output3, output4])
optimizer = Adam(lr=.0003)
complete_model.compile(
    loss='mean_absolute_error',
    optimizer=optimizer,
    metrics=['mean_absolute_error'],
)
# Generate synthetic data for training and testing the model.
import numpy as np


def _make_labels(data):
    """Return the (n, 4) labels [m, sin(m), cos(m), m*m], m being each row's mean."""
    row_mean = np.sum(data, axis=1) / float(data.shape[1])
    return np.column_stack(
        [row_mean, np.sin(row_mean), np.cos(row_mean), row_mean * row_mean]
    )


num_train = 4000  # the number of training instances
# a normal distribution with mean=2, variance=1
training_data = np.random.normal(2, 1, (num_train, 90))
training_labels = _make_labels(training_data)
print(training_data.shape, training_labels.shape)

# 10 test instances; their labels are derived from test_data itself (the
# original loop computed them from training_data by mistake).
test_data = np.random.normal(0, 1, (10, 90))
test_labels = _make_labels(test_data)
print(test_data.shape, test_labels.shape)
# Train on the four label columns, keyed by output-layer name.
label_columns = {
    'preretirement': training_labels[:, 0],
    'cola': training_labels[:, 1],
    'initialNC': training_labels[:, 2],
    'finalNC': training_labels[:, 3],
}
complete_model.fit(training_data, label_columns,
                   epochs=num_iteration,
                   batch_size=128)

# predict() returns one array per output; print them next to the true labels.
results = complete_model.predict(test_data)
for i in range(10):
    print('true', test_labels[i])
    print('predicted', *(results[k][i, 0] for k in range(4)))
    print('--------------------------')
The code produces
The input are 3 independent channels of 1000 features. I'm trying to pass each channel through a independent NN path, then concatenate them into a flat layer. Then apply a FCN on the flatten layer for a binary classification.
I'm trying to add multiple Dense layers together, like this:
# Question's version: three Dense stacks (224 -> 112 -> 56), one per slice of
# the input, merged with Add and followed by a Dense/Dropout head ending in a
# single sigmoid unit.
def tst_1():
inputs = Input((3, 1000, 1))
# NOTE(review): the input tensor is sliced by direct indexing instead of
# through a Keras layer; according to the explanation below this is what
# triggers "'NoneType' object has no attribute '_inbound_nodes'".
dense10 = Dense(224, activation='relu')(inputs[0,:,1])
dense11 = Dense(112, activation='relu')(dense10)
dense12 = Dense(56, activation='relu')(dense11)
dense20 = Dense(224, activation='relu')(inputs[1,:,1])
dense21 = Dense(112, activation='relu')(dense20)
dense22 = Dense(56, activation='relu')(dense21)
dense30 = Dense(224, activation='relu')(inputs[2,:,1])
dense31 = Dense(112, activation='relu')(dense30)
dense32 = Dense(56, activation='relu')(dense31)
# The three branch outputs are summed element-wise by Add.
flat = keras.layers.Add()([dense12, dense22, dense32])
dense1 = Dense(224, activation='relu')(flat)
drop1 = Dropout(0.5)(dense1)
dense2 = Dense(112, activation='relu')(drop1)
drop2 = Dropout(0.5)(dense2)
dense3 = Dense(32, activation='relu')(drop2)
densef = Dense(1, activation='sigmoid')(dense3)
model = Model(inputs = inputs, outputs = densef)
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
return model
# Build the model and print its layer summary.
model = tst_1()
model.summary()
but I got this error:
/usr/local/lib/python2.7/dist-packages/keras/engine/network.pyc in build_map(tensor, finished_nodes, nodes_in_progress, layer, node_index, tensor_index)
1310 ValueError: if a cycle is detected.
1311 """
-> 1312 node = layer._inbound_nodes[node_index]
1313
1314 # Prevent cycles.
AttributeError: 'NoneType' object has no attribute '_inbound_nodes'
The problem is that splitting the input data using inputs[0,:,1] is not done as a keras layer.
You need to create a Lambda layer to be able to accomplish this.
The following code:
from keras import layers
from keras.layers import Input, Add, Dense,Dropout, Lambda, Concatenate
from keras.layers import Flatten
from keras.optimizers import Adam
from keras.models import Model
import keras.backend as K
def tst_1():
    """Build the per-channel network: one Dense stack per channel, then a shared head.

    Each of the ``num_channels`` slices of the input is passed through its own
    224 -> 112 -> 56 Dense stack; the branch outputs are concatenated and fed
    to a Dense/Dropout head ending in a single sigmoid unit.

    Returns:
        The uncompiled Keras ``Model``.
    """
    num_channels = 3
    inputs = Input(shape=(num_channels, 1000, 1))

    branch_outputs = []
    for i in range(num_channels):
        # Slicing the ith channel. The index is bound as a default argument so
        # each Lambda keeps its own channel; a plain closure over `i` would see
        # the final loop value whenever the function is called again later.
        out = Lambda(lambda x, idx=i: x[:, idx, :, :], name="Lambda_" + str(i))(inputs)

        # Setting up your per-channel layers (replace with actual sub-models):
        # NOTE(review): each slice keeps a trailing axis of size 1 and Flatten
        # is imported but unused -- confirm the Dense layers are meant to act
        # on that last axis.
        out = Dense(224, activation='relu', name="Dense_224_" + str(i))(out)
        out = Dense(112, activation='relu', name="Dense_112_" + str(i))(out)
        out = Dense(56, activation='relu', name="Dense_56_" + str(i))(out)
        branch_outputs.append(out)

    # Concatenating together the per-channel results:
    out = Concatenate()(branch_outputs)

    dense1 = Dense(224, activation='relu')(out)
    drop1 = Dropout(0.5)(dense1)
    dense2 = Dense(112, activation='relu')(drop1)
    drop2 = Dropout(0.5)(dense2)
    dense3 = Dense(32, activation='relu')(drop2)
    densef = Dense(1, activation='sigmoid')(dense3)

    model = Model(inputs=inputs, outputs=densef)
    return model


# Build, compile and summarise the network.
Net = tst_1()
Net.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
Net.summary()
correctly created the net that you want.
Thanks to #CAta.RAy
I solved it in this way:
import numpy as np
from keras import layers
from keras.layers import Input, Add, Dense,Dropout, Lambda
from keras.layers import Flatten
from keras.optimizers import Adam
from keras.models import Model
import keras.backend as K
def tst_1():
    """Build the three-branch network on a (3, 1000) input.

    Every row of the input goes through its own 224 -> 112 -> 56 Dense stack;
    the branch outputs are summed with ``Add`` and passed to a Dense/Dropout
    head ending in a single sigmoid unit. The model is returned uncompiled.
    """
    inputs = Input((3, 1000))

    branches = []
    for channel in range(3):
        # The default argument pins the row index for this branch.
        branch = Lambda(lambda x, c=channel: x[:, c])(inputs)
        for units in (224, 112, 56):
            branch = Dense(units, activation='relu')(branch)
        branches.append(branch)

    head = Add()(branches)
    for units in (224, 112):
        head = Dense(units, activation='relu')(head)
        head = Dropout(0.5)(head)
    head = Dense(32, activation='relu')(head)
    densef = Dense(1, activation='sigmoid')(head)

    return Model(inputs=inputs, outputs=densef)


# Build, compile and summarise the network.
Net = tst_1()
Net.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
Net.summary()
I'm trying to train a simple GAN on GPU with Keras. I verified that the code runs with CPU on my laptop. Then, I added multi_gpu_model as per below to enable it to run on a set of GPUs. However, I am getting the following error:
RuntimeError: ('The name "Discriminator" is used 2 times in the model. All layer names should be unique. Layer names: ', ['input_3', 'input_4', 'lambda_3', 'lambda_4', 'lambda_5', 'lambda_6', 'model_3', 'Discriminator', 'Discriminator'])
The error seems to suggest that because I'm calling the Discriminator model multiple times, the name of the layers overlap. However, it is unclear to me how I can get around this problem. For your convenience, the full code is provided below:
from __future__ import print_function, division
from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.merge import _Merge
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D, Convolution2D, Conv2DTranspose
from keras.models import Sequential, Model
from keras.optimizers import Adam, RMSprop
from keras.utils import multi_gpu_model
import keras.backend as K
from optimizer import optimAdam
from functools import partial
# import inception_score
import _pickle as cPickle
import matplotlib.pyplot as plt
import os
import sys
import tensorflow as tf
import numpy as np
from PIL import Image
BATCH_SIZE = 128
GRADIENT_PENALTY_WEIGHT = 10


class RandomWeightedAverage(_Merge):
    """Takes a randomly-weighted average of two tensors.

    In geometric terms, this outputs a random point on the line between each
    pair of input points. Inheriting from _Merge is a little messy but it was
    the quickest solution I could think of. Improvements appreciated.
    """

    def _merge_function(self, inputs):
        # Read the batch size from the incoming tensor instead of BATCH_SIZE:
        # the layer then also works when it is fed a different batch size,
        # e.g. the per-device slices created by multi_gpu_model.
        batch = K.shape(inputs[0])[0]
        weights = K.random_uniform((batch, 1, 1, 1))
        return (weights * inputs[0]) + ((1 - weights) * inputs[1])
# Holds a generator/discriminator pair plus the two multi-GPU training models
# (one that updates the generator, one that updates the discriminator).
class GANGPU():
# gan_type selects optimizer, loss and critic ratio; it must be one of
# 'gan', 'wgan', 'improved_wgan' or 'optim'.
def __init__(self,gan_type):
assert gan_type in ['gan','wgan','improved_wgan','optim']
print("GAN Type: " + gan_type)
self.type = gan_type
self.noise_shape = (100,)
self.img_shape = (28, 28, 1)
self.clip_value = 0.0001 # threshold for weight clipping (-c,c)
self.d_losses = []
# NOTE(review): real_img is not referenced again in this constructor.
real_img = Input(shape=self.img_shape)
# set gan type specific parameters
optimizer = self.select_optimizer()
loss = self.select_loss()
self.n_critic = self.select_n_critic()
# Now we initialize the generator and discriminator.
generator = self.make_generator()
discriminator = self.make_discriminator()
# The parallel_generator_model is used when we want to train the generator layers.
# As such, we ensure that the discriminator layers are not trainable.
for layer in discriminator.layers:
layer.trainable = False
discriminator.trainable = False
generator_input = Input(shape=(100,))
generator_layers = generator(generator_input)
discriminator_layers_for_generator = discriminator(generator_layers)
generator_model = Model(inputs=[generator_input], outputs=[discriminator_layers_for_generator])
# We use the Adam parameters from Gulrajani et al.
parallel_generator_model = multi_gpu_model(generator_model, gpus=2)
parallel_generator_model.compile(optimizer=optimizer, loss=loss)
# Now that the parallel_generator_model is compiled, we can make the discriminator layers trainable.
for layer in discriminator.layers:
layer.trainable = True
for layer in generator.layers:
layer.trainable = False
discriminator.trainable = True
generator.trainable = False
# The parallel_discriminator_model is more complex. It takes both real image samples and random noise seeds as input.
# The noise seed is run through the generator model to get generated images. Both real and generated images
# are then run through the discriminator.
real_samples = Input(shape=self.img_shape)
generator_input_for_discriminator = Input(shape=self.noise_shape)
generated_samples_for_discriminator = generator(generator_input_for_discriminator)
discriminator_output_from_generator = discriminator(generated_samples_for_discriminator)
discriminator_output_from_real_samples = discriminator(real_samples)
if self.type in ['gan','wgan']:
discriminator_model = Model(inputs=[real_samples, generator_input_for_discriminator],
outputs=[discriminator_output_from_real_samples,
discriminator_output_from_generator])
# NOTE(review): the traceback quoted after this listing points at this call;
# both outputs of discriminator_model come from the same "Discriminator"
# model, and Keras reports that name as being used twice.
parallel_discriminator_model = multi_gpu_model(discriminator_model, gpus=2)
parallel_discriminator_model.compile(optimizer=optimizer,
loss=[loss,
loss])
elif self.type in ['improved_wgan','optim']:
print("Gradient Penalty Applied")
# We also need to generate weighted-averages of real and generated samples, to use for the gradient norm penalty.
averaged_samples = RandomWeightedAverage()([real_samples, generated_samples_for_discriminator])
# We then run these samples through the discriminator as well. Note that we never really use the discriminator
# output for these samples - we're only running them to get the gradient norm for the gradient penalty loss.
averaged_samples_out = discriminator(averaged_samples)
# The gradient penalty loss function requires the input averaged samples to get gradients. However,
# Keras loss functions can only have two arguments, y_true and y_pred. We get around this by making a partial()
# of the function with the averaged samples here.
partial_gp_loss = partial(self.gradient_penalty_loss,
averaged_samples=averaged_samples,
gradient_penalty_weight=GRADIENT_PENALTY_WEIGHT)
partial_gp_loss.__name__ = 'gradient_penalty' # Functions need names or Keras will throw an error
discriminator_model = Model(inputs=[real_samples, generator_input_for_discriminator],
outputs=[discriminator_output_from_real_samples,
discriminator_output_from_generator,
averaged_samples_out])
parallel_discriminator_model = multi_gpu_model(discriminator_model, gpus=2)
parallel_discriminator_model.compile(optimizer=optimizer,
loss=[loss,
loss,
partial_gp_loss])
# Keep the training models and the underlying networks for train() and generate_images().
self.parallel_generator_model, self.parallel_discriminator_model = parallel_generator_model, parallel_discriminator_model
self.generator, self.discriminator = generator, discriminator
def select_optimizer(self):
    """Return the optimizer for ``self.type`` (``None`` for an unknown type)."""
    gan_type = self.type
    if gan_type == 'gan':
        print("Optimizer: Adam")
        return Adam(lr=0.0002, beta_1=0.5)
    if gan_type == 'wgan':
        print("Optimizer: RMSProp")
        return RMSprop(lr=0.00005)
    if gan_type == 'improved_wgan':
        print("Optimizer: Adam")
        return Adam(lr=0.0001, beta_1=0.5, beta_2=0.9)
    if gan_type == 'optim':
        print("Optimizer: OptimAdam")
        return optimAdam(lr=0.0001, beta_1=0.5, beta_2=0.9)
    return None
def select_loss(self):
if self.type == 'gan':
print("Loss: Binary Cross Entropy")
return 'binary_crossentropy'
elif self.type in ['wgan','improved_wgan','optim']:
print("Loss: Wasserstein")
return self.wasserstein_loss
def select_n_critic(self):
if self.type == 'gan':
print("Critics Ratio: 1")
return 1
elif self.type in ['wgan','improved_wgan','optim']:
print("Critics Ratio: 5")
return 5
# for WGAN, Improved WGAN, Optim
def wasserstein_loss(self, y_true, y_pred):
    """Return the mean of the element-wise product of labels and predictions."""
    signed_scores = y_true * y_pred
    return K.mean(signed_scores)
# for Improved WGAN, Optim
def gradient_penalty_loss(self, y_true, y_pred, averaged_samples, gradient_penalty_weight):
    """Gradient penalty on the discriminator output w.r.t. the averaged samples.

    Args:
        y_true: unused; present because Keras losses take (y_true, y_pred).
        y_pred: discriminator output for ``averaged_samples``.
        averaged_samples: tensor the gradient is taken with respect to.
        gradient_penalty_weight: multiplier applied to the penalty.

    Returns:
        The batch mean of ``weight * (1 - ||grad||_2) ** 2``, with the norm
        computed separately for every sample.
    """
    # K.gradients returns a list with one entry per differentiated tensor.
    gradients = K.gradients(K.sum(y_pred), averaged_samples)[0]
    # Reduce over every axis except the batch axis to get one norm per sample;
    # the original summed over the whole batch and produced a single norm.
    sample_axes = list(range(1, K.ndim(gradients)))
    gradient_l2_norm = K.sqrt(K.sum(K.square(gradients), axis=sample_axes))
    gradient_penalty = gradient_penalty_weight * K.square(1 - gradient_l2_norm)
    return K.mean(gradient_penalty)
def make_generator(self):
    """Return the generator: Dense(256, relu) -> Dense(prod(img_shape), tanh) -> Reshape."""
    # Fully connected network: 100 inputs -> 256 hidden units -> one value per pixel.
    n_pixels = np.prod(self.img_shape)
    return Sequential(
        [
            Dense(256, activation="relu", input_dim=100),
            Dense(n_pixels, activation='tanh'),
            Reshape(self.img_shape),
        ],
        name='Generator',
    )
def make_discriminator(self):
    """Return the discriminator: Flatten -> Dense(512, relu) -> Dense(1, sigmoid)."""
    # Fully connected network: flattened image -> 512 hidden units -> 1 output.
    return Sequential(
        [
            Flatten(input_shape=self.img_shape),
            Dense(512, activation="relu"),
            Dense(1, activation='sigmoid'),
        ],
        name='Discriminator',
    )
def generate_images(self, output_dir, epoch):
    """Feeds random seeds into the generator and tiles and saves the output to a PNG file.

    The uint8 image stack is additionally pickled to ``epoch_{epoch}.pkl`` in
    the same directory.
    """
    def tile_images(image_stack):
        """Given a stacked tensor of images, reshapes them into a horizontal tiling for display."""
        assert len(image_stack.shape) == 3
        return np.concatenate(list(image_stack), axis=1)

    # 100 generated samples, mapped from the tanh range [-1, 1] to [0, 255].
    generated = self.generator.predict(np.random.rand(100, 100))
    scaled = (generated * 127.5) + 127.5
    test_image_stack = np.squeeze(np.round(scaled).astype(np.uint8))

    png_path = os.path.join(output_dir, 'epoch_{}.png'.format(epoch))
    # L specifies greyscale
    Image.fromarray(tile_images(test_image_stack), mode='L').save(png_path)

    pkl_path = os.path.join(output_dir, 'epoch_{}.pkl'.format(epoch))
    with open(pkl_path, 'wb') as f:
        cPickle.dump(test_image_stack, f)
# Train on MNIST for `epochs` epochs: self.n_critic discriminator updates are
# followed by one generator update; samples and weights are saved whenever
# epoch % save_interval == 0.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# loops and conditionals below cannot be read off the text.
def train(self, epochs, batch_size=128, save_interval=50):
# First we load the image data, reshape it and normalize it to the range [-1, 1]
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = np.concatenate((X_train, X_test), axis=0)
if K.image_data_format() == 'channels_first':
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1], X_train.shape[2]))
else:
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2], 1))
X_train = (X_train.astype(np.float32) - 127.5) / 127.5
# We make three label vectors for training. positive_y is the label vector for real samples, with value 1.
# negative_y is the label vector for generated samples, with value -1. The dummy_y vector is passed to the
# gradient_penalty loss function and is not used.
# NOTE(review): for type 'gan' select_loss returns 'binary_crossentropy';
# confirm that the -1 labels in negative_y are intended with that loss.
positive_y = np.ones((batch_size, 1), dtype=np.float32)
negative_y = -positive_y
if self.type in ['improved_wgan','optim']:
dummy_y = np.zeros((batch_size, 1), dtype=np.float32)
self.discriminator_losses = []
self.generator_losses = []
output_dir = '../log_'+self.type
for epoch in range(epochs):
np.random.shuffle(X_train)
print("Epoch: ", epoch)
print("Number of batches: ", int(X_train.shape[0] // batch_size))
discriminator_loss = []
generator_loss = []
# One outer step consumes n_critic consecutive batches of real images.
minibatches_size = batch_size * self.n_critic
for i in range(int(X_train.shape[0] // (batch_size * self.n_critic))):
# ---------------------
# Train Discriminator
# ---------------------
discriminator_minibatches = X_train[i * minibatches_size:(i + 1) * minibatches_size]
for j in range(self.n_critic):
image_batch = discriminator_minibatches[j * batch_size:(j + 1) * batch_size]
# NOTE(review): noise is uniform (np.random.rand) here but normal
# (np.random.normal) in the generator step below -- confirm which is intended.
noise = np.random.rand(batch_size, 100).astype(np.float32)
if self.type in ['gan','wgan']:
discriminator_loss.append(self.parallel_discriminator_model.train_on_batch([image_batch, noise],
[positive_y, negative_y]))
elif self.type in ['improved_wgan','optim']:
discriminator_loss.append(self.parallel_discriminator_model.train_on_batch([image_batch, noise],
[positive_y, negative_y, dummy_y]))
if self.type == 'wgan':
# Clip discriminator weights
for l in self.parallel_discriminator_model.layers:
weights = l.get_weights()
weights = [np.clip(w, -self.clip_value, self.clip_value) for w in weights]
l.set_weights(weights)
# ---------------------
# Train Generator
# ---------------------
noise = np.random.normal(0, 1, (batch_size, 100))
generator_loss.append(self.parallel_generator_model.train_on_batch(noise, positive_y))
# If at save interval => save generated image samples
if epoch % save_interval == 0:
self.generate_images(output_dir, epoch)
self.generator.save_weights(os.path.join(output_dir, 'epoch_{}_g.h5'.format(epoch)))
self.discriminator.save_weights(os.path.join(output_dir, 'epoch_{}_d.h5'.format(epoch)))
# Keep this epoch's per-step losses.
self.discriminator_losses.append(discriminator_loss)
self.generator_losses.append(generator_loss)
if __name__ == '__main__':
    # Train the plain 'gan' variant for 100 epochs; save_interval=1 saves
    # samples and weights after every epoch.
    gan = GANGPU('gan')
    gan.train(100, batch_size=BATCH_SIZE, save_interval=1)
Here's the full traceback:
Traceback (most recent call last):
File "gangpu.py", line 278, in <module>
gan = GANGPU('gan')
File "gangpu.py", line 96, in __init__
parallel_discriminator_model = multi_gpu_model(discriminator_model, gpus=2)
File "/n/home06/koshiba/.conda/envs/Keras7/lib/python3.6/site-packages/keras/utils/multi_gpu_utils.py", line 189, in multi_gpu_model
return Model(model.inputs, merged)
File "/n/home06/koshiba/.conda/envs/Keras7/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/n/home06/koshiba/.conda/envs/Keras7/lib/python3.6/site-packages/keras/engine/topology.py", line 1829, in __init__
'Layer names: ', all_names)
RuntimeError: ('The name "Discriminator" is used 2 times in the model. All layer names should be unique. Layer names: ', ['input_3', 'input_4', 'lambda_3', 'lambda_4', 'lambda_5', 'lambda_6', 'model_3', 'Discriminator', 'Discriminator'])
It's only a guess, and I'm not able to test it myself, but it seems strange to me that you build your models with tf.device on the CPU and then call a multi_gpu convenience function. You could try it without the device placement.