I built a GAN that predicts an output of shape (40,40,6) from two inputs of shapes [(40,40,4),(20,20,6)].
The model works and already delivers results, but I "only" get a GPU utilization between 60 and 70% (as displayed by nvidia-smi).
My question is whether that is intrinsic to such a model, since it has to do work in between the calls to train_on_batch, or whether there is a way to speed this process up.
A minimalist working example on random data would look like:
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import UpSampling3D
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Add
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import concatenate
from tensorflow.keras.layers import Lambda
from tensorflow.keras.optimizers import Adam
# Turn on memory growth for every physical GPU that TensorFlow reports,
# then print how many physical and logical GPUs are visible.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)
# =============================================================================
# define the model
# =============================================================================
def resBlock(X_in, num_of_features, kernel_size, scale):
    """Append one scaled residual block to ``X_in`` and return the result.

    The residual branch is conv -> ReLU -> conv, multiplied by ``scale`` and
    then added back onto ``X_in``.

    Args:
        X_in: Input tensor of the block; also used as the skip connection.
        num_of_features: Number of filters for both convolutions.
        kernel_size: Kernel size for both convolutions.
        scale: Constant factor applied to the residual branch before the add.

    Returns:
        The tensor ``X_in + scale * branch(X_in)``.
    """
    conv_options = dict(kernel_initializer='he_uniform', padding='same')
    residual = Conv2D(num_of_features, kernel_size, **conv_options)(X_in)
    residual = Activation('relu')(residual)
    residual = Conv2D(num_of_features, kernel_size, **conv_options)(residual)
    # Damp the residual branch before merging it with the skip path.
    residual = Lambda(lambda tensor: tensor * scale)(residual)
    return Add()([X_in, residual])
class Generator(object):
    """Builder for the residual generator network."""

    def __init__(self, noise_shape):
        """Store the hyper-parameters used when the model is built.

        Args:
            noise_shape: Stored on the instance; not read by ``generator()``.
        """
        self.noise_shape = noise_shape
        self.num_of_features = 128
        self.kernel_size = (3, 3)
        self.scale = 0.1
        self.padding = 8
        self.hp = int(self.padding / 2)  # half padding

    def generator(self):
        """Assemble and return the two-input generator ``Model``.

        Input A has spatial size ``32 + padding`` and 4 channels; input B has
        spatial size ``16 + hp`` and 6 channels and is upsampled by 2 along
        both spatial axes before being concatenated with A.
        """
        side_a = 32 + self.padding
        side_b = 16 + self.hp
        inputs_channels_A = Input((side_a, side_a, 4), name='input_A')
        inputs_channels_B = Input((side_b, side_b, 6), name='input_B')

        # Bring B to the spatial resolution of A (channel axis left untouched).
        upsampled_B = UpSampling3D(size=(2, 2, 1))(inputs_channels_B)

        # Stack both inputs along the channel axis.
        merged = concatenate([inputs_channels_A, upsampled_B], axis=3)

        # Initial feature-extraction convolution.
        features = Conv2D(
            self.num_of_features,
            self.kernel_size,
            activation='relu',
            padding='same',
            kernel_initializer='he_normal',
        )(merged)

        # Six residual blocks.
        for _ in range(6):
            features = resBlock(features, self.num_of_features, self.kernel_size, self.scale)

        # Project back to 6 channels so the result can be added to upsampled B.
        projected = Conv2D(6, (3, 3), kernel_initializer='he_uniform', padding='same')(features)

        # Global skip connection from the upsampled B input.
        output = Add()([projected, upsampled_B])

        return Model(inputs=[inputs_channels_A, inputs_channels_B], outputs=output)
def discriminator_block(model, filters, kernel_size, strides):
    """Apply conv -> batch norm -> LeakyReLU to ``model`` and return the result.

    Args:
        model: Input tensor.
        filters: Number of convolution filters.
        kernel_size: Convolution kernel size.
        strides: Convolution stride.
    """
    convolved = Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding="same")(model)
    normalized = BatchNormalization(momentum=0.5)(convolved)
    return LeakyReLU(alpha=0.2)(normalized)
class Discriminator(object):
    """Builder for the convolutional discriminator network."""

    def __init__(self, image_shape):
        """Remember the shape of the images the discriminator will judge."""
        self.image_shape = image_shape

    def discriminator(self):
        """Assemble and return the discriminator ``Model``.

        The network is a stem convolution, seven conv/BN/LeakyReLU blocks,
        and a dense head ending in a single sigmoid unit.
        """
        dis_input = Input(shape=(self.image_shape))

        features = Conv2D(filters=64, kernel_size=3, strides=1, padding="same")(dis_input)
        features = LeakyReLU(alpha=0.2)(features)

        # (filters, strides) for each successive discriminator block.
        block_specs = (
            (64, 2),
            (128, 1),
            (128, 2),
            (256, 1),
            (256, 2),
            (512, 1),
            (512, 2),
        )
        for filters, strides in block_specs:
            features = discriminator_block(features, filters, 3, strides)

        features = Flatten()(features)
        features = Dense(1024)(features)
        features = LeakyReLU(alpha=0.2)(features)
        features = Dense(1)(features)
        validity = Activation('sigmoid')(features)

        return Model(inputs=dis_input, outputs=validity)
def get_gan_network(discriminator, shape_list_AB, generator, optimizer, loss):
    """Chain generator and discriminator into one compiled two-output model.

    Args:
        discriminator: Discriminator model; its ``trainable`` flag is set to
            ``False`` here before the combined model is compiled.
        shape_list_AB: Sequence whose first two items are the shapes of
            generator inputs A and B.
        generator: Generator model taking ``[A, B]``.
        optimizer: Optimizer used to compile the combined model.
        loss: Loss for the generated-image output.

    Returns:
        The compiled model with outputs ``[generated_image, discriminator_score]``.
    """
    discriminator.trainable = False

    gan_inputs = [Input(shape=shape_list_AB[0]), Input(shape=shape_list_AB[1])]
    generated = generator(gan_inputs)
    validity = discriminator(generated)

    gan = Model(inputs=gan_inputs, outputs=[generated, validity])
    # Content loss dominates; the adversarial term is weighted by 1e-3.
    gan.compile(
        loss=[loss, "binary_crossentropy"],
        loss_weights=[1., 1e-3],
        optimizer=optimizer,
    )
    return gan
def get_optimizer():
    """Return a new Adam optimizer with learning rate 1e-4.

    Uses the ``learning_rate`` keyword; ``lr`` is a deprecated alias in
    tf.keras.
    """
    return Adam(learning_rate=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
# =============================================================================
# choose some parameters and compile the model
# =============================================================================
# Batch size and tensor shapes used for the random-data example.
batch_size = 128
shape_input_A = (40, 40, 4)
shape_input_B = (20, 20, 6)
shape_output = (40, 40, 6)

generator = Generator(shape_input_B).generator()  # todo shape
discriminator = Discriminator(shape_output).discriminator()  # todo shape

# Each compiled model gets its own optimizer instance. Sharing a single Adam
# object between models that train different variable sets mixes their step
# counters and is rejected outright by newer Keras optimizers.
generator.compile(loss="mse", optimizer=get_optimizer())
discriminator.compile(loss="binary_crossentropy", optimizer=get_optimizer())

# ``optimizer`` stays bound at module level; it is the one driving the combined model.
optimizer = get_optimizer()
gan = get_gan_network(discriminator, [shape_input_A, shape_input_B], generator, optimizer, "mse")
# =============================================================================
# training
# =============================================================================
def get_random_data(mod):
    """Return one batch of uniform random data.

    Args:
        mod: ``0`` selects the network inputs; any other value selects the
            network output (target).

    Returns:
        For ``mod == 0`` a list ``[A, B]`` with shapes
        ``(batch_size, 40, 40, 4)`` and ``(batch_size, 20, 20, 6)``;
        otherwise a single array of shape ``(batch_size, 40, 40, 6)``.
    """
    if mod != 0:
        # Target batch.
        return np.random.rand(batch_size, 40, 40, 6)
    # Input batch: A first, then B.
    input_a = np.random.rand(batch_size, 40, 40, 4)
    input_b = np.random.rand(batch_size, 20, 20, 6)
    return [input_a, input_b]
# Pre-allocate the buffers that are refilled in place on every batch.
# ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same dtype.
rand_nums = np.empty(batch_size, dtype=int)
image_batch_lr = np.empty((batch_size,) + shape_input_B)
image_batch_hr = np.empty((batch_size,) + shape_output)
generated_images_sr = np.empty_like(image_batch_hr)
real_data_Y = np.empty(batch_size)
fake_data_Y = np.empty(batch_size)

for e in range(1, 10):
    print("epoch:", e)
    for batchindex in range(200):
        # Generate a batch of fake images with the current generator.
        generated_images_sr[:] = generator.predict(get_random_data(0))

        # Soft labels: real targets in (0.8, 1.0], fake targets in [0.0, 0.2).
        real_data_Y[:] = np.ones(batch_size) - np.random.random_sample(batch_size) * 0.2
        fake_data_Y[:] = np.random.random_sample(batch_size) * 0.2

        # Discriminator step: one real batch, one generated batch.
        discriminator.trainable = True
        d_loss_real = discriminator.train_on_batch(get_random_data(1), real_data_Y)
        d_loss_fake = discriminator.train_on_batch(generated_images_sr, fake_data_Y)
        discriminator_loss = 0.5 * np.add(d_loss_fake, d_loss_real)

        # Generator step through the combined model, discriminator flagged as frozen.
        gan_Y = np.ones(batch_size) - np.random.random_sample(batch_size) * 0.2
        discriminator.trainable = False
        gan_loss = gan.train_on_batch(get_random_data(0), [get_random_data(1), gan_Y])

    # Report the losses of the last batch of the epoch.
    print("discriminator_loss : %f" % discriminator_loss)
    print("gan_loss :", gan_loss)
I run this code on my GTX2080 within a docker container tensorflow/tensorflow:2.0.0-gpu-py3.
Training a GAN implies some overhead that will not be executed on the GPU. In your case, obtaining real_data_Y and fake_data_Y, executing get_random_data() and computing the loss will result in GPU idle time.
You can try profiling your program with python -mcProfile -o performance.prof xxx.py and see if there are bottlenecks that can be improved, but 60 to 70% already seems not too bad.
I have a model which takes in a dataframe which looks like this
image,level
10_left,0
10_right,0
13_left,0
with model structure like this
import pandas as pd
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split

# Load the label table and derive per-image helper columns.
base_image_dir = 'extra_data/dr/'
retina_df = pd.read_csv(os.path.join(base_image_dir, 'trainLabels.csv'))
# Image names look like "<patient>_<eye>"; the prefix is the patient id.
retina_df['PatientId'] = retina_df['image'].map(lambda x: x.split('_')[0])
retina_df['path'] = retina_df['image'].map(
    lambda x: os.path.join(base_image_dir, 'train', '{}.jpeg'.format(x)))
retina_df['exists'] = retina_df['path'].map(os.path.exists)
print(retina_df['exists'].sum(), 'images found of', retina_df.shape[0], 'total')
# 1 for the left eye, 0 otherwise.
retina_df['eye'] = retina_df['image'].map(lambda x: 1 if x.split('_')[-1] == 'left' else 0)

# One-hot encode the level. The class count is computed once instead of
# re-scanning the whole column for every row.
num_levels = 1 + retina_df['level'].max()
retina_df['level_cat'] = retina_df['level'].map(lambda x: to_categorical(x, num_levels))

retina_df.dropna(inplace=True)
retina_df = retina_df[retina_df['exists']]
retina_df.sample(3)

# Split by patient so both eyes of one patient stay on the same side.
rr_df = retina_df[['PatientId', 'level']].drop_duplicates()
train_ids, valid_ids = train_test_split(rr_df['PatientId'],
                                        test_size=0.25,
                                        random_state=2018,
                                        stratify=rr_df['level'])
raw_train_df = retina_df[retina_df['PatientId'].isin(train_ids)]
valid_df = retina_df[retina_df['PatientId'].isin(valid_ids)]
# (A leftover ``pdb.set_trace()`` breakpoint was removed here; it halted every run.)
print('train', raw_train_df.shape[0], 'validation', valid_df.shape[0])

# Balance the training set: 75 samples (with replacement) per (level, eye) group.
train_df = raw_train_df.groupby(['level', 'eye']).apply(
    lambda x: x.sample(75, replace=True)).reset_index(drop=True)
print('New Data Size:', train_df.shape[0], 'Old Size:', raw_train_df.shape[0])
import tensorflow as tf
from keras import backend as K
from keras.applications.inception_v3 import preprocess_input
import numpy as np
IMG_SIZE = (512, 512)  # final image size requested from the augmentation pipeline


def tf_image_loader(out_size,
                    horizontal_flip=True,
                    vertical_flip=False,
                    random_brightness=True,
                    random_contrast=True,
                    random_saturation=True,
                    random_hue=True,
                    color_mode='rgb',
                    preproc_func=preprocess_input,
                    on_batch=False):
    """Build a function that loads, resizes and randomly augments images.

    Args:
        out_size: Size passed to ``tf.image.resize_images``.
        horizontal_flip, vertical_flip, random_brightness, random_contrast,
        random_saturation, random_hue: Switches for the individual
            augmentation steps.
        color_mode: ``'rgb'`` decodes 3 channels; anything else passes
            ``channels=0`` to the decoder.
        preproc_func: Applied to the augmented image as the last step.
        on_batch: If true, the returned function maps over a batch of paths
            with ``tf.map_fn``; otherwise it handles a single path.

    Returns:
        A function ``(X, y) -> (images, y)`` where ``X`` holds file path(s).
    """
    decode_channels = 3 if color_mode == 'rgb' else 0

    # (enabled, op) pairs in the order in which they are applied.
    augmentation_steps = (
        (horizontal_flip, tf.image.random_flip_left_right),
        (vertical_flip, tf.image.random_flip_up_down),
        (random_brightness, lambda img: tf.image.random_brightness(img, max_delta=0.1)),
        (random_saturation, lambda img: tf.image.random_saturation(img, lower=0.75, upper=1.5)),
        (random_hue, lambda img: tf.image.random_hue(img, max_delta=0.15)),
        (random_contrast, lambda img: tf.image.random_contrast(img, lower=0.75, upper=1.5)),
    )

    def _func(X):
        with tf.name_scope('image_augmentation'):
            with tf.name_scope('input'):
                image = tf.image.decode_png(tf.read_file(X), channels=decode_channels)
                image = tf.image.resize_images(image, out_size)
            with tf.name_scope('augmentation'):
                for enabled, step in augmentation_steps:
                    if enabled:
                        image = step(image)
            return preproc_func(image)

    if not on_batch:
        # Apply the loader to one element at a time.
        def _all_func(X, y):
            return _func(X), y
        return _all_func

    # Apply the loader to every element of an already-batched tensor.
    def _batch_func(X, y):
        return tf.map_fn(_func, X), y
    return _batch_func
def tf_augmentor(out_size,
                 intermediate_size=(640, 640),
                 intermediate_trans='crop',
                 batch_size=16,
                 horizontal_flip=True,
                 vertical_flip=False,
                 random_brightness=True,
                 random_contrast=True,
                 random_saturation=True,
                 random_hue=True,
                 color_mode='rgb',
                 preproc_func=preprocess_input,
                 min_crop_percent=0.001,
                 max_crop_percent=0.005,
                 crop_probability=0.5,
                 rotation_range=10):
    """Build a function that turns a (path, label) dataset into augmented batches.

    Images are loaded at ``intermediate_size`` by ``tf_image_loader``, batched,
    optionally rotated/cropped with projective transforms, and finally brought
    to ``out_size`` by scaling (``'scale'``) or crop-or-pad (``'crop'``).

    Returns:
        A function taking a ``tf.data`` dataset and returning the batched,
        augmented dataset.

    Raises:
        ValueError: from the batch stage when ``intermediate_trans`` is
            neither ``'scale'`` nor ``'crop'``.
    """
    load_ops = tf_image_loader(out_size=intermediate_size,
                               horizontal_flip=horizontal_flip,
                               vertical_flip=vertical_flip,
                               random_brightness=random_brightness,
                               random_contrast=random_contrast,
                               random_saturation=random_saturation,
                               random_hue=random_hue,
                               color_mode=color_mode,
                               preproc_func=preproc_func,
                               on_batch=False)

    def batch_ops(X, y):
        n_images = tf.shape(X)[0]
        with tf.name_scope('transformation'):
            # code borrowed from https://becominghuman.ai/data-augmentation-on-gpu-in-tensorflow-13d14ecf2b19
            # Each entry is an Nx8 tensor of projective transforms (N = batch size).
            pending = []
            identity = tf.constant([1, 0, 0, 0, 1, 0, 0, 0], dtype=tf.float32)

            if rotation_range > 0:
                max_angle = rotation_range / 180 * np.pi
                angles = tf.random_uniform([n_images], -max_angle, max_angle)
                pending.append(
                    tf.contrib.image.angles_to_projective_transforms(
                        angles, intermediate_size[0], intermediate_size[1]))

            if crop_probability > 0:
                crop_pct = tf.random_uniform([n_images], min_crop_percent, max_crop_percent)
                left = tf.random_uniform([n_images], 0, intermediate_size[0] * (1.0 - crop_pct))
                top = tf.random_uniform([n_images], 0, intermediate_size[1] * (1.0 - crop_pct))
                crop_transform = tf.stack([
                    crop_pct,
                    tf.zeros([n_images]), top,
                    tf.zeros([n_images]), crop_pct, left,
                    tf.zeros([n_images]),
                    tf.zeros([n_images])
                ], 1)
                # Per-image coin flip: crop transform or identity.
                coin = tf.less(tf.random_uniform([n_images], 0, 1.0), crop_probability)
                identity_rows = tf.tile(tf.expand_dims(identity, 0), [n_images, 1])
                pending.append(tf.where(coin, crop_transform, identity_rows))

            if pending:
                X = tf.contrib.image.transform(X,
                                               tf.contrib.image.compose_transforms(*pending),
                                               interpolation='BILINEAR')  # or 'NEAREST'

            if intermediate_trans == 'scale':
                return tf.image.resize_images(X, out_size), y
            if intermediate_trans == 'crop':
                return tf.image.resize_image_with_crop_or_pad(X, out_size[0], out_size[1]), y
            raise ValueError('Invalid Operation {}'.format(intermediate_trans))

    def _create_pipeline(in_ds):
        loaded = in_ds.map(load_ops, num_parallel_calls=4)
        return loaded.batch(batch_size).map(batch_ops)

    return _create_pipeline
def flow_from_dataframe(idg,
                        in_df,
                        path_col,
                        y_col,
                        shuffle=True,
                        color_mode='rgb'):
    """Yield augmented ``(images, labels)`` batches forever.

    Args:
        idg: Pipeline factory (as returned by ``tf_augmentor``) that maps a
            ``tf.data`` dataset to a batched, augmented dataset.
        in_df: DataFrame holding the file paths and labels.
        path_col: Name of the column with the image paths.
        y_col: Name of the column with the per-row label arrays.
        shuffle: Shuffle the full dataset before each pass.
        color_mode: Accepted for interface compatibility; not used here.

    Yields:
        The evaluated result of the pipeline's next batch.
    """
    base_ds = tf.data.Dataset.from_tensor_slices((in_df[path_col].values,
                                                  np.stack(in_df[y_col].values, 0)))
    in_len = in_df[path_col].values.shape[0]
    while True:
        # Always derive the pass dataset from the unshuffled base. Reassigning
        # the base to its own shuffle stacked one more shuffle stage (and
        # buffer) onto the pipeline on every pass.
        pass_ds = base_ds.shuffle(in_len) if shuffle else base_ds  # shuffle the whole dataset
        next_batch = idg(pass_ds).repeat().make_one_shot_iterator().get_next()
        for _ in range(max(in_len // 32, 1)):
            # NOTE: if we loop here it is 'thread-safe-ish' if we loop on the outside it is completely unsafe
            yield K.get_session().run(next_batch)
batch_size = 48

# Training pipeline: default augmentations plus vertical flips.
core_idg = tf_augmentor(
    out_size=IMG_SIZE,
    color_mode='rgb',
    vertical_flip=True,
    crop_probability=0.0,  # crop doesn't work yet
    batch_size=batch_size,
)

# Validation pipeline: every random augmentation switched off.
augmentation_off = dict(
    horizontal_flip=False,
    vertical_flip=False,
    random_brightness=False,
    random_contrast=False,
    random_saturation=False,
    random_hue=False,
    rotation_range=0,
)
valid_idg = tf_augmentor(
    out_size=IMG_SIZE,
    color_mode='rgb',
    crop_probability=0.0,
    batch_size=batch_size,
    **augmentation_off
)

train_gen = flow_from_dataframe(core_idg, train_df, path_col='path', y_col='level_cat')
# we can use much larger batches for evaluation
valid_gen = flow_from_dataframe(valid_idg, valid_df, path_col='path', y_col='level_cat')

# Pull one batch from each generator; the training batch is the one kept in t_x / t_y.
t_x, t_y = next(valid_gen)
t_x, t_y = next(train_gen)
# Three candidate backbones are imported under the same alias; the last import wins.
from keras.applications.vgg16 import VGG16 as PTModel
from keras.applications.inception_resnet_v2 import InceptionResNetV2 as PTModel
from keras.applications.inception_v3 import InceptionV3 as PTModel
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Input, Conv2D, multiply, LocallyConnected2D, Lambda
from keras.layers import BatchNormalization
from keras.models import Model
from keras.metrics import top_k_categorical_accuracy
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau

# --- frozen pretrained backbone -------------------------------------------
in_lay = Input(t_x.shape[1:])
base_pretrained_model = PTModel(input_shape=t_x.shape[1:], include_top=False, weights='imagenet')
base_pretrained_model.trainable = False
pt_depth = base_pretrained_model.get_output_shape_at(0)[-1]
pt_features = base_pretrained_model(in_lay)
bn_features = BatchNormalization()(pt_features)

# --- attention branch: 1x1 convs narrowing down to a single sigmoid mask ----
attn_layer = Dropout(0.5)(bn_features)
for attn_width in (64, 16, 8):
    attn_layer = Conv2D(attn_width, kernel_size=(1, 1), padding='same', activation='relu')(attn_layer)
attn_layer = Conv2D(1, kernel_size=(1, 1), padding='valid', activation='sigmoid')(attn_layer)

# fan it out to all of the channels (fixed all-ones 1x1 convolution)
up_c2_w = np.ones((1, 1, 1, pt_depth))
up_c2 = Conv2D(pt_depth, kernel_size=(1, 1), padding='same',
               activation='linear', use_bias=False, weights=[up_c2_w])
up_c2.trainable = False
attn_layer = up_c2(attn_layer)

# --- masked global average pooling ----------------------------------------
mask_features = multiply([attn_layer, bn_features])
gap_features = GlobalAveragePooling2D()(mask_features)
gap_mask = GlobalAveragePooling2D()(attn_layer)
# to account for missing values from the attention model
gap = Lambda(lambda x: x[0] / x[1], name='RescaleGAP')([gap_features, gap_mask])

# --- classification head --------------------------------------------------
gap_dr = Dropout(0.25)(gap)
dr_steps = Dropout(0.25)(Dense(128, activation='relu')(gap_dr))
out_layer = Dense(t_y.shape[-1], activation='softmax')(dr_steps)
retina_model = Model(inputs=[in_lay], outputs=[out_layer])


def top_2_accuracy(in_gt, in_pred):
    """Top-2 categorical accuracy metric."""
    return top_k_categorical_accuracy(in_gt, in_pred, k=2)


retina_model.compile(optimizer='adam',
                     loss='categorical_crossentropy',
                     metrics=['categorical_accuracy', top_2_accuracy])
retina_model.summary()

# --- callbacks ------------------------------------------------------------
weight_path = "{}_weights.best.hdf5".format('retina')
checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1,
                             save_best_only=True, mode='min', save_weights_only=True)
reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=3, verbose=1,
                                   mode='auto', epsilon=0.0001, cooldown=5, min_lr=0.0001)
# probably needs to be more patient, but kaggle time is limited
early = EarlyStopping(monitor="val_loss", mode="min", patience=6)
callbacks_list = [checkpoint, early, reduceLROnPlat]

# --- training -------------------------------------------------------------
retina_model.fit_generator(
    train_gen,
    steps_per_epoch=train_df.shape[0] // batch_size,
    validation_data=valid_gen,
    validation_steps=valid_df.shape[0] // batch_size,
    epochs=25,
    callbacks=callbacks_list,
    workers=0,  # tf-generators are not thread-safe
    use_multiprocessing=False,
    max_queue_size=0,
)

# Restore the best checkpoint and save the complete model.
retina_model.load_weights(weight_path)
retina_model.save('full_retina_model.h5')
I realize that's a lot of code, but what I want to do is take in a dataframe which looks like this
image,N,D,G,C,A,H,M,O
2857_left,1,0,0,0,0,0,0,0
3151_left,1,0,0,0,0,0,0,0
3113_left,1,0,0,0,0,0,0,0
and in order to achieve this I have made the following changes:
from sklearn.model_selection import train_test_split

rr_df = retina_df
# Multi-label target columns (one binary column per condition).
y = rr_df[['N', 'D', 'G', 'C', 'A', 'H', 'M', 'O']]

# Split on *unique* patient ids. Every patient appears once per eye, so
# splitting the raw column can put the same id into both id lists, and the
# ``isin`` filters below would then place that patient's images in both
# the training and the validation set.
train_ids, valid_ids = train_test_split(rr_df['PatientId'].drop_duplicates(),
                                        test_size=0.25,
                                        random_state=2018)
raw_train_df = retina_df[retina_df['PatientId'].isin(train_ids)]
valid_df = retina_df[retina_df['PatientId'].isin(valid_ids)]
print('train', raw_train_df.shape[0], 'validation', valid_df.shape[0])
train_df = raw_train_df
# Modified model from the question: same backbone and attention branch as
# before, but with eight separate single-unit sigmoid outputs.
# NOTE(review): this version is the one reported to fail with
# "Expected to see 8 array(s), but instead got ... 1 arrays", because the
# generator yields a single 8-column target array.
from keras import regularizers, optimizers
from keras.layers import BatchNormalization
in_lay = Input(t_x.shape[1:])
base_pretrained_model = PTModel(input_shape = t_x.shape[1:], include_top = False, weights = 'imagenet')
base_pretrained_model.trainable = False
pt_depth = base_pretrained_model.get_output_shape_at(0)[-1]
pt_features = base_pretrained_model(in_lay)
bn_features = BatchNormalization()(pt_features)
# here we do an attention mechanism to turn pixels in the GAP on an off
attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(Dropout(0.5)(bn_features))
attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
attn_layer = Conv2D(8, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
attn_layer = Conv2D(1,
                    kernel_size = (1,1),
                    padding = 'valid',
                    activation = 'sigmoid')(attn_layer)
# fan it out to all of the channels
up_c2_w = np.ones((1, 1, 1, pt_depth))
up_c2 = Conv2D(pt_depth, kernel_size = (1,1), padding = 'same',
               activation = 'linear', use_bias = False, weights = [up_c2_w])
up_c2.trainable = False
attn_layer = up_c2(attn_layer)
mask_features = multiply([attn_layer, bn_features])
gap_features = GlobalAveragePooling2D()(mask_features)
gap_mask = GlobalAveragePooling2D()(attn_layer)
# to account for missing values from the attention model
gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
gap_dr = Dropout(0.25)(gap)
x = Dropout(0.25)(Dense(128, activation = 'relu')(gap_dr))
# out_layer = Dense(t_y.shape[-1], activation = 'softmax')(dr_steps)
# One independent sigmoid head per label; the model therefore has 8 outputs
# and expects a list of 8 target arrays.
output1 = Dense(1, activation = 'sigmoid')(x)
output2 = Dense(1, activation = 'sigmoid')(x)
output3 = Dense(1, activation = 'sigmoid')(x)
output4 = Dense(1, activation = 'sigmoid')(x)
output5 = Dense(1, activation = 'sigmoid')(x)
output6 = Dense(1, activation = 'sigmoid')(x)
output7 = Dense(1, activation = 'sigmoid')(x)
output8 = Dense(1, activation = 'sigmoid')(x)
retina_model = Model(inputs = [in_lay], outputs = [output1,output2,output3,output4,output5, output6, output7, output8])
# retina_model = Model([in_lay],output1,output2,output3,output4,output5, output6, output7, output8)
# retina_model.build(t_x.shape[1:]) # `input_shape` is the shape of the input data
# print(model.summary())
# retina_model.compile(optimizers.rmsprop(lr = 0.00001, decay = 1e-6),
# The line below is a standalone assignment (the compile call above is
# commented out); it defines one loss entry per output head.
loss = ["binary_crossentropy","binary_crossentropy","binary_crossentropy","binary_crossentropy", "binary_crossentropy","binary_crossentropy","binary_crossentropy","binary_crossentropy"]#,metrics = ["accuracy"])
# retina_model = Model(inputs = [in_lay], outputs = [out_layer])
# from keras.metrics import top_k_categorical_accuracy
# def top_2_accuracy(in_gt, in_pred):
# return top_k_categorical_accuracy(in_gt, in_pred, k=2)
retina_model.compile(optimizer = 'adam', loss = loss,
                     metrics = ['accuracy'])
retina_model.summary()
but when i run this i get,
ValueError: Error when checking model target: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 8 array(s), but instead got the following list of 1 arrays: [array([[1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1],
[0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0],
...
Any suggestions on how I could change this model to train on multi-label data? Thanks in advance.
You are trying to train a model with 8 different outputs (each of length 1), but your target values are a single array of length 8.
The easiest fix is to replace:
# Code to be replaced: eight single-unit sigmoid heads on the same tensor,
# with one loss entry per head.
output1 = Dense(1, activation = 'sigmoid')(x)
output2 = Dense(1, activation = 'sigmoid')(x)
output3 = Dense(1, activation = 'sigmoid')(x)
output4 = Dense(1, activation = 'sigmoid')(x)
output5 = Dense(1, activation = 'sigmoid')(x)
output6 = Dense(1, activation = 'sigmoid')(x)
output7 = Dense(1, activation = 'sigmoid')(x)
output8 = Dense(1, activation = 'sigmoid')(x)
loss = ["binary_crossentropy","binary_crossentropy","binary_crossentropy","binary_crossentropy", "binary_crossentropy","binary_crossentropy","binary_crossentropy","binary_crossentropy"]#,metrics = ["accuracy"])
with:
# Keep the sigmoid activation here; do not switch to softmax for a
# multi-label problem. A single 8-unit output pairs with one loss string.
output = Dense(8, activation = 'sigmoid')(x)
loss = "binary_crossentropy"
Otherwise you have to create a custom generator that yields a list of 8 targets to feed your network.
The input consists of 3 independent channels of 1000 features each. I'm trying to pass each channel through an independent NN path, then concatenate them into a flat layer, and then apply an FCN on the flattened layer for binary classification.
I'm trying to add multiple Dense layers together, like this:
def tst_1():
    # Question's original attempt: three Dense stacks, one per input channel,
    # merged with Add and followed by a dense classification head.
    # NOTE(review): this version is the one that raises the AttributeError
    # quoted below; the inputs[...] indexing is done directly on the Input
    # tensor rather than inside a Keras layer.
    inputs = Input((3, 1000, 1))
    # Branch 1
    dense10 = Dense(224, activation='relu')(inputs[0,:,1])
    dense11 = Dense(112, activation='relu')(dense10)
    dense12 = Dense(56, activation='relu')(dense11)
    # Branch 2
    dense20 = Dense(224, activation='relu')(inputs[1,:,1])
    dense21 = Dense(112, activation='relu')(dense20)
    dense22 = Dense(56, activation='relu')(dense21)
    # Branch 3
    dense30 = Dense(224, activation='relu')(inputs[2,:,1])
    dense31 = Dense(112, activation='relu')(dense30)
    dense32 = Dense(56, activation='relu')(dense31)
    # Merge the three branches element-wise.
    flat = keras.layers.Add()([dense12, dense22, dense32])
    # Classification head.
    dense1 = Dense(224, activation='relu')(flat)
    drop1 = Dropout(0.5)(dense1)
    dense2 = Dense(112, activation='relu')(drop1)
    drop2 = Dropout(0.5)(dense2)
    dense3 = Dense(32, activation='relu')(drop2)
    densef = Dense(1, activation='sigmoid')(dense3)
    model = Model(inputs = inputs, outputs = densef)
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    return model
model = tst_1()
model.summary()
but I got this error:
/usr/local/lib/python2.7/dist-packages/keras/engine/network.pyc in build_map(tensor, finished_nodes, nodes_in_progress, layer, node_index, tensor_index)
1310 ValueError: if a cycle is detected.
1311 """
-> 1312 node = layer._inbound_nodes[node_index]
1313
1314 # Prevent cycles.
AttributeError: 'NoneType' object has no attribute '_inbound_nodes'
The problem is that splitting the input data using inputs[0,:,1] is not done as a keras layer.
You need to create a Lambda layer to be able to accomplish this.
The following code:
from keras import layers
from keras.layers import Input, Add, Dense,Dropout, Lambda, Concatenate
from keras.layers import Flatten
from keras.optimizers import Adam
from keras.models import Model
import keras.backend as K
def tst_1():
    """Build a three-branch dense network for binary classification.

    The input has shape ``(3, 1000, 1)``. Each of the three channels is sliced
    out as a flat 1000-feature vector, passed through its own
    224 -> 112 -> 56 dense stack, and the branch outputs are concatenated and
    fed to a dense head ending in one sigmoid unit.

    Returns:
        The uncompiled Keras ``Model``.
    """
    num_channels = 3
    inputs = Input(shape=(num_channels, 1000, 1))
    branch_outputs = []
    for i in range(num_channels):
        # Slicing the ith channel. The index is handed over through
        # ``arguments`` rather than captured from the loop variable, so the
        # layer still selects the right channel if it is called again later
        # (a closure over ``i`` would see its final value). Index 0 on the
        # last axis drops the singleton dimension so Dense sees 1000 features.
        out = Lambda(lambda x, idx: x[:, idx, :, 0],
                     arguments={'idx': i},
                     name="Lambda_" + str(i))(inputs)
        # Setting up your per-channel layers (replace with actual sub-models):
        out = Dense(224, activation='relu', name="Dense_224_" + str(i))(out)
        out = Dense(112, activation='relu', name="Dense_112_" + str(i))(out)
        out = Dense(56, activation='relu', name="Dense_56_" + str(i))(out)
        branch_outputs.append(out)
    # Concatenating together the per-channel results:
    out = Concatenate()(branch_outputs)
    dense1 = Dense(224, activation='relu')(out)
    drop1 = Dropout(0.5)(dense1)
    dense2 = Dense(112, activation='relu')(drop1)
    drop2 = Dropout(0.5)(dense2)
    dense3 = Dense(32, activation='relu')(drop2)
    densef = Dense(1, activation='sigmoid')(dense3)
    model = Model(inputs=inputs, outputs=densef)
    return model


Net = tst_1()
Net.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
Net.summary()
correctly created the net that you want.
Thanks to #CAta.RAy
I solved it in this way:
import numpy as np
from keras import layers
from keras.layers import Input, Add, Dense,Dropout, Lambda
from keras.layers import Flatten
from keras.optimizers import Adam
from keras.models import Model
import keras.backend as K
def tst_1():
    """Build a three-branch dense network whose branches are summed.

    The input has shape ``(3, 1000)``. Row ``k`` of the input feeds branch
    ``k`` (224 -> 112 -> 56 dense units); the three branch outputs are added
    element-wise and passed through a dense head ending in one sigmoid unit.

    Returns:
        The uncompiled Keras ``Model``.
    """
    inputs = Input((3, 1000))

    def _branch(channel):
        # Select one channel, then run it through its own dense stack.
        hidden = Lambda(lambda x: x[:, channel])(inputs)
        for units in (224, 112, 56):
            hidden = Dense(units, activation='relu')(hidden)
        return hidden

    branches = [_branch(channel) for channel in range(3)]
    flat = Add()(branches)

    # Classification head.
    head = Dense(224, activation='relu')(flat)
    head = Dropout(0.5)(head)
    head = Dense(112, activation='relu')(head)
    head = Dropout(0.5)(head)
    head = Dense(32, activation='relu')(head)
    densef = Dense(1, activation='sigmoid')(head)

    return Model(inputs=inputs, outputs=densef)


Net = tst_1()
Net.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
Net.summary()