Converting Keras model to multi label output - python

I have a model which takes in a dataframe that looks like this:
image,level
10_left,0
10_right,0
13_left,0
with a model structure like this:
base_image_dir = 'extra_data/dr/'
retina_df = pd.read_csv(os.path.join(base_image_dir, 'trainLabels.csv'))
retina_df['PatientId'] = retina_df['image'].map(lambda x: x.split('_')[0])
retina_df['path'] = retina_df['image'].map(lambda x: os.path.join(base_image_dir,'train',
'{}.jpeg'.format(x)))
retina_df['exists'] = retina_df['path'].map(os.path.exists)
print(retina_df['exists'].sum(), 'images found of', retina_df.shape[0], 'total')
retina_df['eye'] = retina_df['image'].map(lambda x: 1 if x.split('_')[-1]=='left' else 0)
from keras.utils.np_utils import to_categorical
retina_df['level_cat'] = retina_df['level'].map(lambda x: to_categorical(x, 1+retina_df['level'].max()))
retina_df.dropna(inplace = True)
retina_df = retina_df[retina_df['exists']]
retina_df.sample(3)
from sklearn.model_selection import train_test_split
rr_df = retina_df[['PatientId', 'level']].drop_duplicates()
train_ids, valid_ids = train_test_split(rr_df['PatientId'],
test_size = 0.25,
random_state = 2018,
stratify = rr_df['level'])
raw_train_df = retina_df[retina_df['PatientId'].isin(train_ids)]
valid_df = retina_df[retina_df['PatientId'].isin(valid_ids)]
import pdb;pdb.set_trace()
print('train', raw_train_df.shape[0], 'validation', valid_df.shape[0])
train_df = raw_train_df.groupby(['level', 'eye']).apply(lambda x: x.sample(75, replace = True) ).reset_index(drop = True)
print('New Data Size:', train_df.shape[0], 'Old Size:', raw_train_df.shape[0])
import tensorflow as tf
from keras import backend as K
from keras.applications.inception_v3 import preprocess_input
import numpy as np
IMG_SIZE = (512, 512) # slightly smaller than vgg16 normally expects
def tf_image_loader(out_size,
                    horizontal_flip = True,
                    vertical_flip = False,
                    random_brightness = True,
                    random_contrast = True,
                    random_saturation = True,
                    random_hue = True,
                    color_mode = 'rgb',
                    preproc_func = preprocess_input,
                    on_batch = False):
    def _func(X):
        with tf.name_scope('image_augmentation'):
            with tf.name_scope('input'):
                X = tf.image.decode_png(tf.read_file(X), channels = 3 if color_mode == 'rgb' else 0)
                X = tf.image.resize_images(X, out_size)
            with tf.name_scope('augmentation'):
                if horizontal_flip:
                    X = tf.image.random_flip_left_right(X)
                if vertical_flip:
                    X = tf.image.random_flip_up_down(X)
                if random_brightness:
                    X = tf.image.random_brightness(X, max_delta = 0.1)
                if random_saturation:
                    X = tf.image.random_saturation(X, lower = 0.75, upper = 1.5)
                if random_hue:
                    X = tf.image.random_hue(X, max_delta = 0.15)
                if random_contrast:
                    X = tf.image.random_contrast(X, lower = 0.75, upper = 1.5)
                return preproc_func(X)
    if on_batch:
        # we are meant to use it on a batch
        def _batch_func(X, y):
            return tf.map_fn(_func, X), y
        return _batch_func
    else:
        # we apply it to everything
        def _all_func(X, y):
            return _func(X), y
        return _all_func
def tf_augmentor(out_size,
                 intermediate_size = (640, 640),
                 intermediate_trans = 'crop',
                 batch_size = 16,
                 horizontal_flip = True,
                 vertical_flip = False,
                 random_brightness = True,
                 random_contrast = True,
                 random_saturation = True,
                 random_hue = True,
                 color_mode = 'rgb',
                 preproc_func = preprocess_input,
                 min_crop_percent = 0.001,
                 max_crop_percent = 0.005,
                 crop_probability = 0.5,
                 rotation_range = 10):
    load_ops = tf_image_loader(out_size = intermediate_size,
                               horizontal_flip = horizontal_flip,
                               vertical_flip = vertical_flip,
                               random_brightness = random_brightness,
                               random_contrast = random_contrast,
                               random_saturation = random_saturation,
                               random_hue = random_hue,
                               color_mode = color_mode,
                               preproc_func = preproc_func,
                               on_batch = False)
    def batch_ops(X, y):
        batch_size = tf.shape(X)[0]
        with tf.name_scope('transformation'):
            # code borrowed from https://becominghuman.ai/data-augmentation-on-gpu-in-tensorflow-13d14ecf2b19
            # The list of affine transformations that our image will go under.
            # Every element is Nx8 tensor, where N is a batch size.
            transforms = []
            identity = tf.constant([1, 0, 0, 0, 1, 0, 0, 0], dtype=tf.float32)
            if rotation_range > 0:
                angle_rad = rotation_range / 180 * np.pi
                angles = tf.random_uniform([batch_size], -angle_rad, angle_rad)
                transforms += [tf.contrib.image.angles_to_projective_transforms(angles, intermediate_size[0], intermediate_size[1])]
            if crop_probability > 0:
                crop_pct = tf.random_uniform([batch_size], min_crop_percent, max_crop_percent)
                left = tf.random_uniform([batch_size], 0, intermediate_size[0] * (1.0 - crop_pct))
                top = tf.random_uniform([batch_size], 0, intermediate_size[1] * (1.0 - crop_pct))
                crop_transform = tf.stack([
                    crop_pct,
                    tf.zeros([batch_size]), top,
                    tf.zeros([batch_size]), crop_pct, left,
                    tf.zeros([batch_size]),
                    tf.zeros([batch_size])
                ], 1)
                coin = tf.less(tf.random_uniform([batch_size], 0, 1.0), crop_probability)
                transforms += [tf.where(coin, crop_transform, tf.tile(tf.expand_dims(identity, 0), [batch_size, 1]))]
            if len(transforms) > 0:
                X = tf.contrib.image.transform(X,
                                               tf.contrib.image.compose_transforms(*transforms),
                                               interpolation='BILINEAR') # or 'NEAREST'
            if intermediate_trans == 'scale':
                X = tf.image.resize_images(X, out_size)
            elif intermediate_trans == 'crop':
                X = tf.image.resize_image_with_crop_or_pad(X, out_size[0], out_size[1])
            else:
                raise ValueError('Invalid Operation {}'.format(intermediate_trans))
        return X, y
    def _create_pipeline(in_ds):
        batch_ds = in_ds.map(load_ops, num_parallel_calls=4).batch(batch_size)
        return batch_ds.map(batch_ops)
    return _create_pipeline
def flow_from_dataframe(idg,
                        in_df,
                        path_col,
                        y_col,
                        shuffle = True,
                        color_mode = 'rgb'):
    files_ds = tf.data.Dataset.from_tensor_slices((in_df[path_col].values,
                                                   np.stack(in_df[y_col].values, 0)))
    in_len = in_df[path_col].values.shape[0]
    while True:
        if shuffle:
            files_ds = files_ds.shuffle(in_len) # shuffle the whole dataset
        next_batch = idg(files_ds).repeat().make_one_shot_iterator().get_next()
        for i in range(max(in_len // 32, 1)):
            # NOTE: if we loop here it is 'thread-safe-ish'; if we loop on the outside it is completely unsafe
            yield K.get_session().run(next_batch)
batch_size = 48
core_idg = tf_augmentor(out_size = IMG_SIZE,
color_mode = 'rgb',
vertical_flip = True,
crop_probability=0.0, # crop doesn't work yet
batch_size = batch_size)
valid_idg = tf_augmentor(out_size = IMG_SIZE, color_mode = 'rgb',
crop_probability=0.0,
horizontal_flip = False,
vertical_flip = False,
random_brightness = False,
random_contrast = False,
random_saturation = False,
random_hue = False,
rotation_range = 0,
batch_size = batch_size)
train_gen = flow_from_dataframe(core_idg, train_df,
path_col = 'path',
y_col = 'level_cat')
valid_gen = flow_from_dataframe(valid_idg, valid_df,
path_col = 'path',
y_col = 'level_cat') # we can use much larger batches for evaluation
t_x, t_y = next(valid_gen)
t_x, t_y = next(train_gen)
from keras.applications.vgg16 import VGG16 as PTModel
from keras.applications.inception_resnet_v2 import InceptionResNetV2 as PTModel
from keras.applications.inception_v3 import InceptionV3 as PTModel
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Input, Conv2D, multiply, LocallyConnected2D, Lambda
from keras.models import Model
in_lay = Input(t_x.shape[1:])
base_pretrained_model = PTModel(input_shape = t_x.shape[1:], include_top = False, weights = 'imagenet')
base_pretrained_model.trainable = False
pt_depth = base_pretrained_model.get_output_shape_at(0)[-1]
pt_features = base_pretrained_model(in_lay)
from keras.layers import BatchNormalization
bn_features = BatchNormalization()(pt_features)
attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(Dropout(0.5)(bn_features))
attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
attn_layer = Conv2D(8, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
attn_layer = Conv2D(1,
kernel_size = (1,1),
padding = 'valid',
activation = 'sigmoid')(attn_layer)
# fan it out to all of the channels
up_c2_w = np.ones((1, 1, 1, pt_depth))
up_c2 = Conv2D(pt_depth, kernel_size = (1,1), padding = 'same',
activation = 'linear', use_bias = False, weights = [up_c2_w])
up_c2.trainable = False
attn_layer = up_c2(attn_layer)
mask_features = multiply([attn_layer, bn_features])
gap_features = GlobalAveragePooling2D()(mask_features)
gap_mask = GlobalAveragePooling2D()(attn_layer)
# to account for missing values from the attention model
gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
gap_dr = Dropout(0.25)(gap)
dr_steps = Dropout(0.25)(Dense(128, activation = 'relu')(gap_dr))
out_layer = Dense(t_y.shape[-1], activation = 'softmax')(dr_steps)
retina_model = Model(inputs = [in_lay], outputs = [out_layer])
from keras.metrics import top_k_categorical_accuracy
def top_2_accuracy(in_gt, in_pred):
    return top_k_categorical_accuracy(in_gt, in_pred, k=2)
retina_model.compile(optimizer = 'adam', loss = 'categorical_crossentropy',
metrics = ['categorical_accuracy', top_2_accuracy])
retina_model.summary()
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
weight_path="{}_weights.best.hdf5".format('retina')
checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1,
save_best_only=True, mode='min', save_weights_only = True)
reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=3, verbose=1, mode='auto', epsilon=0.0001, cooldown=5, min_lr=0.0001)
early = EarlyStopping(monitor="val_loss",
mode="min",
patience=6) # probably needs to be more patient, but kaggle time is limited
callbacks_list = [checkpoint, early, reduceLROnPlat]
retina_model.fit_generator(train_gen,
steps_per_epoch = train_df.shape[0]//batch_size,
validation_data = valid_gen,
validation_steps = valid_df.shape[0]//batch_size,
epochs = 25,
callbacks = callbacks_list,
workers = 0, # tf-generators are not thread-safe
use_multiprocessing=False,
max_queue_size = 0
)
retina_model.load_weights(weight_path)
retina_model.save('full_retina_model.h5')
I realize that's a lot of code, but what I want to do is take in a dataframe that looks like this:
image,N,D,G,C,A,H,M,O
2857_left,1,0,0,0,0,0,0,0
3151_left,1,0,0,0,0,0,0,0
3113_left,1,0,0,0,0,0,0,0
and in order to achieve this I have made the following changes:
from sklearn.model_selection import train_test_split
rr_df = retina_df
y = rr_df[['N', 'D', 'G','C','A', 'H', 'M', 'O']]
train_ids, valid_ids = train_test_split(rr_df['PatientId'],
test_size = 0.25,
random_state = 2018)
raw_train_df = retina_df[retina_df['PatientId'].isin(train_ids)]
valid_df = retina_df[retina_df['PatientId'].isin(valid_ids)]
print('train', raw_train_df.shape[0], 'validation', valid_df.shape[0])
train_df = raw_train_df
from keras import regularizers, optimizers
from keras.layers import BatchNormalization
in_lay = Input(t_x.shape[1:])
base_pretrained_model = PTModel(input_shape = t_x.shape[1:], include_top = False, weights = 'imagenet')
base_pretrained_model.trainable = False
pt_depth = base_pretrained_model.get_output_shape_at(0)[-1]
pt_features = base_pretrained_model(in_lay)
bn_features = BatchNormalization()(pt_features)
# here we do an attention mechanism to turn pixels in the GAP on an off
attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(Dropout(0.5)(bn_features))
attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
attn_layer = Conv2D(8, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
attn_layer = Conv2D(1,
kernel_size = (1,1),
padding = 'valid',
activation = 'sigmoid')(attn_layer)
# fan it out to all of the channels
up_c2_w = np.ones((1, 1, 1, pt_depth))
up_c2 = Conv2D(pt_depth, kernel_size = (1,1), padding = 'same',
activation = 'linear', use_bias = False, weights = [up_c2_w])
up_c2.trainable = False
attn_layer = up_c2(attn_layer)
mask_features = multiply([attn_layer, bn_features])
gap_features = GlobalAveragePooling2D()(mask_features)
gap_mask = GlobalAveragePooling2D()(attn_layer)
# to account for missing values from the attention model
gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
gap_dr = Dropout(0.25)(gap)
x = Dropout(0.25)(Dense(128, activation = 'relu')(gap_dr))
# out_layer = Dense(t_y.shape[-1], activation = 'softmax')(dr_steps)
output1 = Dense(1, activation = 'sigmoid')(x)
output2 = Dense(1, activation = 'sigmoid')(x)
output3 = Dense(1, activation = 'sigmoid')(x)
output4 = Dense(1, activation = 'sigmoid')(x)
output5 = Dense(1, activation = 'sigmoid')(x)
output6 = Dense(1, activation = 'sigmoid')(x)
output7 = Dense(1, activation = 'sigmoid')(x)
output8 = Dense(1, activation = 'sigmoid')(x)
retina_model = Model(inputs = [in_lay], outputs = [output1,output2,output3,output4,output5, output6, output7, output8])
# retina_model = Model([in_lay],output1,output2,output3,output4,output5, output6, output7, output8)
# retina_model.build(t_x.shape[1:]) # `input_shape` is the shape of the input data
# print(model.summary())
# retina_model.compile(optimizers.rmsprop(lr = 0.00001, decay = 1e-6),
loss = ["binary_crossentropy","binary_crossentropy","binary_crossentropy","binary_crossentropy", "binary_crossentropy","binary_crossentropy","binary_crossentropy","binary_crossentropy"]#,metrics = ["accuracy"])
# retina_model = Model(inputs = [in_lay], outputs = [out_layer])
# from keras.metrics import top_k_categorical_accuracy
# def top_2_accuracy(in_gt, in_pred):
# return top_k_categorical_accuracy(in_gt, in_pred, k=2)
retina_model.compile(optimizer = 'adam', loss = loss,
metrics = ['accuracy'])
retina_model.summary()
but when I run this I get:
ValueError: Error when checking model target: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 8 array(s), but instead got the following list of 1 arrays: [array([[1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1],
[0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0],
...
Any suggestions on how I could change this model to train it on multi-label inputs? Thanks in advance.

You are trying to train a model with 8 different outputs (each of length 1), but your target is a single array of length 8.
The easiest fix is to replace:
output1 = Dense(1, activation = 'sigmoid')(x)
output2 = Dense(1, activation = 'sigmoid')(x)
output3 = Dense(1, activation = 'sigmoid')(x)
output4 = Dense(1, activation = 'sigmoid')(x)
output5 = Dense(1, activation = 'sigmoid')(x)
output6 = Dense(1, activation = 'sigmoid')(x)
output7 = Dense(1, activation = 'sigmoid')(x)
output8 = Dense(1, activation = 'sigmoid')(x)
loss = ["binary_crossentropy","binary_crossentropy","binary_crossentropy","binary_crossentropy", "binary_crossentropy","binary_crossentropy","binary_crossentropy","binary_crossentropy"]#,metrics = ["accuracy"])
with:
# keep sigmoid here; don't switch it to softmax, since this is a multi-label problem
output = Dense(8, activation = 'sigmoid')(x)
loss = "binary_crossentropy"
Otherwise, you have to create a custom generator that yields a list of 8 targets to feed your network.
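If you do keep the eight separate output heads, the generator has to yield the targets as a list of eight arrays instead of one (batch, 8) array. A minimal sketch of such a wrapper, reusing the train_gen/valid_gen generators defined above (this is illustrative only, not a tested drop-in fix):

def split_targets(gen, n_outputs=8):
    # y arrives as a (batch, 8) array; the 8-output model expects a list of 8 arrays
    for x, y in gen:
        yield x, [y[:, i:i + 1] for i in range(n_outputs)]

multi_train_gen = split_targets(train_gen)
multi_valid_gen = split_targets(valid_gen)
# retina_model.fit_generator(multi_train_gen, validation_data=multi_valid_gen, ...)

With the single Dense(8, activation='sigmoid') head suggested above, no wrapper is needed: the existing generator already yields a (batch, 8) target array that matches the output shape.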

Related

ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 32, 10000, 1), found shape=(None, 32, 1000, 1)

What is ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 32, 10000, 1), found shape=(None, 32, 1000, 1)?
When I try to train the model with TensorFlow, I get this error. I think the shape of my data is the same as the input shape the model expects. Why does this error occur?
def EEGNet(nb_classes, Chans = 64, Samples = 128,
           dropoutRate = 0.5, kernLength = 64, F1 = 8,
           D = 2, F2 = 16, norm_rate = 0.25, dropoutType = 'Dropout'):
    if dropoutType == 'SpatialDropout2D':
        dropoutType = Dropout # SpatialDropout2D
    elif dropoutType == 'Dropout':
        dropoutType = Dropout
    else:
        raise ValueError('dropoutType must be one of SpatialDropout2D '
                         'or Dropout, passed as a string.')
    input1 = Input(shape = (Chans, Samples, 1))
    ##################################################################
    block1 = Conv2D(F1, (1, kernLength), padding = 'same',
                    input_shape = (Chans, Samples, 1),
                    use_bias = False)(input1)
    block1 = BatchNormalization()(block1)
    block1 = DepthwiseConv2D((Chans, 1), use_bias = False,
                             depth_multiplier = D,
                             depthwise_constraint = max_norm(1.))(block1)
    block1 = BatchNormalization()(block1)
    block1 = Activation('elu')(block1)
    block1 = AveragePooling2D((1, 4))(block1)
    block1 = dropoutType(dropoutRate)(block1)
    block2 = SeparableConv2D(F2, (1, 16),
                             use_bias = False, padding = 'same')(block1)
    block2 = BatchNormalization()(block2)
    block2 = Activation('elu')(block2)
    block2 = AveragePooling2D((1, 8))(block2)
    block2 = dropoutType(dropoutRate)(block2)
    flatten = Flatten(name = 'flatten')(block2)
    dense = Dense(nb_classes, name = 'dense',
                  kernel_constraint = max_norm(norm_rate))(flatten)
    softmax = Activation('softmax', name = 'softmax')(dense)
    return Model(inputs=input1, outputs=softmax)
l = np.random.rand(9573,32,1000,1)
model = EEGNet(2, Chans = 32, Samples = 10000)
early_stopping = EarlyStopping(
monitor='val_loss', min_delta=0.0001,
mode='min', patience=10, verbose=1,
restore_best_weights=True)
# Fit model
optimizer = keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9,
beta_2=0.999, amsgrad=False)
model.compile(loss='binary_crossentropy', optimizer=optimizer,
metrics=['accuracy'])

Keras text classification from scratch - error implementing

I'm implementing Keras text classification from scratch on an airline reviews dataset, but I'm getting an error that logits and labels must have the same shape: (None, 1) vs ().
The code below is a bit long, but it includes the tensor preprocessing, and the model uses the functional API:
max_features = 200
sequence_length = None
embedding_dim = 128
from tensorflow.keras.layers import TextVectorization
dtrain_lab = data_train[['airline_sentiment','negativereason']].to_numpy()
display(dtrain_lab)
tlist_txt = data_train['negativereason'].tolist()
tlist_sent = data_train['airline_sentiment'].tolist()
rac = 0
for k in tlist_txt:
    rap = tlist_txt[rac]
    if pd.isnull(rap) == True:
        tlist_txt[rac] = 'empty'
    rac += 1
#p-prueba
p_list = []
for i in tlist_sent:
    if i == 'positive':
        p_list.append(1)
    if i == 'negative' or i == 'neutral':
        p_list.append(0)
train_sent = np.array(p_list)
val_txt = data_val['negativereason'].tolist()
val_sent = data_val['airline_sentiment'].tolist()
l_val = []
for j in val_sent:
    if j == 'positive':
        l_val.append(1)
    if j == 'negative' or j == 'neutral':
        l_val.append(0)
sent_val_na = np.array(l_val)
dac = 0
for k in val_txt:
    hap = val_txt[dac]
    if pd.isnull(hap) == True:
        val_txt[dac] = 'empty'
    dac += 1
tftrain_db = tf.data.Dataset.from_tensor_slices((tlist_txt, train_sent))
tfval_db = tf.data.Dataset.from_tensor_slices((val_txt, sent_val_na))
vectorize_layer = TextVectorization(
standardize='lower_and_strip_punctuation',
split="whitespace",
max_tokens=200,
output_mode="int",
output_sequence_length=30,
)
def vectorize_text(text, label):
    text = tf.expand_dims(text, -1)
    return vectorize_layer(text), label
text_ds = tftrain_db.map(lambda x, y: x)
vectorize_layer.adapt(text_ds)
v_dbtrain = tftrain_db.map(vectorize_text)
v_dbval = tfval_db.map(vectorize_text)
from tensorflow.keras import layers
inputs = tf.keras.Input(shape=(None,), dtype="int64")
x = layers.Embedding(max_features, embedding_dim)(inputs)
x = layers.Dropout(0.5)(x)
x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation="relu", input_shape=(None,))(x)
x = layers.Dropout(0.5)(x)
predictions = layers.Dense(1, activation="sigmoid", name="predictions")(x)
model = tf.keras.Model(inputs, predictions)
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
epochs = 3
model.fit(v_dbtrain, validation_data= tfval_db, epochs=epochs)
Error is:
ValueError: `logits` and `labels` must have the same shape, received ((None, 1) vs ()).
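One likely culprit, assuming the pipeline above is otherwise unchanged: neither dataset is ever batched, so each label arrives as a scalar of shape () while the model emits (None, 1) logits, and the fit call also validates on the raw tfval_db instead of the vectorized v_dbval. A hedged sketch of the adjustment (batch size 32 is an arbitrary choice for illustration):

# batch both datasets so the labels gain a batch dimension, and validate on the vectorized set
v_dbtrain = v_dbtrain.batch(32)
v_dbval = v_dbval.batch(32)
model.fit(v_dbtrain, validation_data=v_dbval, epochs=epochs)

If the shapes still disagree after batching, giving the label an explicit last dimension inside vectorize_text (for example with tf.expand_dims(label, -1)) makes it match the (None, 1) output exactly.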

Tried to export a function which references 'untracked' resource Tensor("272554:0", shape=(), dtype=resource)

I'm currently using CoAtNet0 for this project, and I can't seem to save the model. I hope someone can guide me on how to fix the error, or suggest another way to save the model. The error for the code is:
AssertionError: Tried to export a function which references
'untracked' resource Tensor("272554:0", shape=(), dtype=resource).
TensorFlow objects (e.g. tf.Variable) captured by functions must be
'tracked' by assigning them to an attribute of a tracked object or
assigned to an attribute of the main object directly.
Here's the code for the model.
# CoAtNet
class MBConv(tf.keras.layers.Layer):
    def __init__(self, filters, kernel_size, strides = 1, expand_ratio = 1, se_ratio = 4, residual = True, momentum = 0.9, epsilon = 0.01, convolution = tf.keras.layers.Conv2D, activation = tf.nn.swish, kernel_initializer = "he_normal", **kwargs):
        super(MBConv, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        self.expand_ratio = expand_ratio
        self.se_ratio = se_ratio
        self.residual = residual
        self.momentum = momentum
        self.epsilon = epsilon
        self.convolution = convolution
        self.activation = activation
        self.kernel_initializer = kernel_initializer
        self.model_layer = layers.LayerNormalization()

    def build(self, input_shape):
        self.layers = []
        self.post = []
        if self.expand_ratio != 1:
            conv = self.convolution(input_shape[-1] * self.expand_ratio, 1, use_bias = False, kernel_initializer = self.kernel_initializer)
            norm = tf.keras.layers.BatchNormalization(momentum = self.momentum, epsilon = self.epsilon)
            act = tf.keras.layers.Activation(self.activation)
            input_shape = input_shape[:-1] + (input_shape[-1] * self.expand_ratio,)
            self.layers += [conv, norm, act]
        # Depthwise Convolution
        conv = self.convolution(input_shape[-1], self.kernel_size, strides = self.strides, groups = input_shape[-1], padding = "same", use_bias = False, kernel_initializer = self.kernel_initializer)
        norm = tf.keras.layers.BatchNormalization(momentum = self.momentum, epsilon = self.epsilon)
        act = tf.keras.layers.Activation(self.activation)
        self.layers += [conv, norm, act]
        # Squeeze and Excitation layer, if desired
        axis = list(range(1, len(input_shape) - 1))
        gap = tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis = axis, keepdims = True))
        squeeze = self.convolution(max(1, int(input_shape[-1] / self.se_ratio)), 1, use_bias = True, kernel_initializer = self.kernel_initializer)
        act = tf.keras.layers.Activation(self.activation)
        excitation = self.convolution(input_shape[-1], 1, use_bias = True, kernel_initializer = self.kernel_initializer)
        se = lambda x: x * tf.nn.sigmoid(excitation(act(squeeze(gap(x)))))
        self.layers += [se]
        # Output Phase
        conv = self.convolution(self.filters, 1, use_bias = False, kernel_initializer = self.kernel_initializer)
        norm = tf.keras.layers.BatchNormalization(momentum = self.momentum, epsilon = self.epsilon)
        self.layers += [conv, norm]
        # Residual
        if self.residual:
            if 1 < self.strides:
                pool = tf.keras.layers.MaxPool2D(pool_size = self.strides + 1, strides = self.strides, padding = "same")
                self.post.append(pool)
            if input_shape[-1] != self.filters:
                resample = self.convolution(self.filters, 1, use_bias = False, kernel_initializer = self.kernel_initializer)
                self.post.append(resample)

    def call(self, x):
        out = x
        for layer in self.layers:
            out = layer(out)
        if self.residual:
            for layer in self.post:
                x = layer(x)
            out = out + x
        return out

    def get_config(self):
        config = super(MBConv, self).get_config()
        config["filters"] = self.filters
        config["kernel_size"] = self.kernel_size
        config["expand_ratio"] = self.expand_ratio
        config["se_ratio"] = self.se_ratio
        config["residual"] = self.residual
        config["momentum"] = self.momentum
        config["epsilon"] = self.epsilon
        config["convolution"] = self.convolution
        config["activation"] = self.activation
        config["kernel_initializer"] = self.kernel_initializer
        return config
class MultiHeadSelfAttention(tf.keras.layers.Layer):
    def __init__(self, emb_dim = 768, n_head = 12, out_dim = None, relative_window_size = None, dropout_rate = 0., kernel_initializer = tf.keras.initializers.RandomNormal(mean = 0, stddev = 0.01), **kwargs):
        # ScaledDotProductAttention
        super(MultiHeadSelfAttention, self).__init__(**kwargs)
        self.emb_dim = emb_dim
        self.n_head = n_head
        if emb_dim % n_head != 0:
            raise ValueError("Should be embedding dimension % number of heads = 0.")
        if out_dim is None:
            out_dim = self.emb_dim
        self.out_dim = out_dim
        if relative_window_size is not None and np.ndim(relative_window_size) == 0:
            relative_window_size = [relative_window_size, relative_window_size]
        self.relative_window_size = relative_window_size
        self.projection_dim = emb_dim // n_head
        self.dropout_rate = dropout_rate
        self.query = tf.keras.layers.Dense(emb_dim, kernel_initializer = kernel_initializer)
        self.key = tf.keras.layers.Dense(emb_dim, kernel_initializer = kernel_initializer)
        self.value = tf.keras.layers.Dense(emb_dim, kernel_initializer = kernel_initializer)
        self.combine = tf.keras.layers.Dense(out_dim, kernel_initializer = kernel_initializer)

    def build(self, input_shape):
        if self.relative_window_size is not None:
            self.relative_position_bias_table = self.add_weight("relative_position_bias_table", shape = [((2 * self.relative_window_size[0]) - 1) * ((2 * self.relative_window_size[1]) - 1), self.n_head], trainable = self.trainable)
            coords_h = np.arange(self.relative_window_size[0])
            coords_w = np.arange(self.relative_window_size[1])
            coords = np.stack(np.meshgrid(coords_h, coords_w, indexing = "ij")) # 2, Wh, Ww
            coords = np.reshape(coords, [2, -1])
            relative_coords = np.expand_dims(coords, axis = -1) - np.expand_dims(coords, axis = -2) # 2, Wh * Ww, Wh * Ww
            relative_coords = np.transpose(relative_coords, [1, 2, 0]) # Wh * Ww, Wh * Ww, 2
            relative_coords[:, :, 0] += self.relative_window_size[0] - 1 # shift to start from 0
            relative_coords[:, :, 1] += self.relative_window_size[1] - 1
            relative_coords[:, :, 0] *= 2 * self.relative_window_size[1] - 1
            relative_position_index = np.sum(relative_coords, -1)
            self.relative_position_index = tf.Variable(tf.convert_to_tensor(relative_position_index), trainable = False, name = "relative_position_index")

    def attention(self, query, key, value, relative_position_bias = None):
        score = tf.matmul(query, key, transpose_b = True)
        n_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(n_key)
        if relative_position_bias is not None:
            scaled_score = scaled_score + relative_position_bias
        weight = tf.nn.softmax(scaled_score, axis = -1)
        if 0 < self.dropout_rate:
            weight = tf.nn.dropout(weight, self.dropout_rate)
        out = tf.matmul(weight, value)
        return out

    def separate_head(self, x):
        out = tf.keras.layers.Reshape([-1, self.n_head, self.projection_dim])(x)
        out = tf.keras.layers.Permute([2, 1, 3])(out)
        return out

    def call(self, inputs):
        query = self.query(inputs)
        key = self.key(inputs)
        value = self.value(inputs)
        query = self.separate_head(query)
        key = self.separate_head(key)
        value = self.separate_head(value)
        relative_position_bias = None
        if self.relative_window_size is not None:
            relative_position_bias = tf.gather(self.relative_position_bias_table, tf.reshape(self.relative_position_index, [-1]))
            relative_position_bias = tf.reshape(relative_position_bias, [self.relative_window_size[0] * self.relative_window_size[1], self.relative_window_size[0] * self.relative_window_size[1], -1]) # Wh * Ww, Wh * Ww, nH
            relative_position_bias = tf.transpose(relative_position_bias, [2, 0, 1]) # nH, Wh * Ww, Wh * Ww
            relative_position_bias = tf.expand_dims(relative_position_bias, axis = 0)
        attention = self.attention(query, key, value, relative_position_bias)
        attention = tf.keras.layers.Permute([2, 1, 3])(attention)
        attention = tf.keras.layers.Reshape([-1, self.emb_dim])(attention)
        out = self.combine(attention)
        return out

    def get_config(self):
        config = super(MultiHeadSelfAttention, self).get_config()
        config["emb_dim"] = self.emb_dim
        config["n_head"] = self.n_head
        config["out_dim"] = self.out_dim
        config["relative_window_size"] = self.relative_window_size
        config["projection_dim"] = self.projection_dim
        config["dropout_rate"] = self.dropout_rate
        return config
class ConvTransformer(tf.keras.layers.Layer):
    def __init__(self, emb_dim = 768, n_head = 12, strides = 1, out_dim = None, epsilon = 1e-5, dropout_rate = 0., activation = tf.keras.activations.gelu, kernel_initializer = tf.keras.initializers.RandomNormal(mean = 0, stddev = 0.01), **kwargs):
        super(ConvTransformer, self).__init__(**kwargs)
        self.emb_dim = emb_dim
        self.n_head = n_head
        self.strides = strides
        self.out_dim = out_dim if out_dim is not None else emb_dim
        self.epsilon = epsilon
        self.dropout_rate = dropout_rate
        self.activation = activation
        self.kernel_initializer = kernel_initializer

    def build(self, input_shape):
        self.attention = []
        self.residual = []
        # Attention
        shape = input_shape[1:3]
        if 1 < self.strides:
            shape = np.divide(np.add(shape, (self.strides - 1)), self.strides).astype(int)
            pool = tf.keras.layers.MaxPool2D(pool_size = self.strides + 1, strides = self.strides, padding = "same")
            self.attention.append(pool)
            self.residual.append(pool)
        if input_shape[-1] != self.out_dim:
            resample = tf.keras.layers.Conv2D(self.out_dim, 1, padding = "same", use_bias = False, kernel_initializer = "he_normal")
            self.residual.append(resample)
        pre_reshape = tf.keras.layers.Reshape([-1, input_shape[-1]])
        mhsa = MultiHeadSelfAttention(emb_dim = self.emb_dim, n_head = self.n_head, out_dim = self.out_dim, relative_window_size = shape, dropout_rate = self.dropout_rate)
        post_reshape = tf.keras.layers.Reshape([*shape, self.out_dim])
        self.attention += [pre_reshape, mhsa, post_reshape]
        self.ffn = []
        # Feed Forward Network
        norm = tf.keras.layers.LayerNormalization(epsilon = self.epsilon)
        dense1 = tf.keras.layers.Dense(self.out_dim, kernel_initializer = self.kernel_initializer)
        act = tf.keras.layers.Activation(self.activation)
        dense2 = tf.keras.layers.Dense(self.out_dim, kernel_initializer = self.kernel_initializer)
        self.ffn = [norm, dense1, act, dense2]

    def call(self, inputs):
        out = inputs
        for layer in self.attention:
            out = layer(out)
        for layer in self.residual:
            inputs = layer(inputs)
        out = out + inputs
        for layer in self.ffn:
            out = layer(out)
        return out

    def get_config(self):
        config = super(ConvTransformer, self).get_config()
        config["emb_dim"] = self.emb_dim
        config["n_head"] = self.n_head
        config["strides"] = self.strides
        config["out_dim"] = self.out_dim
        config["epsilon"] = self.epsilon
        config["dropout_rate"] = self.dropout_rate
        config["activation"] = self.activation
        config["kernel_initializer"] = self.kernel_initializer
        return config
def coatnet(x, n_class = 1000, include_top = True, n_depth = [2, 2, 6, 14, 2], n_feature = [64, 96, 192, 384, 768], block = ["C", "M", "M", "T", "T"], stage_stride_size = 2, expand_ratio = 4, se_ratio = 4, dropout_rate = 0., activation = tf.keras.activations.gelu, name = ""):
    # block : S > Stem, C > MBConv, T > Transformer
    if 0 < len(name):
        name += "_"
    if isinstance(stage_stride_size, int):
        stage_stride_size = [stage_stride_size] * len(block)
    out = x
    for i, (_n_depth, _n_feature, _block, _stage_stride_size) in enumerate(zip(n_depth, n_feature, block, stage_stride_size)):
        for j in range(_n_depth):
            stride_size = 1 if j != 0 else _stage_stride_size
            residual = out
            if _block.upper() == "C": # i == 0:
                out = tf.keras.layers.Conv2D(_n_feature, 1 if i != 0 else 3, strides = stride_size, padding = "same", use_bias = False, kernel_initializer = "he_normal", name = "{0}stage{1}_conv{2}".format(name, i, j + 1))(out)
                out = tf.keras.layers.BatchNormalization(momentum = 0.9, epsilon = 1e-5, name = "{0}stage{1}_norm{2}".format(name, i, j + 1))(out)
                out = tf.keras.layers.Activation(activation, name = "{0}stage{1}_act{2}".format(name, i, j + 1))(out)
            elif _block.upper() == "M":
                out = tf.keras.layers.BatchNormalization(momentum = 0.9, epsilon = 1e-5, name = "{0}stage{1}_pre_norm{2}".format(name, i, j + 1))(out)
                out = MBConv(_n_feature, 3, strides = stride_size, expand_ratio = expand_ratio, se_ratio = se_ratio, residual = True, momentum = 0.9, epsilon = 1e-5, activation = activation, name = "{0}stage{1}_mbconv{2}".format(name, i, j + 1))(out)
            elif _block.upper() == "T":
                out = tf.keras.layers.LayerNormalization(epsilon = 1e-5, name = "{0}stage{1}_pre_norm{2}".format(name, i, j + 1))(out)
                out = ConvTransformer(32 * 8, 8, strides = stride_size, out_dim = _n_feature, epsilon = 1e-5, activation = activation, name = "{0}stage{1}_transformer{2}".format(name, i, j + 1))(out)
    if include_top:
        out = tf.keras.layers.GlobalAveragePooling2D(name = "{0}gap".format(name))(out)
        if 0 < dropout_rate:
            out = tf.keras.layers.Dropout(dropout_rate, name = "{0}dropout".format(name))(out)
        out = tf.keras.layers.Dense(n_class, kernel_initializer = tf.keras.initializers.RandomNormal(mean = 0, stddev = 0.01), name = "{0}logits".format(name))(out)
    return out
def coatnet0(input_tensor = None, input_shape = None, classes = 1000, include_top = True, weights = None):
    if input_tensor is None:
        img_input = tf.keras.layers.Input(shape = input_shape)
    else:
        if not tf.keras.backend.is_keras_tensor(input_tensor):
            img_input = tf.keras.layers.Input(tensor = input_tensor, shape = input_shape)
        else:
            img_input = input_tensor
    out = coatnet(img_input, classes, include_top, n_depth = [2, 2, 3, 5, 2], n_feature = [64, 96, 192, 384, 768], block = ["C", "M", "M", "T", "T"], stage_stride_size = 2, expand_ratio = 4, se_ratio = 4, dropout_rate = 0., activation = tf.keras.activations.gelu)
    model = tf.keras.Model(img_input, out)
    if weights is not None:
        model.load_weights(weights)
    return model
def get_model():
    model = coatnet0(input_shape = (224, 224, 3), include_top = False)
    for layer in model.layers[:-1]:
        layer.trainable = False
    # adding layers
    x = tf.keras.layers.Flatten()(model.output)
    #x = tf.keras.layers.BatchNormalization()(x)
    #x = tf.keras.layers.Dense(500, activation = tf.keras.activations.gelu)(x)
    x = tf.keras.layers.Dense(500, activation = tf.keras.activations.gelu, kernel_initializer = tf.keras.initializers.VarianceScaling())(x)
    #x = tf.keras.layers.Dropout(0.2)(x)
    #x = tf.keras.layers.Dense(500, activation = tf.keras.activations.gelu)(x)
    x = tf.keras.layers.Dense(500, activation = tf.keras.activations.gelu, kernel_initializer = tf.keras.initializers.VarianceScaling())(x)
    prediction = tf.keras.layers.Dense(2, activation = 'softmax', kernel_initializer = tf.keras.initializers.VarianceScaling())(x)
    model = tf.keras.Model(model.input, prediction)
    model.summary()
    loss = tf.keras.losses.binary_crossentropy
    opt = tf.keras.optimizers.Adam(learning_rate = 0.00001)
    metric = ['accuracy']
    #weights = compute_class_weight(class_weight = "balanced", classes = np.unique(train_batches.classes), y = train_batches.classes)
    #cw = dict(zip(np.unique(train_batches.classes), weights))
    callbacks = [
        #tf.keras.callbacks.ModelCheckpoint("covid_classifier_model.h1", save_best_only=True, verbose = 0),
        tf.keras.callbacks.EarlyStopping(patience = 10, monitor = 'val_loss', mode = "auto", verbose = 1),
        tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss', factor = 0.1, patience = 3, verbose = 1, mode = 'auto')
    ]
    model.compile(optimizer = opt, loss = loss,
                  metrics = metric)
    return model
model.save("my_model")
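If tracking down the untracked resource proves difficult, a common workaround (not a fix for the underlying tracking issue) is to persist only the weights and rebuild the architecture in code before restoring them. A minimal sketch, assuming get_model() above is reproducible:

# save weights only, then rebuild the model from code and load them back
model = get_model()
model.save_weights("my_model_weights.h5")

restored = get_model()
restored.load_weights("my_model_weights.h5")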

Softmax output returning only ones and zeros?

My convolutional neural network is returning only ones and zeros on the softmax output (out1). Does anyone know why?
def build(self):
    inp = Input(self.obs_shape)
    conv0 = Conv2D(32, 2, 1, padding="same", activation = "relu")(inp)
    drop0 = MaxPool2D((2,2))(conv0)
    conv1 = Conv2D(64, 3, 2, padding="same", activation = "relu")(drop0)
    drop1 = MaxPool2D((2,2))(conv1)
    flat = Flatten()(drop1)
    hid0 = Dense(128, activation='relu')(flat)
    hid1 = Dense(256, activation='relu')(hid0)
    hid = Dense(128, activation='relu')(hid1)
    out1 = Dense(self.action_count, activation='softmax')(hid)
    out2 = Dense(1, activation='linear')(hid)
    model = Model(inputs = [inp], outputs = [out1, out2])
    model.compile(optimizer = tf.keras.optimizers.Adam(lr = self.lr),
                  loss = [self.actor_loss, "mse"])
    return model

def actor_loss(self, y_actual, y_pred):
    actions = tf.cast(y_actual[:, 0], tf.int32)
    returns = y_actual[:, 1]
    mask = tf.one_hot(actions, self.action_count)
    logps = tf.math.log(tf.boolean_mask(y_pred, mask) + 1e-3)
    entropy = -tf.math.reduce_sum(y_pred * tf.math.log(y_pred))
    return -tf.math.reduce_sum(logps * returns) - 0.0001*entropy
model = Model(inputs = [inp], outputs = [out1, out2])
Look at the line above: there are only two outputs, so your build function has fixed the number of outputs, and that is why you only get 1 or 0.
In short: you need to change your model.

Multidimensional Regression Network in Keras quickly trends to 0

I had a basic Keras network predicting one value and it worked fine. I tried adding support for predicting 4 values, but when I do that, the output instantly trends to 0. Right now the network is a simplistic model, just for testing.
The input to the network is an array of shape (90,) and the output should have 4 values. I tried having a single output layer with 4 nodes as well, but that showed the same behavior as this one. I've also tried various loss functions.
def runNN(training_data, training_labels, test_data, test_labels, model = None):
    if model is None:
        inp = Input(shape = (90,), name = 'input')
        model = Dense(units = 90, activation='relu')(inp)
        model = Dropout(0.5)(model)
        model = Dense(units = 180, activation='relu')(model)
        model = Dropout(0.5)(model)
        output1 = Dense(1, activation = 'relu', name = 'preretirement')(model)
        output2 = Dense(1, activation = 'relu', name = 'cola')(model)
        output3 = Dense(1, activation = 'relu', name = 'initialNC')(model)
        output4 = Dense(1, activation = 'relu', name = 'finalNC')(model)
        complete_model = Model(inputs = [inp], outputs = [output1, output2, output3, output4])
        optimizer = Adam(lr = .0003)
        complete_model.compile(loss='mean_absolute_error',
                               optimizer=optimizer,
                               metrics=['mean_absolute_error'])
        complete_model.fit(training_data, {'preretirement' : training_labels[0],
                                           'cola' : training_labels[1],
                                           'initialNC' : training_labels[2],
                                           'finalNC' : training_labels[3]},
                           epochs = 10, batch_size = 128)
The output after 1 epoch, and every epoch afterwards, is [0,0,0,0] for each test point. It should be a 4-item list with values between 0 and 1, such as [.34,.56,.12,.87].
You're probably doing a prediction or function-fitting (regression) task. Two suggestions might help you:
sigmoid usually works better than relu in this kind of prediction task.
Do not use an activation function on the final output layer.
The code below is modified from yours and it works fine.
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras.optimizers import Adam
dropout_rate = .5
activate_function = 'sigmoid'
num_iteration = 20
inp = Input(shape = (90,), name = 'input')
model = Dense(units = 90, activation=activate_function)(inp)
model = Dropout(rate=dropout_rate)(model)
model = Dense(units = 180, activation=activate_function)(model)
model = Dropout(rate=dropout_rate)(model)
output1 = Dense(units=1, name = 'preretirement')(model)
output2 = Dense(units=1, name = 'cola')(model)
output3 = Dense(units=1, name = 'initialNC')(model)
output4 = Dense(units=1, name = 'finalNC')(model)
# # Your original code
# output1 = Dense(units=1, activation = activate_function, name = 'preretirement')(model)
# output2 = Dense(units=1, activation = activate_function,name = 'cola')(model)
# output3 = Dense(units=1, activation = activate_function,name = 'initialNC')(model)
# output4 = Dense(units=1, activation = activate_function,name = 'finalNC')(model)
complete_model = Model(inputs = [inp], outputs = [output1, output2, output3, output4])
optimizer = Adam(lr = .0003)
complete_model.compile(loss='mean_absolute_error',
optimizer=optimizer,
metrics=['mean_absolute_error'])
# generate data for training the model
import numpy as np
num_train = 4000 # the number of training instances
# a normal distribution with mean=2, variance=1
training_data = np.random.normal(2, 1, (num_train, 90))
training_labels = np.zeros(shape=(num_train, 4))
for i in range(num_train):
    tmp = np.sum(training_data[i, :]) / 90.0
    training_labels[i, :] = [tmp, np.sin(tmp), np.cos(tmp), tmp*tmp]
print(training_data.shape, training_labels.shape)
# generate data for testing the model
test_data = np.random.normal(0, 1, (10, 90)) # 10 test instances
test_labels = np.zeros(shape=(10, 4))
for i in range(10):
    tmp = np.sum(test_data[i, :]) / 90.0
    test_labels[i, :] = [tmp, np.sin(tmp), np.cos(tmp), tmp*tmp]
print(test_data.shape, test_labels.shape)
complete_model.fit(training_data, {'preretirement' : training_labels[:, 0],
'cola' : training_labels[:, 1],
'initialNC' : training_labels[:, 2],
'finalNC' : training_labels[:, 3]},
epochs = num_iteration,
batch_size = 128)
results = complete_model.predict(test_data)
for i in range(10):
    print('true', test_labels[i])
    print('predicted', results[0][i, 0], results[1][i, 0], results[2][i, 0], results[3][i, 0])
    print('--------------------------')
The code runs and prints the true and predicted values for each of the 10 test instances.
