Converting Keras model to multi-label output - python
I have a model that takes in a dataframe that looks like this
image,level
10_left,0
10_right,0
13_left,0
with a model structure like this
base_image_dir = 'extra_data/dr/'
retina_df = pd.read_csv(os.path.join(base_image_dir, 'trainLabels.csv'))
retina_df['PatientId'] = retina_df['image'].map(lambda x: x.split('_')[0])
retina_df['path'] = retina_df['image'].map(lambda x: os.path.join(base_image_dir, 'train',
                                                                  '{}.jpeg'.format(x)))
retina_df['exists'] = retina_df['path'].map(os.path.exists)
print(retina_df['exists'].sum(), 'images found of', retina_df.shape[0], 'total')
retina_df['eye'] = retina_df['image'].map(lambda x: 1 if x.split('_')[-1]=='left' else 0)
from keras.utils.np_utils import to_categorical
retina_df['level_cat'] = retina_df['level'].map(lambda x: to_categorical(x, 1+retina_df['level'].max()))
retina_df.dropna(inplace = True)
retina_df = retina_df[retina_df['exists']]
retina_df.sample(3)
from sklearn.model_selection import train_test_split
rr_df = retina_df[['PatientId', 'level']].drop_duplicates()
train_ids, valid_ids = train_test_split(rr_df['PatientId'],
                                        test_size = 0.25,
                                        random_state = 2018,
                                        stratify = rr_df['level'])
raw_train_df = retina_df[retina_df['PatientId'].isin(train_ids)]
valid_df = retina_df[retina_df['PatientId'].isin(valid_ids)]
print('train', raw_train_df.shape[0], 'validation', valid_df.shape[0])
train_df = raw_train_df.groupby(['level', 'eye']).apply(lambda x: x.sample(75, replace = True) ).reset_index(drop = True)
print('New Data Size:', train_df.shape[0], 'Old Size:', raw_train_df.shape[0])
import tensorflow as tf
from keras import backend as K
from keras.applications.inception_v3 import preprocess_input
import numpy as np
IMG_SIZE = (512, 512) # slightly smaller than vgg16 normally expects
def tf_image_loader(out_size,
                    horizontal_flip = True,
                    vertical_flip = False,
                    random_brightness = True,
                    random_contrast = True,
                    random_saturation = True,
                    random_hue = True,
                    color_mode = 'rgb',
                    preproc_func = preprocess_input,
                    on_batch = False):
    def _func(X):
        with tf.name_scope('image_augmentation'):
            with tf.name_scope('input'):
                X = tf.image.decode_png(tf.read_file(X), channels = 3 if color_mode == 'rgb' else 0)
                X = tf.image.resize_images(X, out_size)
            with tf.name_scope('augmentation'):
                if horizontal_flip:
                    X = tf.image.random_flip_left_right(X)
                if vertical_flip:
                    X = tf.image.random_flip_up_down(X)
                if random_brightness:
                    X = tf.image.random_brightness(X, max_delta = 0.1)
                if random_saturation:
                    X = tf.image.random_saturation(X, lower = 0.75, upper = 1.5)
                if random_hue:
                    X = tf.image.random_hue(X, max_delta = 0.15)
                if random_contrast:
                    X = tf.image.random_contrast(X, lower = 0.75, upper = 1.5)
                return preproc_func(X)
    if on_batch:
        # we are meant to use it on a batch
        def _batch_func(X, y):
            return tf.map_fn(_func, X), y
        return _batch_func
    else:
        # we apply it to everything
        def _all_func(X, y):
            return _func(X), y
        return _all_func
def tf_augmentor(out_size,
                 intermediate_size = (640, 640),
                 intermediate_trans = 'crop',
                 batch_size = 16,
                 horizontal_flip = True,
                 vertical_flip = False,
                 random_brightness = True,
                 random_contrast = True,
                 random_saturation = True,
                 random_hue = True,
                 color_mode = 'rgb',
                 preproc_func = preprocess_input,
                 min_crop_percent = 0.001,
                 max_crop_percent = 0.005,
                 crop_probability = 0.5,
                 rotation_range = 10):
    load_ops = tf_image_loader(out_size = intermediate_size,
                               horizontal_flip = horizontal_flip,
                               vertical_flip = vertical_flip,
                               random_brightness = random_brightness,
                               random_contrast = random_contrast,
                               random_saturation = random_saturation,
                               random_hue = random_hue,
                               color_mode = color_mode,
                               preproc_func = preproc_func,
                               on_batch = False)
    def batch_ops(X, y):
        batch_size = tf.shape(X)[0]
        with tf.name_scope('transformation'):
            # code borrowed from https://becominghuman.ai/data-augmentation-on-gpu-in-tensorflow-13d14ecf2b19
            # The list of affine transformations that our image will go under.
            # Every element is Nx8 tensor, where N is a batch size.
            transforms = []
            identity = tf.constant([1, 0, 0, 0, 1, 0, 0, 0], dtype=tf.float32)
            if rotation_range > 0:
                angle_rad = rotation_range / 180 * np.pi
                angles = tf.random_uniform([batch_size], -angle_rad, angle_rad)
                transforms += [tf.contrib.image.angles_to_projective_transforms(angles, intermediate_size[0], intermediate_size[1])]
            if crop_probability > 0:
                crop_pct = tf.random_uniform([batch_size], min_crop_percent, max_crop_percent)
                left = tf.random_uniform([batch_size], 0, intermediate_size[0] * (1.0 - crop_pct))
                top = tf.random_uniform([batch_size], 0, intermediate_size[1] * (1.0 - crop_pct))
                crop_transform = tf.stack([
                    crop_pct,
                    tf.zeros([batch_size]), top,
                    tf.zeros([batch_size]), crop_pct, left,
                    tf.zeros([batch_size]),
                    tf.zeros([batch_size])
                ], 1)
                coin = tf.less(tf.random_uniform([batch_size], 0, 1.0), crop_probability)
                transforms += [tf.where(coin, crop_transform, tf.tile(tf.expand_dims(identity, 0), [batch_size, 1]))]
            if len(transforms) > 0:
                X = tf.contrib.image.transform(X,
                                               tf.contrib.image.compose_transforms(*transforms),
                                               interpolation = 'BILINEAR') # or 'NEAREST'
            if intermediate_trans == 'scale':
                X = tf.image.resize_images(X, out_size)
            elif intermediate_trans == 'crop':
                X = tf.image.resize_image_with_crop_or_pad(X, out_size[0], out_size[1])
            else:
                raise ValueError('Invalid Operation {}'.format(intermediate_trans))
            return X, y
    def _create_pipeline(in_ds):
        batch_ds = in_ds.map(load_ops, num_parallel_calls = 4).batch(batch_size)
        return batch_ds.map(batch_ops)
    return _create_pipeline
def flow_from_dataframe(idg,
                        in_df,
                        path_col,
                        y_col,
                        shuffle = True,
                        color_mode = 'rgb'):
    files_ds = tf.data.Dataset.from_tensor_slices((in_df[path_col].values,
                                                   np.stack(in_df[y_col].values, 0)))
    in_len = in_df[path_col].values.shape[0]
    while True:
        if shuffle:
            files_ds = files_ds.shuffle(in_len) # shuffle the whole dataset
        next_batch = idg(files_ds).repeat().make_one_shot_iterator().get_next()
        for i in range(max(in_len // 32, 1)):
            # NOTE: if we loop here it is 'thread-safe-ish'; if we loop on the outside it is completely unsafe
            yield K.get_session().run(next_batch)
batch_size = 48
core_idg = tf_augmentor(out_size = IMG_SIZE,
                        color_mode = 'rgb',
                        vertical_flip = True,
                        crop_probability = 0.0, # crop doesn't work yet
                        batch_size = batch_size)
valid_idg = tf_augmentor(out_size = IMG_SIZE,
                         color_mode = 'rgb',
                         crop_probability = 0.0,
                         horizontal_flip = False,
                         vertical_flip = False,
                         random_brightness = False,
                         random_contrast = False,
                         random_saturation = False,
                         random_hue = False,
                         rotation_range = 0,
                         batch_size = batch_size)
train_gen = flow_from_dataframe(core_idg, train_df,
                                path_col = 'path',
                                y_col = 'level_cat')
valid_gen = flow_from_dataframe(valid_idg, valid_df,
                                path_col = 'path',
                                y_col = 'level_cat') # we can use much larger batches for evaluation
t_x, t_y = next(valid_gen)
t_x, t_y = next(train_gen)
from keras.applications.vgg16 import VGG16 as PTModel
from keras.applications.inception_resnet_v2 import InceptionResNetV2 as PTModel
from keras.applications.inception_v3 import InceptionV3 as PTModel # the last import wins: InceptionV3 is the backbone actually used
from keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Input, Conv2D, multiply, LocallyConnected2D, Lambda
from keras.models import Model
in_lay = Input(t_x.shape[1:])
base_pretrained_model = PTModel(input_shape = t_x.shape[1:], include_top = False, weights = 'imagenet')
base_pretrained_model.trainable = False
pt_depth = base_pretrained_model.get_output_shape_at(0)[-1]
pt_features = base_pretrained_model(in_lay)
from keras.layers import BatchNormalization
bn_features = BatchNormalization()(pt_features)
attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(Dropout(0.5)(bn_features))
attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
attn_layer = Conv2D(8, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
attn_layer = Conv2D(1,
                    kernel_size = (1,1),
                    padding = 'valid',
                    activation = 'sigmoid')(attn_layer)
# fan it out to all of the channels
up_c2_w = np.ones((1, 1, 1, pt_depth))
up_c2 = Conv2D(pt_depth, kernel_size = (1,1), padding = 'same',
               activation = 'linear', use_bias = False, weights = [up_c2_w])
up_c2.trainable = False
attn_layer = up_c2(attn_layer)
mask_features = multiply([attn_layer, bn_features])
gap_features = GlobalAveragePooling2D()(mask_features)
gap_mask = GlobalAveragePooling2D()(attn_layer)
# to account for missing values from the attention model
gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
gap_dr = Dropout(0.25)(gap)
dr_steps = Dropout(0.25)(Dense(128, activation = 'relu')(gap_dr))
out_layer = Dense(t_y.shape[-1], activation = 'softmax')(dr_steps)
retina_model = Model(inputs = [in_lay], outputs = [out_layer])
from keras.metrics import top_k_categorical_accuracy
def top_2_accuracy(in_gt, in_pred):
    return top_k_categorical_accuracy(in_gt, in_pred, k=2)
retina_model.compile(optimizer = 'adam', loss = 'categorical_crossentropy',
                     metrics = ['categorical_accuracy', top_2_accuracy])
retina_model.summary()
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
weight_path="{}_weights.best.hdf5".format('retina')
checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1,
                             save_best_only=True, mode='min', save_weights_only = True)
reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=3, verbose=1, mode='auto', epsilon=0.0001, cooldown=5, min_lr=0.0001)
early = EarlyStopping(monitor="val_loss",
                      mode="min",
                      patience=6) # probably needs to be more patient, but kaggle time is limited
callbacks_list = [checkpoint, early, reduceLROnPlat]
retina_model.fit_generator(train_gen,
                           steps_per_epoch = train_df.shape[0]//batch_size,
                           validation_data = valid_gen,
                           validation_steps = valid_df.shape[0]//batch_size,
                           epochs = 25,
                           callbacks = callbacks_list,
                           workers = 0, # tf-generators are not thread-safe
                           use_multiprocessing = False,
                           max_queue_size = 0)
retina_model.load_weights(weight_path)
retina_model.save('full_retina_model.h5')
I realize that's a lot of code, but what I want to do is take in a dataframe that looks like this
image,N,D,G,C,A,H,M,O
2857_left,1,0,0,0,0,0,0,0
3151_left,1,0,0,0,0,0,0,0
3113_left,1,0,0,0,0,0,0,0
and in order to achieve this I have made the following changes:
from sklearn.model_selection import train_test_split
rr_df = retina_df
y = rr_df[['N', 'D', 'G','C','A', 'H', 'M', 'O']]
train_ids, valid_ids = train_test_split(rr_df['PatientId'],
                                        test_size = 0.25,
                                        random_state = 2018)
raw_train_df = retina_df[retina_df['PatientId'].isin(train_ids)]
valid_df = retina_df[retina_df['PatientId'].isin(valid_ids)]
print('train', raw_train_df.shape[0], 'validation', valid_df.shape[0])
train_df = raw_train_df
from keras import regularizers, optimizers
from keras.layers import BatchNormalization
in_lay = Input(t_x.shape[1:])
base_pretrained_model = PTModel(input_shape = t_x.shape[1:], include_top = False, weights = 'imagenet')
base_pretrained_model.trainable = False
pt_depth = base_pretrained_model.get_output_shape_at(0)[-1]
pt_features = base_pretrained_model(in_lay)
bn_features = BatchNormalization()(pt_features)
# here we do an attention mechanism to turn pixels in the GAP on an off
attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(Dropout(0.5)(bn_features))
attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
attn_layer = Conv2D(8, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
attn_layer = Conv2D(1,
                    kernel_size = (1,1),
                    padding = 'valid',
                    activation = 'sigmoid')(attn_layer)
# fan it out to all of the channels
up_c2_w = np.ones((1, 1, 1, pt_depth))
up_c2 = Conv2D(pt_depth, kernel_size = (1,1), padding = 'same',
               activation = 'linear', use_bias = False, weights = [up_c2_w])
up_c2.trainable = False
attn_layer = up_c2(attn_layer)
mask_features = multiply([attn_layer, bn_features])
gap_features = GlobalAveragePooling2D()(mask_features)
gap_mask = GlobalAveragePooling2D()(attn_layer)
# to account for missing values from the attention model
gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
gap_dr = Dropout(0.25)(gap)
x = Dropout(0.25)(Dense(128, activation = 'relu')(gap_dr))
# out_layer = Dense(t_y.shape[-1], activation = 'softmax')(dr_steps)
output1 = Dense(1, activation = 'sigmoid')(x)
output2 = Dense(1, activation = 'sigmoid')(x)
output3 = Dense(1, activation = 'sigmoid')(x)
output4 = Dense(1, activation = 'sigmoid')(x)
output5 = Dense(1, activation = 'sigmoid')(x)
output6 = Dense(1, activation = 'sigmoid')(x)
output7 = Dense(1, activation = 'sigmoid')(x)
output8 = Dense(1, activation = 'sigmoid')(x)
retina_model = Model(inputs = [in_lay], outputs = [output1,output2,output3,output4,output5, output6, output7, output8])
# retina_model = Model([in_lay],output1,output2,output3,output4,output5, output6, output7, output8)
# retina_model.build(t_x.shape[1:]) # `input_shape` is the shape of the input data
# print(model.summary())
# retina_model.compile(optimizers.rmsprop(lr = 0.00001, decay = 1e-6), ...)
loss = ["binary_crossentropy"] * 8 # one binary_crossentropy loss per output head
# retina_model = Model(inputs = [in_lay], outputs = [out_layer])
# from keras.metrics import top_k_categorical_accuracy
# def top_2_accuracy(in_gt, in_pred):
# return top_k_categorical_accuracy(in_gt, in_pred, k=2)
retina_model.compile(optimizer = 'adam', loss = loss,
                     metrics = ['accuracy'])
retina_model.summary()
but when I run this I get
ValueError: Error when checking model target: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 8 array(s), but instead got the following list of 1 arrays: [array([[1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1],
[0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0],
...
Any suggestions on how I could change this model to train it on multi-label inputs? Thanks in advance.
You are trying to train a model with 8 separate outputs (each of length 1), but your target values form a single array of length 8 per sample.
The easiest fix is to replace:
output1 = Dense(1, activation = 'sigmoid')(x)
output2 = Dense(1, activation = 'sigmoid')(x)
output3 = Dense(1, activation = 'sigmoid')(x)
output4 = Dense(1, activation = 'sigmoid')(x)
output5 = Dense(1, activation = 'sigmoid')(x)
output6 = Dense(1, activation = 'sigmoid')(x)
output7 = Dense(1, activation = 'sigmoid')(x)
output8 = Dense(1, activation = 'sigmoid')(x)
loss = ["binary_crossentropy","binary_crossentropy","binary_crossentropy","binary_crossentropy", "binary_crossentropy","binary_crossentropy","binary_crossentropy","binary_crossentropy"]#,metrics = ["accuracy"])
with:
# keep sigmoid here; don't change it to softmax, since this is a multi-label problem
output = Dense(8, activation = 'sigmoid')(x)
loss = "binary_crossentropy"
Otherwise, you have to create a custom generator that yields a list of 8 targets to feed your network.
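A minimal sketch of that alternative, assuming you keep the 8 separate sigmoid outputs and the generators from your code (the split_targets wrapper is hypothetical, not part of your code):

def split_targets(gen, n_outputs = 8):
    # wraps a generator that yields (X, y) with y of shape (batch, 8) and
    # splits y into a list of n_outputs arrays, each of shape (batch, 1)
    for X, y in gen:
        yield X, [y[:, i:i + 1] for i in range(n_outputs)]

train_gen_8 = split_targets(train_gen)
valid_gen_8 = split_targets(valid_gen)

You would then pass train_gen_8 and valid_gen_8 to fit_generator in place of the originals.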