Tensorflow time series classification with metadata: preprocessing layer and dataset integration - python

I am working on a neural net for time series classification with metadata.
My problem is that I have a data frame with different datatypes, which all need to be preprocessed. In doing so I generate a lot of different input layers with individual names. But in my tf.dataset the data frame is saved as a whole and thus the keras.fit function expects one input instead of many.
Do you have any idea as to how I can get the model.fit to accept the data I have and maybe even get it all into a nicer software architecture?
import pandas as pd
import tensorflow as tf
def get_normalization_layer(
dataset, input_name, all_inputs, encoded_features, axis=None
):
normalization_layer = tf.keras.layers.Normalization(axis=axis)
feature_ds = dataset.map(lambda x, y: x[input_name])
normalization_layer.adapt(feature_ds)
input_layer = tf.keras.Input(shape=(1,), name=input_name, dtype=tf.float32)
normalization_layer = normalization_layer(input_layer)
all_inputs.append(input_layer)
encoded_features.append(normalization_layer)
def get_category_encoding_layer(
dataset,
input_name,
dtype,
all_inputs,
encoded_features,
vocabulary=None,
max_tokens=None,
):
if dtype == "string":
index = tf.keras.layers.StringLookup(
max_tokens=max_tokens, vocabulary=vocabulary
)
input_layer = tf.keras.Input(shape=(1,), name=input_name, dtype=tf.string)
elif dtype == "int":
index = tf.keras.layers.IntegerLookup(
max_tokens=max_tokens, vocabulary=vocabulary
)
input_layer = tf.keras.Input(shape=(1,), name=input_name, dtype=tf.int64)
if vocabulary is None:
feature_ds = dataset.map(lambda x, y: x[input_name])
index.adapt(feature_ds)
encoder = tf.keras.layers.CategoryEncoding(num_tokens=index.vocabulary_size())
encoder = encoder(index(input_layer))
all_inputs.append(input_layer)
encoded_features.append(encoder)
################################################################################
measurement_data = np.random.rand(150, 11, 2400)
meta_data = pd.DataFrame(
data={
"Product": ["A", "B", "C", "D"],
"Length": [23, 22, 21, 24],
"Width": [11.2, 23.4, 57.35, 0],
"Labels": [0, 0, 1, 0],
}
)
################################################################################
dataframe = meta_data.copy()
labels = dataframe.pop("Labels")
dataframe = {key: np.array(value)[:, tf.newaxis] for key, value in dataframe.items()}
dataset_measurement = tf.data.Dataset.from_tensor_slices(measurement_data)
dataset_meta = tf.data.Dataset.from_tensor_slices((dataframe, labels))
################################################################################
all_inputs = []
encoded_features = []
normalization_layer = tf.keras.layers.Normalization(axis=1)
feature_ds = dataset_measurement.map(lambda x: x)
normalization_layer.adapt(feature_ds)
input_layer = tf.keras.Input(
shape=list(feature_ds)[0].shape,
name="measurement_input",
dtype=tf.float32,
)
normalization_layer = normalization_layer(input_layer)
all_inputs.append(input_layer)
encoded_features.append(normalization_layer)
get_category_encoding_layer(
dataset_meta, "Product", "string", all_inputs, encoded_features
)
get_category_encoding_layer(dataset_meta, "Length", "int", all_inputs, encoded_features)
get_normalization_layer(dataset_meta, "Width", all_inputs, encoded_features)
dataset = tf.data.Dataset.zip((dataset_measurement, dataset_meta))
dataset = dataset.map(
lambda x, y: ({"measurement_input": x, "meta_input": y[:-1]}, y[-1])
)
dataset = dataset.batch(64)
dataset = dataset.prefetch(64)
################################################################################
conv1 = tf.keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(
encoded_features[0]
)
conv1 = tf.keras.layers.BatchNormalization()(conv1)
conv1 = tf.keras.layers.ReLU()(conv1)
gap = tf.keras.layers.GlobalAveragePooling1D()(conv1)
all_features = tf.keras.layers.concatenate(encoded_features[1:])
x1 = tf.keras.layers.Dense(128, activation="relu")(all_features)
x1 = tf.keras.layers.Dropout(0.5)(x1)
meta_and_measurement = tf.keras.layers.concatenate([gap, x1])
f1 = tf.keras.layers.Dense(128, activation="relu")(meta_and_measurement)
f1 = tf.keras.layers.Dropout(0.5)(f1)
output_layer = tf.keras.layers.Dense(2, activation="softmax")(f1)
################################################################################
model = tf.keras.models.Model(inputs=all_inputs, outputs=output_layer)
model.compile(
optimizer="adam",
loss="sparse_categorical_crossentropy",
metrics=["sparse_categorical_accuracy"],
)
# ValueError: Missing data for input "Product".
# You passed a data dictionary with keys ['measurement_input', 'meta_input'].
# Expected the following keys: ['measurement_input', 'Product', 'Length', 'Width']
history = model.fit(
dataset,
epochs=50,
verbose=1,
)

I found an answer, but it feels rather hacky and it would be lovely if someone had a better idea. You can use a function to strip and recombine the dataset in a way that tensorflow likes:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
def get_normalization_layer(
dataset, input_name, all_inputs, encoded_features, axis=None
):
normalization_layer = tf.keras.layers.Normalization(axis=axis)
feature_ds = dataset.map(lambda x, y: x[input_name])
normalization_layer.adapt(feature_ds)
input_layer = tf.keras.Input(shape=(1,), name=input_name, dtype=tf.float32)
normalization_layer = normalization_layer(input_layer)
all_inputs.append(input_layer)
encoded_features.append(normalization_layer)
def get_category_encoding_layer(
dataset,
input_name,
dtype,
all_inputs,
encoded_features,
vocabulary=None,
max_tokens=None,
):
if dtype == "string":
index = tf.keras.layers.StringLookup(
max_tokens=max_tokens, vocabulary=vocabulary
)
input_layer = tf.keras.Input(shape=(1,), name=input_name, dtype=tf.string)
elif dtype == "int":
index = tf.keras.layers.IntegerLookup(
max_tokens=max_tokens, vocabulary=vocabulary
)
input_layer = tf.keras.Input(shape=(1,), name=input_name, dtype=tf.int64)
if vocabulary is None:
feature_ds = dataset.map(lambda x, y: x[input_name])
index.adapt(feature_ds)
encoder = tf.keras.layers.CategoryEncoding(num_tokens=index.vocabulary_size())
encoder = encoder(index(input_layer))
all_inputs.append(input_layer)
encoded_features.append(encoder)
################################################################################
measurement_data = np.random.rand(150, 11, 2400)
meta_data = pd.DataFrame(
data={
"Product": ["A", "A", "C", "D"],
"Length": [23, 22, 21, 24],
"Width": [11.2, 23.4, 57.35, 0],
"Labels": [0, 0, 1, 0],
}
)
################################################################################
dataframe = meta_data.copy()
labels = dataframe.pop("Labels")
dataframe = {key: np.array(value)[:, tf.newaxis] for key, value in dataframe.items()}
dataset_measurement = tf.data.Dataset.from_tensor_slices(measurement_data)
dataset_meta = tf.data.Dataset.from_tensor_slices((dataframe, labels))
################################################################################
all_inputs = []
encoded_features = []
normalization_layer = tf.keras.layers.Normalization(axis=1)
feature_ds = dataset_measurement.map(lambda x: x)
normalization_layer.adapt(feature_ds)
input_layer = tf.keras.Input(
shape=list(feature_ds)[0].shape,
name="measurement_input",
dtype=tf.float32,
)
normalization_layer = normalization_layer(input_layer)
all_inputs.append(input_layer)
encoded_features.append(normalization_layer)
get_category_encoding_layer(
dataset_meta, "Product", "string", all_inputs, encoded_features
)
get_category_encoding_layer(dataset_meta, "Length", "int", all_inputs, encoded_features)
get_normalization_layer(dataset_meta, "Width", all_inputs, encoded_features)
dataset = tf.data.Dataset.zip((dataset_measurement, dataset_meta))
def map_func(x, y):
meta_names = [name for name in y[0]]
meta_values = list(y[0].values())
meta = dict(zip(meta_names, meta_values))
dictinary = {"measurement_input": x}
dictinary.update(meta)
result = (dictinary, y[-1])
return result
data_set = dataset.map(map_func)
data_set = data_set.batch(64)
data_set = data_set.prefetch(64)
################################################################################
conv1 = tf.keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(
encoded_features[0]
)
conv1 = tf.keras.layers.BatchNormalization()(conv1)
conv1 = tf.keras.layers.ReLU()(conv1)
gap = tf.keras.layers.GlobalAveragePooling1D()(conv1)
all_features = tf.keras.layers.concatenate(encoded_features[1:])
x1 = tf.keras.layers.Dense(128, activation="relu")(all_features)
x1 = tf.keras.layers.Dropout(0.5)(x1)
meta_and_measurement = tf.keras.layers.concatenate([gap, x1])
f1 = tf.keras.layers.Dense(128, activation="relu")(meta_and_measurement)
f1 = tf.keras.layers.Dropout(0.5)(f1)
output_layer = tf.keras.layers.Dense(2, activation="softmax")(f1)
################################################################################
model = tf.keras.models.Model(inputs=all_inputs, outputs=output_layer)
model.compile(
optimizer="adam",
loss="sparse_categorical_crossentropy",
metrics=["sparse_categorical_accuracy"],
)
tf.keras.utils.plot_model(
model,
to_file=os.path.join(os.getcwd(), "model.png"),
show_shapes=True,
show_dtype=True,
)
history = model.fit(
data_set,
epochs=50,
verbose=1,
)

Related

Keras text classification from scratch - error implementing

I'm implementing Keras text classification from scratch on an army dataset of reviews but I'm getting an error of logits and labels must have the same shape. (None,1) vs ().
The code below is a bit long but it has the tensor preprocesing and the model is a functional API:
max_features = 200
sequence_length = None
embedding_dim = 128
from tensorflow.keras.layers import TextVectorization
dtrain_lab = data_train[['airline_sentiment','negativereason']].to_numpy()
display(dtrain_lab)
tlist_txt = data_train['negativereason'].tolist()
tlist_sent = data_train['airline_sentiment'].tolist()
rac = 0
for k in tlist_txt:
rap = tlist_txt[rac]
if pd.isnull(rap) == True:
tlist_txt[rac] = 'empty'
rac+=1
#p-prueba
p_list = []
for i in tlist_sent:
if i == 'positive':
p_list.append(1)
if i == 'negative' or i == 'neutral':
p_list.append(0)
train_sent = np.array(p_list)
val_txt = data_val['negativereason'].tolist()
val_sent = data_val['airline_sentiment'].tolist()
l_val = []
for j in val_sent:
if j == 'positive':
l_val.append(1)
if j == 'negative' or j == 'neutral':
l_val.append(0)
sent_val_na = np.array(l_val)
dac = 0
for k in val_txt:
hap = val_txt[dac]
if pd.isnull(hap) == True:
val_txt[dac] = 'empty'
dac+=1
tftrain_db = tf.data.Dataset.from_tensor_slices((tlist_txt, train_sent))
tfval_db = tf.data.Dataset.from_tensor_slices((val_txt, sent_val_na))
vectorize_layer = TextVectorization(
standardize='lower_and_strip_punctuation',
split="whitespace",
max_tokens=200,
output_mode="int",
output_sequence_length=30,
)
def vectorize_text(text, label):
text = tf.expand_dims(text, -1)
return vectorize_layer(text), label
text_ds = tftrain_db.map(lambda x, y: x)
vectorize_layer.adapt(text_ds)
v_dbtrain = tftrain_db.map(vectorize_text)
v_dbval = tfval_db.map(vectorize_text)
from tensorflow.keras import layers
inputs = tf.keras.Input(shape=(None,), dtype="int64")
x = layers.Embedding(max_features, embedding_dim)(inputs)
x = layers.Dropout(0.5)(x)
x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation="relu" input_shape = (None,))(x)
x = layers.Dropout(0.5)(x)
predictions = layers.Dense(1, activation="sigmoid", name="predictions")(x)
model = tf.keras.Model(inputs, predictions)
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
epochs = 3
model.fit(v_dbtrain, validation_data= tfval_db, epochs=epochs)
Error is:
ValueError: `logits` and `labels` must have the same shape, received ((None, 1) vs ()).

Vision transformer binary classifier is only predicting one class

I wrote a code for a vision transformer to classify mammograms into benign and malignant. After training for 30 epochs, the model is, however, predicting only one class(benign). All the final predictions for test images are in the range: 0.47 - 0.49.
The code:
datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1./255,
samplewise_center = True,
samplewise_std_normalization = True,
validation_split = 0.1,
rotation_range=180,
shear_range=15,
zoom_range=0.2,
width_shift_range=0.2,
height_shift_range=0.2,
horizontal_flip=True,
vertical_flip=True,
fill_mode='reflect')
train_gen = datagen.flow_from_dataframe(dataframe = DF_TRAIN,
directory = TRAIN_PATH,
x_col = 'image_file_path',
y_col = 'pathology',
subset = 'training',
batch_size = BATCH_SIZE,
# seed = 1,
color_mode = 'rgb',
shuffle = True,
class_mode = 'binary',
target_size = (IMAGE_SIZE, IMAGE_SIZE))
valid_gen = datagen.flow_from_dataframe(dataframe = DF_TRAIN,
directory = TRAIN_PATH,
x_col = 'image_file_path',
y_col = 'pathology',
subset = 'validation',
batch_size = BATCH_SIZE,
# seed = 1,
color_mode = 'rgb',
shuffle = False,
class_mode = 'binary',
target_size = (IMAGE_SIZE, IMAGE_SIZE))
test_gen = datagen.flow_from_dataframe(dataframe = DF_TEST,
directory = TEST_PATH,
x_col = 'image_file_path',
y_col = 'pathology',
# subset = 'validation',
batch_size = BATCH_SIZE,
# seed = 1,
color_mode = 'rgb',
shuffle = False,
class_mode = 'binary',
target_size = (IMAGE_SIZE, IMAGE_SIZE))
def mlp(x, hidden_units, dropout_rate):
for units in hidden_units:
x = layers.Dense(units, activation=tf.nn.gelu)(x)
x = layers.Dropout(dropout_rate)(x)
return x
class Patches(layers.Layer):
def __init__(self, patch_size):
super(Patches, self).__init__()
self.patch_size = patch_size
def call(self, images):
batch_size = tf.shape(images)[0]
patches = tf.image.extract_patches(
images=images,
sizes=[1, self.patch_size, self.patch_size, 1],
strides=[1, self.patch_size, self.patch_size, 1],
rates=[1, 1, 1, 1],
padding="VALID",
)
patch_dims = patches.shape[-1]
patches = tf.reshape(patches, [batch_size, -1, patch_dims])
return patches
def get_config(self):
config = super().get_config().copy()
config.update({
'patch_size': self.patch_size,
})
return config
class PatchEncoder(layers.Layer):
def __init__(self, num_patches, projection_dim):
super(PatchEncoder, self).__init__()
self.num_patches = num_patches
self.projection = layers.Dense(units=projection_dim)
self.position_embedding = layers.Embedding(
input_dim=num_patches, output_dim=projection_dim
)
def call(self, patch):
positions = tf.range(start=0, limit=self.num_patches, delta=1)
encoded = self.projection(patch) + self.position_embedding(positions)
return encoded
def get_config(self):
config = super().get_config().copy()
config.update({
'num_patches': self.num_patches,
'projection': self.projection,
'position_embedding': self.position_embedding,
})
return config
def create_vit_classifier():
inputs = layers.Input(shape=input_shape)
# Create patches.
patches = Patches(patch_size)(inputs)
# Encode patches.
encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)
# Create multiple layers of the Transformer block.
for _ in range(transformer_layers):
# Layer normalization 1.
x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
# Create a multi-head attention layer.
attention_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=projection_dim, dropout=0.1)(x1, x1)
# Skip connection 1.
x2 = layers.Add()([attention_output, encoded_patches])
# Layer normalization 2.
x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
# MLP.
x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)
# Skip connection 2.
encoded_patches = layers.Add()([x3, x2])
# Create a [batch_size, projection_dim] tensor.
representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
representation = layers.Flatten()(representation)
representation = layers.Dropout(0.5)(representation)
# Add MLP.
features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
# Classify outputs.
logits = layers.Dense(num_classes, activation="sigmoid")(features)
# Create the Keras model.
model = tf.keras.Model(inputs=inputs, outputs=logits)
return model
def run_experiment(model):
optimizer = tfa.optimizers.AdamW(learning_rate=learning_rate, weight_decay=weight_decay)
model.compile(
optimizer=optimizer,
loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
metrics=['accuracy'])
STEP_SIZE_TRAIN = train_gen.n // train_gen.batch_size
STEP_SIZE_VALID = valid_gen.n // valid_gen.batch_size
print(STEP_SIZE_TRAIN, STEP_SIZE_VALID)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy',
factor=0.2,
patience=2,
verbose=1,
min_delta=1e-4,
min_lr=1e-6,
mode='max')
checkpointer = tf.keras.callbacks.ModelCheckpoint(filepath='./model_3.hdf5',
monitor='val_accuracy',
verbose=1,
save_best_only=True,
save_weights_only=True,
mode='max')
callbacks = [reduce_lr, checkpointer]
history = model.fit(x=train_gen,
steps_per_epoch=STEP_SIZE_TRAIN,
validation_data=valid_gen,
validation_steps=STEP_SIZE_VALID,
epochs=EPOCHS,
callbacks=callbacks,
verbose=1)
model.save(f'{save_path}/model_3.h5')
return history
vit_classifier = create_vit_classifier()
history = run_experiment(vit_classifier)
vit_classifier.load_weights(f'{save_path}/model_3.h5')
A = vit_classifier.predict(test_gen, steps = test_gen.n // test_gen.batch_size + 1)
predicted_classes = np.where(A > 0.5, 1, 0)
true_classes = test_gen.classes
class_labels = list(test_gen.class_indices.keys())
results = pd.DataFrame(list(zip(test_gen.filenames, true_classes, predicted_classes)),
columns =['Image name', 'True class', 'Predicted class'])
results = results.replace({"True class": classes_dict})
results = results.replace({"Predicted class": classes_dict})
prob = pd.DataFrame(A, columns =['Predicted probability'])
result_df = pd.concat([results, prob], axis=1)
result_df['Predicted probability'] = pd.Series(["{0:.1f}".format(val * 100) for val in result_df['Predicted probability']], index=result_df.index)
results_csv = f'{save_path}/results_3.csv'
with open(results_csv, mode='w') as f:
result_df.to_csv(f)
Confusion matrix:
[[428 0]
[276 0]]
Performance metrics:
Please help me figure out how to rectify this problem

TensorFlow ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list)

I'm trying to write this code into colab. Interestingly, I was running the same code in colab a few days ago but now it won't work. the code also works in kaggle kernel. I tried changing the TensorFlow version but all of them give different errors. Why do you think I can't run this code? This is the colab notebook if you needed more info.
Thanks in advance!
class DisasterDetector:
def __init__(self, tokenizer, bert_layer, max_len =30, lr = 0.0001,
epochs = 15, batch_size = 32, dtype = tf.int32 ,
activation = 'sigmoid', optimizer = 'SGD',
beta_1=0.9, beta_2=0.999, epsilon=1e-07,
metrics = 'accuracy', loss = 'binary_crossentropy'):
self.lr = lr
self.epochs = epochs
self.max_len = max_len
self.batch_size = batch_size
self.tokenizer = tokenizer
self.bert_layer = bert_layer
self.models = []
self.activation = activation
self.optimizer = optimizer
self.dtype = dtype
self.beta_1 = beta_1
self.beta_2 = beta_2
self.epsilon =epsilon
self.metrics = metrics
self.loss = loss
def encode(self, texts):
all_tokens = []
masks = []
segments = []
for text in texts:
tokenized = self.tokenizer.convert_tokens_to_ids(['[CLS]'] + self.tokenizer.tokenize(text) + ['[SEP]'])
len_zeros = self.max_len - len(tokenized)
padded = tokenized + [0] * len_zeros
mask = [1] * len(tokenized) + [0] * len_zeros
segment = [0] * self.max_len
all_tokens.append(padded)
masks.append(mask)
segments.append(segment)
print(len(all_tokens[0]))
return np.array(all_tokens), np.array(masks), np.array(segments)
def make_model(self):
input_word_ids = Input(shape = (self.max_len, ), dtype=tf.int32,
name = 'input_word_ids')
input_mask = Input(shape = (self.max_len, ), dtype=tf.int32,
name = 'input_mask')
segment_ids = Input(shape = (self.max_len, ), dtype=tf.int32,
name = 'segment_ids')
#pooled output is the output of dimention and
pooled_output, sequence_output = self.bert_layer([input_word_ids,
input_mask,
segment_ids])
clf_output = sequence_output[:, 0, :]
out = tf.keras.layers.Dense(1, activation = self.activation)(clf_output)
#out = tf.keras.layers.Dense(1, activation = 'sigmoid', input_shape = (clf_output,) )(clf_output)
model = Model(inputs = [input_word_ids, input_mask, segment_ids],
outputs = out)
if self.optimizer is 'SGD':
optimizer = SGD(learning_rate = self.lr)
elif self.optimizer is 'Adam':
optimizer = Adam(learning_rate = self.lr, beta_1=self.beta_1,
beta_2=self.beta_2, epsilon=self.epsilon)
model.compile(loss = self.loss, optimizer = self.optimizer,
metrics = [self.metrics])
return model
def train(self, x, k = 3):
kfold = StratifiedKFold(n_splits = k, shuffle = True)
for fold, (train_idx, val_idx) in enumerate(kfold.split(x['cleaned_text'], x['target'])):
print('fold: ', fold)
x_trn = self.encode(x.loc[train_idx, 'cleaned_text'])
x_val = self.encode(x.loc[val_idx, 'cleaned_text'])
y_trn = np.array(x.loc[train_idx, 'target'], dtype = np.uint8)
y_val = np.array(x.loc[val_idx, 'target'], dtype = np.uint8)
print('the data type of y train: ', type(y_trn))
print('x_val shape', x_val[0].shape)
print('x_trn shape', x_trn[0].shape)
model = self.make_model()
print('model made.')
model.fit(x_trn, tf.convert_to_tensor(y_trn),
validation_data = (x_val, tf.convert_to_tensor(y_val)),
batch_size=self.batch_size, epochs = self.epochs)
self.models.append(model)
and after calling the train function of the class I get that error.
classifier = DisasterDetector(tokenizer = tokenizer, bert_layer = bert_layer, max_len = max_len, lr = 0.0001,
epochs = 10, activation = 'sigmoid',
batch_size = 32,optimizer = 'SGD',
beta_1=0.9, beta_2=0.999, epsilon=1e-07)
classifier.train(train_cleaned)
and here is the error:
ValueError Traceback (most
recent call last)
<ipython-input-10-106c756f2e47> in <module>()
----> 1 classifier.train(train_cleaned)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
96 dtype = dtypes.as_dtype(dtype).as_datatype_enum
97 ctx.ensure_initialized()
---> 98 return ops.EagerTensor(value, ctx.device_name, dtype)
99
100
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list).
Well, it turns out that by not giving the appropriate maximum sequence length, TensorFlow throws this error. By changing the max_len variable to 54 I could run my program with no difficulty. So the problem was not about the type of the input or the numpy arrays.

Tensorflow - Changing dropout value has no effect on network

I trained a network to perform semantic segmentation with dropout, and it is my understanding that as you vary the dropout keep_prob value, the output prediction changes. However, after saving the model using the tensorflow-serving method, loading it using tf.saved_model.loader.load, and varying the dropout value, I get the same output prediction value (dice score).
I followed the suggestions in this SO post, but I still get the same prediction results even if I enter 0.0.
Didn't know if it was a tensorflow issue or a bug in my code, so I tried downgrading from v1.15 to v1.10 to see if it was the former and still got the same results. I am sure it is a bug in my code now, but I am not sure where it is. A minimum working example is shown below. Could someone help me? Thank you!
This is a snippet of my training script:
#===============
def run_iteration(self, feed_dict, op_list, summaries):
output_args = self.sess.run(op_list, feed_dict=feed_dict)
return output_args
#===============
def run_epoch_train(self, curr_epoch):
print('Training over all batches')
num_total_batches = self.num_total_batches_train
curr_batch_counter = 0
# for each batch in training images
for batch in self.batch_iterator_train:
# dropout is included
if self.dropout_training_Flag == 1:
_, loss, dice = self.run_iteration(
feed_dict={
self.placeholders['images']: batch['images'],
self.placeholders['labels']: batch['labels'],
self.placeholders['is_training']: True,
self.placeholders['dropout_prob']: self.dropout_prob_training,
},
op_list=[
self.fitting_op,
self.losses[self.active_loss],
#self.outputs['sigmoid'],
self.outputs['dice'],
],
summaries=[],
)
curr_batch_counter = curr_batch_counter + 1
if (self.iteration % 5) == 0:
print('Saving model in training session')
self.saver.save(curr_epoch + 1)
This is a snippet of my testing script:
#===============
path_to_model = self.root_path_to_models + '/' + '25'
print(path_to_model)
model = tf.saved_model.loader.load( #tf.saved_model.loader.load(
sess,
[tf.saved_model.tag_constants.SERVING],
path_to_model
)
inputImage_name = model.signature_def['prediction'].inputs['images'].name
x_inp = tf.get_default_graph().get_tensor_by_name(inputImage_name)
isTraining_name = model.signature_def['prediction'].inputs['is_training'].name
tflag_op = tf.get_default_graph().get_tensor_by_name(isTraining_name)
outputs_name = model.signature_def['prediction'].outputs['sigmoid'].name
y_op = tf.get_default_graph().get_tensor_by_name(outputs_name)
if self.dropout_training_Flag == 1:
dropoutProb_name = model.signature_def['prediction'].inputs['dropout_prob'].name
dropout_prob_op = tf.get_default_graph().get_tensor_by_name(dropoutProb_name)
print(dropout_prob_op)
# iterate over batches of images
# iterate over motion category
for moCat in self.motion_categories:
# get datasets in motion category
datasets_in_moCat = d_ffn_images_labels[moCat]
dataset_name = list(datasets_in_moCat.keys())[-1]
#print(dataset_name)
loss_for_each_image = []
final_vol = np.zeros((self.original_input_image_width, self.original_input_image_height, self.num_vol_slices), dtype = np.uint8)
# get images
curr_dataset_images = datasets_in_moCat[dataset_name][0][0]
# get labels
curr_dataset_labels = datasets_in_moCat[dataset_name][0][1]
#current dataset label numbers
curr_dataset_label_numbers = d_bfnumber_images_labels[moCat][dataset_name]
#print('curr_dataset_label_numbers',curr_dataset_label_numbers)
# number of images/labels in current dataset, for current category
num_images = len(curr_dataset_images)
num_labels = len(curr_dataset_labels)
# check if num-images/labels are the same
assert(num_images == num_labels)
# load each image
for elem_idx in range(num_images):
img_path = curr_dataset_images[elem_idx]
lab_path = curr_dataset_labels[elem_idx]
xn = nib.load(img_path)
x = np.array(xn.dataobj)
labn = nib.load(lab_path)
lab = np.array(labn.dataobj)
data_affine_tform = xn.affine
# resize
xr = cv2.resize(x, (self.network_input_image_width, self.network_input_image_height), interpolation = cv2.INTER_LANCZOS4)
# standardize
y = standardize_zeroMeanUnitVar_image(copy.deepcopy(xr), self.network_input_image_width, self.network_input_image_height, self.network_input_channels)
#y = cv2.normalize(copy.deepcopy(xr), None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
# match network input -- [height, width, channels]
y = np.reshape(y, newshape=(self.network_input_image_height, self.network_input_image_width, self.network_input_channels))
# append to match network input -- [batch, height, width, channels]
input_list = []
input_list.append(y)
input_list = np.asarray(input_list).astype(np.float32)
# ======================
# MODIFY DROPOUT HERE FROM JSON FILE
# CHANGED VALUES FROM 0.0, 0.5, 1.0 -- same prediction score
# ======================
# run and get output
if self.dropout_training_Flag == 1:
output = sess.run(y_op, feed_dict={x_inp: input_list, tflag_op: True, dropout_prob_op: self.dropout_prob_testing})
else:
output = sess.run(y_op, feed_dict={x_inp: input_list, tflag_op: False})
tmpOut = cv2.resize(output[0,:,:,0], (self.original_input_image_width, self.original_input_image_height), interpolation = cv2.INTER_LANCZOS4)
prediction = np.asarray((tmpOut > 0.5))
labels = np.asarray((lab > 0))
EPS = 0.0000001
#output_original = cv2.resize(output[0,:,:,0], (original_input_image_width, original_input_image_height), interpolation = cv2.INTER_LANCZOS4)
loss = 2.0 * np.sum(labels * prediction, axis=(0, 1)) / (np.sum(labels ** 2 + prediction ** 2, axis=(0, 1)) + EPS)
loss_for_each_image.append(loss)
#place slice in final_vol
#print(curr_dataset_label_numbers[elem_idx][1])
#print(type(curr_dataset_label_numbers[elem_idx][1]))
final_vol[:,:,curr_dataset_label_numbers[elem_idx][1] - 1] = np.asarray(prediction*255.0).astype(np.uint8)
# dice mean over dataset
dice_mean_for_dataset = np.mean(loss_for_each_image)
print(dataset_name, dice_mean_for_dataset)
self.diceScore_for_each_dataset.append(dice_mean_for_dataset)
self.list_dataset_name.append(dataset_name)
This is the code for the inputs/outputs:
#===============
def create_placeholders(self):
self.placeholders['images'] = tf.placeholder(
shape=[None] + self.network_input_size + [self.network_input_channels],
name='images',
dtype=tf.float32
)
self.placeholders['labels'] = tf.placeholder(
shape=[None] + self.network_input_size + [self.network_output_channels],
name='labels',
dtype=tf.float32
)
self.placeholders['is_training'] = tf.placeholder(
shape=[],
name='is_training',
dtype=tf.bool
)
# dropout is included
if self.dropout_training_Flag == 1:
self.placeholders['dropout_prob'] = tf.placeholder(
shape=[],
name='dropout_prob',
dtype=tf.float32
)
#===============
def create_outputs(self):
if self.network_name == 'UNet':
print('\n')
print('Training UNet')
# dropout is included
if self.dropout_training_Flag == 1:
# train with dropout
unet_output = unet_dropout(
self.placeholders['images'],
self.placeholders['is_training'],
self.placeholders['dropout_prob'],
self.network_output_channels
)
if self.network_output_channels == 1:
self.outputs['sigmoid'] = unet_output
else:
self.outputs['sigmoid'] = unet_output
This is the code for my model:
#===============
def batch_norm_relu(inputs, is_training):
net = slim.batch_norm(inputs, is_training=is_training)
net = tf.nn.relu(net)
return net
#===============
def dropout (input, keep_prob, is_training):
if is_training == True:
dropout = tf.nn.dropout(input, keep_prob)
else:
dropout = input
return dropout
#===============
def model(inputs, is_training, keep_prob, num_classes):
with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
base_num_kernels = 64
# =================================
# encoder
# 256
x = conv2d_fixed_padding(inputs=inputs, filters=base_num_kernels, kernel_size=3, stride=1)
x = batch_norm_relu(x, is_training)
x = conv2d_fixed_padding(inputs=x, filters=base_num_kernels, kernel_size=3, stride=1)
x = batch_norm_relu(x, is_training)
output_b1 = x
output_list_b1 = [x]
output_b1 = dropout(output_b1, keep_prob, is_training)
output_b1 = tf.layers.max_pooling2d(inputs=output_b1, pool_size=2, strides=2, padding='SAME')
# =================================
# 128
x = conv2d_fixed_padding(inputs=output_b1, filters=2*base_num_kernels, kernel_size=3, stride=1)
x = batch_norm_relu(x, is_training)
x = conv2d_fixed_padding(inputs=x, filters=2*base_num_kernels, kernel_size=3, stride=1)
x = batch_norm_relu(x, is_training)
output_b2 = x
output_list_b2 = [x]
output_b2 = dropout(output_b2, keep_prob, is_training)
# =================================
# decoder
# 128 -> 256
output_b3 = conv2d_transpose(output_b2, kernel_size=2, output_channels=base_num_kernels)
output_b4 = tf.concat([output_b3, x], axis=3)
# =================================
# 256
conv_final = conv2d_fixed_padding(inputs=output_b4, filters=base_num_kernels, kernel_size=3, stride=1)
conv_final = batch_norm_relu(conv_final, is_training)
conv_final = conv2d_fixed_padding(inputs=conv_final, filters=base_num_kernels, kernel_size=3, stride=1)
conv_final = batch_norm_relu(conv_final, is_training)
# =================================
# output
outputs = conv2d_fixed_padding(inputs=conv_final, filters=num_classes, kernel_size=3, stride=1)
if num_classes == 1:
outputs = tf.nn.sigmoid(outputs)
else:
h = outputs.get_shape().as_list()[1]
w = outputs.get_shape().as_list()[2]
outputs_reshaped = tf.reshape(outputs, np.asarray([-1, num_classes]))
outputs_final = tf.nn.softmax(outputs_reshaped)
outputs = tf.reshape(outputs_final, np.asarray([-1, h, w, num_classes]))
return outputs
This is the way that I save the network weights:
#===============
def __create_summary_manager(self):
self.saver = Saver(
self.sess,
self.placeholders,
self.outputs,
self.savepath
)
#===============
import tensorflow as tf
class Saver(object):
def __init__(self, sess, input_dict, output_dict, path):
self.sess = sess
self.input_dict = input_dict
self.output_dict = output_dict
self.path = path
self.iteration = 0
self.input_dict_info = {}
self.output_dict_info = {}
for key in input_dict.keys():
self.input_dict_info[key] = \
tf.saved_model.utils.build_tensor_info(
self.input_dict[key]
)
for key in output_dict.keys():
self.output_dict_info[key] = \
tf.saved_model.utils.build_tensor_info(
self.output_dict[key]
)
self.prediction_signature = (
tf.saved_model.signature_def_utils.build_signature_def(
inputs=self.input_dict_info,
outputs=self.output_dict_info)
)
def save(self, iteration_val):
self.iteration += 1
export_path = os.path.join(
tf.compat.as_bytes(self.path),
tf.compat.as_bytes(str(iteration_val))
)
self.builder = tf.saved_model.builder.SavedModelBuilder(export_path)
self.builder.add_meta_graph_and_variables(
self.sess, [tf.saved_model.tag_constants.SERVING],
signature_def_map={
'prediction': self.prediction_signature,
}
)
self.builder.save()

Loading the CNN model and predict the CSV file

I'm learning the basic CNN model by using tensorflow. After training my model, I want to load it and use the model to predict the hand-written digital img (CSV file).
Here is my CNN model:
import random
import os
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
tf.logging.set_verbosity(tf.logging.ERROR)
class CNNLogisticClassification:
def __init__(self, shape_picture, n_labels,
learning_rate=0.5, dropout_ratio=0.5, alpha=0.0):
self.shape_picture = shape_picture
self.n_labels = n_labels
self.weights = None
self.biases = None
self.graph = tf.Graph() # initialize new grap
self.build(learning_rate, dropout_ratio, alpha) # building graph
self.sess = tf.Session(graph=self.graph) # create session by the graph
def build(self, learning_rate, dropout_ratio, alpha):
with self.graph.as_default():
### Input
self.train_pictures = tf.placeholder(tf.float32,
shape=[None]+self.shape_picture,name="Input")
self.train_labels = tf.placeholder(tf.int32,
shape=(None, self.n_labels),name="Output")
### Optimalization
# build neurel network structure and get their predictions and loss
self.y_, self.original_loss = self.structure(pictures=self.train_pictures,
labels=self.train_labels,
dropout_ratio=dropout_ratio,
train=True, )
# regularization loss
self.regularization = \
tf.reduce_sum([tf.nn.l2_loss(w) for w in self.weights.values()]) \
/ tf.reduce_sum([tf.size(w, out_type=tf.float32) for w in self.weights.values()])
# total loss
self.loss = self.original_loss + alpha * self.regularization
# define training operation
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
self.train_op = optimizer.minimize(self.loss)
### Prediction
self.new_pictures = tf.placeholder(tf.float32,
shape=[None]+self.shape_picture,name="Input")
self.new_labels = tf.placeholder(tf.int32,
shape=(None, self.n_labels),name="Output")
self.new_y_, self.new_original_loss = self.structure(pictures=self.new_pictures,
labels=self.new_labels)
self.new_loss = self.new_original_loss + alpha * self.regularization
### Initialization
self.init_op = tf.global_variables_initializer()
### save model
self.saver=tf.train.Saver()
def structure(self, pictures, labels, dropout_ratio=None, train=False):
### Variable
## LeNet5 Architecture(http://yann.lecun.com/exdb/lenet/)
# input:(batch,28,28,1) => conv1[5x5,6] => (batch,24,24,6)
# pool2 => (batch,12,12,6) => conv2[5x5,16] => (batch,8,8,16)
# pool4 => fatten5 => (batch,4x4x16) => fc6 => (batch,120)
# (batch,120) => fc7 => (batch,84)
# (batch,84) => fc8 => (batch,10) => softmax
if (not self.weights) and (not self.biases):
self.weights = {
'conv1': tf.Variable(tf.truncated_normal(shape=(5, 5, 1, 6),
stddev=0.1)),
'conv3': tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16),
stddev=0.1)),
'fc6': tf.Variable(tf.truncated_normal(shape=(4*4*16, 120),
stddev=0.1)),
'fc7': tf.Variable(tf.truncated_normal(shape=(120, 84),
stddev=0.1)),
'fc8': tf.Variable(tf.truncated_normal(shape=(84, self.n_labels),
stddev=0.1)),
}
self.biases = {
'conv1': tf.Variable(tf.zeros(shape=(6))),
'conv3': tf.Variable(tf.zeros(shape=(16))),
'fc6': tf.Variable(tf.zeros(shape=(120))),
'fc7': tf.Variable(tf.zeros(shape=(84))),
'fc8': tf.Variable(tf.zeros(shape=(self.n_labels))),
}
### Structure
conv1 = self.get_conv_2d_layer(pictures,
self.weights['conv1'], self.biases['conv1'],
activation=tf.nn.relu)
pool2 = tf.nn.max_pool(conv1,
ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
conv3 = self.get_conv_2d_layer(pool2,
self.weights['conv3'], self.biases['conv3'],
activation=tf.nn.relu)
pool4 = tf.nn.max_pool(conv3,
ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
fatten5 = self.get_flatten_layer(pool4)
if train:
fatten5 = tf.nn.dropout(fatten5, keep_prob=1-dropout_ratio[0])
fc6 = self.get_dense_layer(fatten5,
self.weights['fc6'], self.biases['fc6'],
activation=tf.nn.relu)
if train:
fc6 = tf.nn.dropout(fc6, keep_prob=1-dropout_ratio[1])
fc7 = self.get_dense_layer(fc6,
self.weights['fc7'], self.biases['fc7'],
activation=tf.nn.relu)
logits = self.get_dense_layer(fc7, self.weights['fc8'], self.biases['fc8'])
y_ = tf.nn.softmax(logits)
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=labels,
logits=logits))
return (y_, loss)
def get_dense_layer(self, input_layer, weight, bias, activation=None):
x = tf.add(tf.matmul(input_layer, weight), bias)
if activation:
x = activation(x)
return x
def get_conv_2d_layer(self, input_layer,
weight, bias,
strides=(1, 1), padding='VALID', activation=None):
x = tf.add(
tf.nn.conv2d(input_layer,
weight,
[1, strides[0], strides[1], 1],
padding=padding), bias)
if activation:
x = activation(x)
return x
def get_flatten_layer(self, input_layer):
shape = input_layer.get_shape().as_list()
n = 1
for s in shape[1:]:
n *= s
x = tf.reshape(input_layer, [-1, n])
return x
def fit(self, X, y, epochs=10,
validation_data=None, test_data=None, batch_size=None):
X = self._check_array(X)
y = self._check_array(y)
N = X.shape[0]
random.seed(9000)
if not batch_size:
batch_size = N
self.sess.run(self.init_op)
for epoch in range(epochs):
print('Epoch %2d/%2d: ' % (epoch+1, epochs))
# mini-batch gradient descent
index = [i for i in range(N)]
random.shuffle(index)
while len(index) > 0:
index_size = len(index)
batch_index = [index.pop() for _ in range(min(batch_size, index_size))]
feed_dict = {
self.train_pictures: X[batch_index, :],
self.train_labels: y[batch_index],
}
_, loss = self.sess.run([self.train_op, self.loss],
feed_dict=feed_dict)
print('[%d/%d] loss = %.4f ' % (N-len(index), N, loss), end='\r')
# evaluate at the end of this epoch
y_ = self.predict(X)
train_loss = self.evaluate(X, y)
train_acc = self.accuracy(y_, y)
msg = '[%d/%d] loss = %8.4f, acc = %3.2f%%' % (N, N, train_loss, train_acc*100)
if validation_data:
val_loss = self.evaluate(validation_data[0], validation_data[1])
val_acc = self.accuracy(self.predict(validation_data[0]), validation_data[1])
msg += ', val_loss = %8.4f, val_acc = %3.2f%%' % (val_loss, val_acc*100)
print(msg)
if test_data:
test_acc = self.accuracy(self.predict(test_data[0]), test_data[1])
print('test_acc = %3.2f%%' % (test_acc*100))
def accuracy(self, predictions, labels):
return (np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))/predictions.shape[0])
def predict(self, X):
X = self._check_array(X)
return self.sess.run(self.new_y_, feed_dict={self.new_pictures: X})
def evaluate(self, X, y):
X = self._check_array(X)
y = self._check_array(y)
return self.sess.run(self.new_loss, feed_dict={self.new_pictures: X,
self.new_labels: y})
def _check_array(self, ndarray):
ndarray = np.array(ndarray)
if len(ndarray.shape) == 1:
ndarray = np.reshape(ndarray, (1, ndarray.shape[0]))
return ndarray
if __name__ == '__main__':
print('Extract MNIST Dataset ...')
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
train_data = mnist.train
valid_data = mnist.validation
test_data = mnist.test
train_img = np.reshape(train_data.images, [-1, 28, 28, 1])
valid_img = np.reshape(valid_data.images, [-1, 28, 28, 1])
test_img = np.reshape(test_data.images, [-1, 28, 28, 1])
model = CNNLogisticClassification(
shape_picture=[28, 28, 1],
n_labels=10,
learning_rate=0.07,
dropout_ratio=[0.2, 0.1],
alpha=0.1,
)
model.fit(
X=train_img,
y=train_data.labels,
epochs=10,
validation_data=(valid_img, valid_data.labels),
test_data=(test_img, test_data.labels),
batch_size=32,
)
saver = model.saver.save(model.sess, "test_model")
print("Model saved in path: %s" % saver)
And I create another py file to load my model:
import tensorflow as tf
saver = tf.train.import_meta_graph('./my_model/test_model.meta')
with tf.Session() as sess:
new_saver = tf.train.import_meta_graph('./my_model/test_model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./my_model'))
sess.run(tf.global_variables_initializer())
saver.predict('D:\python\number_data\3.csv')
This is the error I'm getting:
AttributeError: 'Saver' object has no attribute 'predict'
How do I fix it and let the trained model predict my CSV file?
Thanks in advance for your help!
Edit:
I change my second py file as below:
import numpy as np
import tensorflow as tf
import pandas as pd
X=pd.read_csv('D:/PYTHON/cnn_data/7.csv', index_col=None, header=None).values
X1=X/255
X3=tf.convert_to_tensor(
X1,
dtype=None,
dtype_hint=None,
name=None
)
saver = tf.train.import_meta_graph('./my_model/test_model.meta')
with tf.Session() as sess:
new_saver = tf.train.import_meta_graph('./my_model/test_model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./my_model'))
graph=tf.get_default_graph()
xs0=graph.get_tensor_by_name("Input:0")
prediction=graph.get_tensor_by_name("Output:0")
sess.run(prediction,feed_dict={xs0:X3})
print(prediction)
I only try to predict one digital img data(CSV file with one row), I transfer it into tensor type and name my two placeholder "Input" , "Output", but get another error:
TypeError: The value of a feed cannot be a tf.Tensor object. Acceptable feed values include Python scalars, strings, lists, numpy ndarrays, or TensorHandles. For reference, the tensor object was Tensor("Const:0", shape=(1, 784), dtype=float64) which was passed to
the feed with key Tensor("Input:0", shape=(?, 28, 28, 1), dtype=float32).
>
First of all, the obvious error here is that you are trying to call a function that doesn't exist. Evidently, the saver object does not have a predict function.
Second, if you want Tensorflow to make predictions, you need to provide it with "Tensorflow" input, and sadly, CSVs are not one of them.
All you need to do is transform your CSV inputs into tensors, with a function like this for instance:
filename = 'D:\python\number_data\3.csv'
def csv_to_tensor(filename):
...
return tensors
I cannot tell you how to implement the function exactly since I don't know the exact format of your data, but I am assuming that each row in your file is an input. So you most likely just need to loop through the lines in your file and convert each line to a tensor, which can then be used by a Tensorflow model.

Categories