I am trying to prune a model in TensorFlow but am running into an error I don't know how to tackle. The error is:
ValueError: Please initialize `Prune` with a supported layer. Layers should either be a `PrunableLayer` instance, or should be supported by the PruneRegistry. You passed: <class 'base_transformer_tf.TransformerEncoder'>
The model is created with the following:
def transformer_encoder(num_columns, num_labels, num_layers, d_model, num_heads, dff, window_size, dropout_rate, weight_decay, label_smoothing, learning_rate):
    inp = tf.keras.layers.Input(shape=(window_size, num_columns))
    x = tf.keras.layers.BatchNormalization()(inp)
    x = tf.keras.layers.Dense(d_model)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('swish')(x)
    x = tf.keras.layers.SpatialDropout1D(dropout_rate)(x)
    x = TransformerEncoder(num_layers, d_model, num_heads, dff, window_size, dropout_rate)(x)
    out = tf.keras.layers.Dense(num_labels, activation='sigmoid', dtype=tf.float32)(x[:, -1, :])
    model = tf.keras.models.Model(inputs=inp, outputs=out)
    model.compile(optimizer=tfa.optimizers.AdamW(weight_decay=weight_decay, learning_rate=learning_rate),
                  loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing),
                  metrics=tf.keras.metrics.AUC(name='AUC'),
                  )
    return model
The pruning portion of the code is the following:
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.00,
                                                             final_sparsity=0.50,
                                                             begin_step=0,
                                                             end_step=end_step)
}

model_for_pruning = prune_low_magnitude(model, **pruning_params)

# `prune_low_magnitude` requires a recompile.
model_for_pruning.compile(optimizer='adam',
                          loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                          metrics=['accuracy'])

logdir = tempfile.mkdtemp()

callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]

model_for_pruning.fit(np.concatenate((X_tr2, X_val)), np.concatenate((y_tr2, y_val)),
                      batch_size=batch_size, epochs=epochs, validation_split=validation_split,
                      callbacks=callbacks)
Any help would be appreciated
TensorFlow does not know how to prune your custom TransformerEncoder Keras layer. You have to specify which of its weights to sparsify, as in this example: Prune custom Keras layer or modify parts of layer to prune.
That would look like:
class TransformerEncoder(tf.keras.layers.Layer, tfmot.sparsity.keras.PrunableLayer):
    def get_prunable_weights(self):
        return [self.my_weight, ...]
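Concretely, a fuller sketch could look like the following. This is hypothetical: the feed-forward sublayers here stand in for whatever attention/FFN blocks your real TransformerEncoder owns, so get_prunable_weights should return your layer's actual kernels.
import tensorflow as tf
import tensorflow_model_optimization as tfmot

class TransformerEncoder(tf.keras.layers.Layer, tfmot.sparsity.keras.PrunableLayer):
    def __init__(self, d_model, dff, **kwargs):
        super().__init__(**kwargs)
        # Stand-in sublayers; a real encoder also owns attention blocks, etc.
        self.ffn_1 = tf.keras.layers.Dense(dff, activation='relu')
        self.ffn_2 = tf.keras.layers.Dense(d_model)

    def build(self, input_shape):
        # Build the sublayers eagerly so their kernels already exist when
        # the pruning wrapper calls get_prunable_weights().
        self.ffn_1.build(input_shape)
        self.ffn_2.build(tf.TensorShape(input_shape)[:-1].concatenate([self.ffn_1.units]))
        super().build(input_shape)

    def call(self, inputs):
        return self.ffn_2(self.ffn_1(inputs))

    def get_prunable_weights(self):
        # Only the tensors listed here are sparsified; biases stay dense.
        return [self.ffn_1.kernel, self.ffn_2.kernel]
Once every layer in the model is either registered or a PrunableLayer, prune_low_magnitude(model, **pruning_params) should accept the model.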
I have a pre-trained model, and I created a new model based on one layer of the pre-trained model, as below:
def create_ds_Pretrained(pretrained, maxlen, vocab_size, segment_size=2):
    input_wtoken = layers.Input((maxlen,), dtype=tf.int64, name='input_ids')
    input_segment = layers.Input((maxlen,), dtype=tf.int64, name='segment_ids')
    input_seq = layers.Input((maxlen, 5), dtype=tf.float32, name='seq')

    Pretrained_output = pretrained(inputs=[input_wtoken, input_segment])
    combined_output = layers.Concatenate()([Pretrained_output, input_seq])
    normalized_output = layers.LayerNormalization(epsilon=1e-6)(combined_output)
    pooled_output = layers.GlobalMaxPooling1D()(normalized_output)
    regularization_layer = layers.LeakyReLU(alpha=0.1)(pooled_output)

    x1 = layers.Dense(1024, activation="relu")(regularization_layer)
    x1 = layers.Dense(1024, activation="relu")(x1)
    x1 = layers.Dense(512, activation="relu")(x1)
    output_1 = layers.Dense(1, activation="linear", name='output_1')(x1)

    model = keras.Model(
        inputs=[input_wtoken, input_segment, input_seq],
        outputs=[output_1])
    return model
I trained the model with the code below:
savepath_ds = 'downstream.h5'
savepath_ds_weights = savepath_ds.replace('.h5', '_weights.h5')
savepath_ds_checkpoint = savepath_ds_weights.replace('.h5', '_cp.h5')

checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(savepath_ds_checkpoint, monitor="val_loss", verbose=0,
                                                   mode="min", save_best_only=True, save_weights_only=True,
                                                   save_freq="epoch")
early_stop = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=config.patience, verbose=1,
                                              mode="min", restore_best_weights=True)

# Load pre-trained upstream model
try:
    pretrained_mdl = pretrained_mdl
except:
    savepath_pre = 'pretrained_mdl.h5'
    pretrained_mdl = tf.keras.models.load_model(savepath_pre)

with strategy.scope():
    pretrained_layer = tf.keras.Model(pretrained_mdl.input,
                                      pretrained_mdl.get_layer("encoder_" + str(config.NUM_LAYERS - 1) + "/ffn_layernormalization").output)
    pretrained_layer.trainable = True
    ds_mdl = create_ds_Pretrained(pretrained_layer, config.maxlen)
    ds_mdl.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.00001, decay=1e-6),
        loss={
            "output_1": keras.losses.MeanSquaredError()
        },
        metrics={
            "output_1": tf.keras.metrics.RootMeanSquaredError(),
        }
    )
    ds_mdl.fit(
        ds_train,
        epochs=config.EPOCH,
        validation_data=ds_valid,
        callbacks=[checkpoint_cb, early_stop]
    )
    ds_mdl.save(savepath_ds)
    ds_mdl.save_weights(savepath_ds_weights)
Now I want to load ds_mdl from the saved path:
loading_ds_mdl = tf.keras.models.load_model(savepath_ds)
I get the error:
"unable to open object (component not found)"
I think I need to specify custom_objects, but I did not declare a function or class for the pretrained layer. How do I load the model in this situation?
Thank you!
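Since the weights were also saved separately with save_weights, one possible direction, shown here only as a sketch under the assumption that create_ds_Pretrained and the pretrained .h5 file are still available, is to rebuild the architecture in code and restore just the weights, which avoids deserializing the nested pretrained layer entirely (the same idea as the checkpoint workaround further down this page):
# Sketch: rebuild the exact training-time graph, then load the saved
# weights instead of deserializing the full model from downstream.h5.
pretrained_mdl = tf.keras.models.load_model('pretrained_mdl.h5')
pretrained_layer = tf.keras.Model(pretrained_mdl.input,
                                  pretrained_mdl.get_layer("encoder_" + str(config.NUM_LAYERS - 1) + "/ffn_layernormalization").output)
ds_mdl = create_ds_Pretrained(pretrained_layer, config.maxlen)  # same arguments as at training time
ds_mdl.load_weights(savepath_ds_weights)  # restore weights only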
I recently created a TensorFlow/Keras model with Keras Transformers. To do this, the custom PositionalEmbedding & TransformerEncoder classes were created and used to build the model architecture. They are created as such:
class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=output_dim
        )
        self.sequence_length = sequence_length
        self.output_dim = output_dim

    def call(self, inputs):
        # The inputs are of shape: `(batch_size, frames, num_features)`
        length = tf.shape(inputs)[1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_positions = self.position_embeddings(positions)
        return inputs + embedded_positions

    def compute_mask(self, inputs, mask=None):
        mask = tf.reduce_any(tf.cast(inputs, "bool"), axis=-1)
        return mask


class TransformerEncoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim, dropout=0.3
        )
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation=tf.nn.gelu), layers.Dense(embed_dim)]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs, mask=None):
        if mask is not None:
            mask = mask[:, tf.newaxis, :]
        attention_output = self.attention(inputs, inputs, attention_mask=mask)
        proj_input = self.layernorm_1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm_2(proj_input + proj_output)
At first, I was unable to even save this model using the typical model.save() method. However, I was able to work around this by updating the config for the classes like so:
### FOR THE PositionalEmbedding CLASS
def get_config(self):
    config = super().get_config().copy()
    config.update({
        'position_embeddings': self.position_embeddings,
        'sequence_length': self.sequence_length,
        'output_dim': self.output_dim
    })
    return config

### FOR THE TransformerEncoder CLASS
def get_config(self):
    config = super().get_config().copy()
    config.update({
        'embed_dim': self.embed_dim,
        'dense_dim': self.dense_dim,
        'num_heads': self.num_heads,
        'attention': self.attention,
        'dense_proj': self.dense_proj,
        'layernorm_1': self.layernorm_1,
        'layernorm_2': self.layernorm_2
    })
    return config
However, when I try to load the model using the keras load_model() method without the custom_objects argument, I get the following error:
ValueError: Unknown layer: PositionalEmbedding. Please ensure this object is passed to the `custom_objects` argument.
And if I use the load_model() method without initializing the classes, using the custom_objects argument for the two classes, as in load_model('my_model.h5', custom_objects={'PositionalEmbedding': PositionalEmbedding, 'TransformerEncoder': TransformerEncoder}), I get the following error:
NameError: name 'PositionalEmbedding' is not defined
And finally, if I do initialize the classes with the updated configs before loading, and use the load_model() method as shown in the previous example, I get the following error:
TypeError: ('Keyword argument not understood:', 'position_embeddings')
Anyone know what might be causing this issue and how I can resolve them to load this model? Any help is appreciated!
Thanks!
Sam
So I was actually able to solve this problem with a workaround. Instead of saving the model and loading it the old-fashioned way, I saved a checkpoint for the model while training, then loaded it by creating a new model from scratch and loading the checkpoint as the weights.
The code for that is below:
### SAVING THE MODEL WITH CHECKPOINT
filepath = "/content/drive/MyDrive/tmp/model_checkpoint.ckpt"
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath, save_weights_only=True, save_best_only=True, verbose=1
)
history = model.fit(
    train_data,
    train_labels,
    validation_split=0.3,
    epochs=250,
    batch_size=256,
    callbacks=[checkpoint],
)

### CREATING NEW MODEL & LOADING CHECKPOINT AS WEIGHTS
def get_compiled_model():
    sequence_length = MAX_SEQ_LENGTH
    embed_dim = NUM_FEATURES
    dense_dim = 4
    num_heads = 1
    classes = len(label_processor.get_vocabulary())

    inputs = keras.Input(shape=(None, None))
    x = PositionalEmbedding(
        sequence_length, embed_dim, name="frame_position_embedding"
    )(inputs)
    x = TransformerEncoder(embed_dim, dense_dim, num_heads, name="transformer_layer")(x)
    x = layers.GlobalMaxPooling1D()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(classes, activation="softmax")(x)
    model = keras.Model(inputs, outputs)

    model.compile(
        optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
    )
    return model

model = get_compiled_model()
model.load_weights("/content/drive/MyDrive/tmp/model_checkpoint.ckpt")
I am training a model for text sentiment classification with a CNN. The validation accuracy is initially higher than the training accuracy, and then it decreases. Is this behavior acceptable? If not, what could be the reason, and how can I solve it?
My model:
class hyper():
    def __init__(self, embedding_dim, filter_sizes, num_filters, dropout_prob, hidden_dims, batch_size, num_epochs):
        # Model hyperparameters
        self.embedding_dim = embedding_dim
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.dropout_prob = dropout_prob
        self.hidden_dims = hidden_dims
        # Training parameters
        self.batch_size = batch_size
        self.num_epochs = num_epochs

class prep_hyper():
    def __init__(self, sequenceLength, max_words):
        # Preprocessing parameters
        self.sequenceLength = sequenceLength
        self.max_words = max_words

m_hyper = hyper(embedding_dim=embed_dim, filter_sizes=(3, 4, 5, 6, 8), num_filters=80,
                dropout_prob=(0.2, 0.5), hidden_dims=50, batch_size=128, num_epochs=30)

pr_hyper = prep_hyper(sequenceLength=sequence_length, max_words=vocab_size)
Model architecture:
def build_model(pr_hyper, m_hyper):
    # Convolutional block
    model_input = Input(shape=(pr_hyper.sequenceLength,))
    # use a random embedding for the text
    x = Embedding(pr_hyper.max_words, m_hyper.embedding_dim, weights=[emb], trainable=False)(model_input)
    # x = SpatialDropout1D(m_hyper.dropout_prob[0])(x)

    conv_kern_reg = regularizers.l2(0.0001)
    conv_bias_reg = regularizers.l2(0.0001)

    conv_blocks = []
    for sz in m_hyper.filter_sizes:
        conv = Convolution1D(filters=m_hyper.num_filters,
                             kernel_size=sz,
                             # padding="same",
                             activation="relu",
                             strides=1,
                             kernel_regularizer=conv_kern_reg,
                             bias_regularizer=conv_bias_reg)(x)
        conv = GlobalMaxPooling1D()(conv)
        conv_blocks.append(conv)

    # merge
    x = Concatenate()(conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0]
    x = Dense(m_hyper.hidden_dims, activation="relu")(x)
    x = Dropout(m_hyper.dropout_prob[1])(x)
    x = Dense(100, activation="relu")(x)
    x = Dropout(m_hyper.dropout_prob[1])(x)
    model_output = Dense(3, activation="softmax")(x)

    model = Model(model_input, model_output)
    model.compile(loss="categorical_crossentropy",
                  optimizer=keras.optimizers.Adam(learning_rate=0.00005),
                  metrics=["accuracy"])
    print(model.summary())
    tf.keras.utils.plot_model(model, show_shapes=True)  # , to_file='multichannel.png')
    return model
INITIAL EPOCHS: (screenshot of the first training epochs omitted)
There are several reasons this can happen; for example, dropout layers are active during training but disabled during validation, so the training metrics are computed on a handicapped network. For more information, I would suggest you see this post, which describes several possible reasons.
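A quick way to see the dropout effect directly, as a minimal sketch (not from the post above; the layer sizes are arbitrary), is to run the same batch through a model with dropout active and inactive:
import numpy as np
import tensorflow as tf

# Dropout is applied when training=True (as during model.fit) and
# disabled when training=False (as during validation/evaluation).
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(10,)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(3, activation='softmax'),
])

x = np.random.rand(4, 10).astype('float32')
print(model(x, training=True))   # varies between calls: half the units are dropped
print(model(x, training=False))  # deterministic: all units active
Because training metrics are accumulated while dropout is active, they can legitimately sit below the validation metrics early in training.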
I have built the following encoder-decoder architecture, and the encoder and decoder both work fine separately:
from tensorflow.keras.layers import LSTM, Input, Reshape, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K

WORD_TO_INDEX = {"foo": 0, "bar": 1}
MAX_QUERY_WORD_COUNT = 10
QUERY_ENCODING_SIZE = 15

# ENCODER
query_encoder_input = Input(shape=(None, len(WORD_TO_INDEX)), name="query_encoder_input")
query_encoder_output = LSTM(QUERY_ENCODING_SIZE, name="query_encoder_lstm")(query_encoder_input)
query_encoder = Model(inputs=query_encoder_input, outputs=query_encoder_output)

# DECODER
query_decoder_input = Input(shape=(QUERY_ENCODING_SIZE,), name="query_decoder_input")
query_decoder_reshape = Reshape((1, QUERY_ENCODING_SIZE), name="query_decoder_reshape")(query_decoder_input)
query_decoder_lstm = LSTM(QUERY_ENCODING_SIZE, name="query_decoder_lstm", return_sequences=True, return_state=True)
recurrent_input, state_h, state_c = query_decoder_lstm(query_decoder_reshape)
states = [state_h, state_c]
query_decoder_outputs = []
for _ in range(MAX_QUERY_WORD_COUNT):
    recurrent_input, state_h, state_c = query_decoder_lstm(recurrent_input, initial_state=states)
    query_decoder_outputs.append(recurrent_input)
    states = [state_h, state_c]
query_decoder_output = Lambda(lambda x: K.concatenate(x, axis=1), name="query_decoder_concat")(query_decoder_outputs)
query_decoder = Model(inputs=query_decoder_input, outputs=query_decoder_output)
But when I try to join them together to create an autoencoder, I get an odd error and I don't know why.
# AUTOENCODER
# apply the reshape layer to the output of the encoder
query_autoencoder_output = query_decoder.layers[1](query_encoder_output)
# rebuild the autoencoder by applying each layer of the decoder to the output of the encoder
for decoder_layer in query_decoder.layers[2:]:
    # this fails and I don't know why
    query_autoencoder_output = decoder_layer(query_autoencoder_output)
# the code never gets here
query_autoencoder = Model(inputs=query_encoder_input, outputs=query_autoencoder_output)
This throws the error:
ValueError: Shape must be rank 3 but is rank 2 for '{{node query_decoder_concat/concat_1}} = ConcatV2[N=3, T=DT_FLOAT, Tidx=DT_INT32](query_decoder_lstm/PartitionedCall_11:1, query_decoder_lstm/PartitionedCall_11:2, query_decoder_lstm/PartitionedCall_11:3, query_decoder_concat/concat_1/axis)' with input shapes: [?,1,15], [?,15], [?,15], [].
This is the template I used for my decoder. (See the "What if I don't want to use teacher forcing for training?" section.)
I relied on these StackOverflow questions (especially the last one) to figure out how to combine the models together.
What does this error mean and how can I fix it?
You can treat a model as a layer, essentially. With an autoencoder, it'll be as straightforward as something like this:
autoencoder = Sequential([encoder, decoder])
If you want some extra flexibility you can subclass tf.keras.Model:
class AutoEncoder(tf.keras.Model):
    def __init__(self, encoder, decoder):
        super(AutoEncoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def call(self, inputs, training=None, **kwargs):
        x = self.encoder(inputs)
        x = self.decoder(x)
        return x

ae = AutoEncoder(encoder, decoder)
ae.fit(...
Full reproducible example:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from tensorflow import keras
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)
import numpy as np

(xtrain, ytrain), (xtest, ytest) = keras.datasets.cifar10.load_data()

train_ix = np.where(ytrain.ravel() == 1)
test_ix = np.where(ytest.ravel() == 1)

cars_train = xtrain[train_ix]
cars_test = xtest[test_ix]

cars = np.vstack([cars_train, cars_test]).astype(np.float32) / 255

X = tf.data.Dataset.from_tensor_slices(cars).batch(8)


class Encoder(keras.Model):
    def __init__(self):
        super(Encoder, self).__init__()
        self.flat = keras.layers.Flatten(input_shape=(32, 32, 3))
        self.dense1 = keras.layers.Dense(128)
        self.dense2 = keras.layers.Dense(32)

    def call(self, inputs, training=None, **kwargs):
        x = self.flat(inputs)
        x = keras.activations.selu(self.dense1(x))
        x = keras.activations.selu(self.dense2(x))
        return x


class Decoder(keras.Model):
    def __init__(self):
        super(Decoder, self).__init__()
        self.dense1 = keras.layers.Dense(128, input_shape=[32])
        self.dense2 = keras.layers.Dense(32 * 32 * 3)
        self.reshape = keras.layers.Reshape([32, 32, 3])

    def call(self, inputs, training=None, **kwargs):
        x = keras.activations.selu(self.dense1(inputs))
        x = keras.activations.sigmoid(self.dense2(x))
        x = self.reshape(x)
        return x


class AutoEncoder(keras.Model):
    def __init__(self, encoder, decoder):
        super(AutoEncoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def call(self, inputs, training=None, **kwargs):
        x = self.encoder(inputs)
        x = self.decoder(x)
        return x


ae = AutoEncoder(Encoder(), Decoder())

loss_object = keras.losses.BinaryCrossentropy()
reconstruction_loss = keras.metrics.Mean(name='reconstruction_loss')
optimizer = keras.optimizers.Adam()
@tf.function
def reconstruct(inputs):
    with tf.GradientTape() as tape:
        out = ae(inputs)
        loss = loss_object(inputs, out)
    gradients = tape.gradient(loss, ae.trainable_variables)
    optimizer.apply_gradients(zip(gradients, ae.trainable_variables))
    reconstruction_loss(loss)


if __name__ == '__main__':
    template = 'Epoch {:2} Reconstruction Loss {:.4f}'
    for epoch in range(50):
        reconstruction_loss.reset_states()
        for input_batches in X:
            reconstruct(input_batches)
        print(template.format(epoch + 1, reconstruction_loss.result()))
Output:
Epoch 35 Reconstruction Loss 0.5794
Epoch 36 Reconstruction Loss 0.5793
Epoch 37 Reconstruction Loss 0.5792
Epoch 38 Reconstruction Loss 0.5791
Epoch 39 Reconstruction Loss 0.5790
Epoch 40 Reconstruction Loss 0.5789
Based off of M Z's answer, but without using Sequential, you can do this:
query_autoencoder = Model(inputs=query_encoder_input, outputs=query_decoder(query_encoder_output))
query_autoencoder.summary()
The summary also breaks down into more layers than M Z's answer did.
For anyone using Javascript and Tensorflow JS, here is an example of how to make deeper networks.
I thought I would include this since most Javascript examples of autoencoders show only one encoding and decoding layer.
function buildModel (layers)
{
    const model = tf.sequential();
    layers.forEach(layer =>
    {
        model.add(layer);
    });
    return model;
}
Then you can make a deeper network with more encoding and decoding layers with:
// Model layers: 20 -> 10 -> 2 -> 10 -> 20
//               [  encoder  ]   [ decoder ]

// Encoding Layers
const encoder = [
    tf.layers.dense({ inputShape: [20], units: 10, activation: 'relu' }),
    tf.layers.dense({ units: 2, activation: 'relu' })
];

// Decoding Layers
const decoder = [
    tf.layers.dense({ units: 10, activation: 'relu' }),
    tf.layers.dense({ units: 20, activation: 'relu' })
];
// Build model
const model = buildModel([...encoder, ...decoder]);
// ... Then compile and Train ...
After training, predictions can be made with:
const predictionModel = buildModel([...encoder]);
predictionModel.predict(data);
I'm having some trouble converting a pure Keras model to the TensorFlow Estimator API on an unbalanced dataset.
When using the pure Keras API, the class_weight parameter is available in the model.fit method, but when converting a Keras model to a TensorFlow Estimator with tensorflow.python.keras.estimator.model_to_estimator there is no way to pass class_weights.
How can I overcome this?
I'm using TF 1.12 on Ubuntu 18, Cuda 9, Cudnn 7
Pure Keras model:
def keras_model(n_classes=None, model_dir='./tmp-model/', config=None):
    with tf.device('/gpu:0'):
        # Inputs
        inp_raw = Input(shape=(max_len,), name='word_raw')

        # raw text LSTM network
        word_raw_emb = Embedding(
            input_dim=nunique_chars_raw,
            output_dim=EMBED_SIZE,
            input_length=MAX_WORD_LENGTH,
            trainable=True,
            name='word_raw_emb')(inp_raw)
        word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
        word_raw_emb_lstm = Bidirectional(
            LSTM(48, return_sequences=True))(word_raw_emb)
        word_raw_emb_gru = Bidirectional(
            GRU(48, return_sequences=False))(word_raw_emb_lstm)
        word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
        output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)

        model = Model(inputs=inp_raw, outputs=output_raw_net)
        optz = keras.optimizers.RMSprop(
            lr=0.002, rho=0.9, epsilon=None, decay=0.0)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optz, metrics=['categorical_accuracy'])
        return model
model = keras_model(5)

model.fit(train_X, train_Y_onehot,
          batch_size=128,
          epochs=10,
          validation_data=(eval_X, eval_Y_onehot),
          class_weight=class_weights,
          verbose=1)
Keras model to TensorFlow Estimator:
def keras_estimator_model(n_classes=None, model_dir='./tmp-model/', config=None):
    with tf.device('/gpu:0'):
        # Inputs
        inp_raw = Input(shape=(max_len,), name='word_raw')

        # raw text LSTM network
        word_raw_emb = Embedding(
            input_dim=nunique_chars_raw,
            output_dim=EMBED_SIZE,
            input_length=MAX_WORD_LENGTH,
            trainable=True,
            name='word_raw_emb')(inp_raw)
        word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
        word_raw_emb_lstm = Bidirectional(
            LSTM(48, return_sequences=True))(word_raw_emb)
        word_raw_emb_gru = Bidirectional(
            GRU(48, return_sequences=False))(word_raw_emb_lstm)
        word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
        output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)

        model = Model(inputs=inp_raw, outputs=output_raw_net)
        optz = keras.optimizers.RMSprop(
            lr=0.002, rho=0.9, epsilon=None, decay=0.0)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optz, metrics=['categorical_accuracy'])

        model_estimator = model_to_estimator(keras_model=model, model_dir=model_dir, config=config)
        return model_estimator

estimator_model = keras_estimator_model(5)

train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=10)
eval_spec = tf.estimator.EvalSpec(
    input_fn=eval_input_fn,
    steps=None,
    start_delay_secs=10,
    throttle_secs=10)

tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)
I've coded a workaround and it seems to work:
import tensorflow as tf
from tensorflow.python.keras import backend as K

def weighted_loss_fn(class_weights):
    def _loss_fn(y_true, y_pred):
        class_weights_tensor = K.variable(class_weights)
        y_true_labels = K.argmax(y_true, axis=1)
        weights = K.gather(class_weights_tensor, y_true_labels)
        return tf.losses.softmax_cross_entropy(onehot_labels=y_true, logits=y_pred, weights=weights)
    return _loss_fn

def keras_estimator_model(n_classes=None, model_dir='./tmp-model/', config=None, class_weights=None):
    with tf.device('/gpu:0'):
        # Inputs
        inp_raw = Input(shape=(max_len,), name='word_raw')

        # raw text LSTM network
        word_raw_emb = Embedding(
            input_dim=nunique_chars_raw,
            output_dim=EMBED_SIZE,
            input_length=MAX_WORD_LENGTH,
            trainable=True,
            name='word_raw_emb')(inp_raw)
        word_raw_emb = Dropout(rate=dropout_rate)(word_raw_emb)
        word_raw_emb_lstm = Bidirectional(
            LSTM(48, return_sequences=True))(word_raw_emb)
        word_raw_emb_gru = Bidirectional(
            GRU(48, return_sequences=False))(word_raw_emb_lstm)
        word_raw_net = Dense(16, activation='relu')(word_raw_emb_gru)
        output_raw_net = Dense(n_classes, activation='softmax')(word_raw_net)

        model = Model(inputs=inp_raw, outputs=output_raw_net)
        optz = keras.optimizers.RMSprop(
            lr=0.002, rho=0.9, epsilon=None, decay=0.0)
        loss_fn = weighted_loss_fn(class_weights)
        model.compile(loss=loss_fn,
                      optimizer=optz, metrics=['categorical_accuracy'])

        model_estimator = model_to_estimator(keras_model=model, model_dir=model_dir, config=config)
        return model_estimator

estimator_model = keras_estimator_model(5, class_weights=class_weights)

train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=10)
eval_spec = tf.estimator.EvalSpec(
    input_fn=eval_input_fn,
    steps=None,
    start_delay_secs=10,
    throttle_secs=10)

tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)
In my case class_weights = [ 0.17041813 42.00318471 35.26470588 29.70495495 42.00318471 44.55743243]
The class_weight param is used to weight the loss function (during training only). So, to get the same effect you have to alter your loss function. I did not find an existing TensorFlow loss function that supports this, only a feature request for cross-entropy loss with class weights. There is a workaround mentioned: Here
By the way, the issue was closed because it is possible in Keras :)
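For reference, the same idea can be written so that the weight lookup table is built once instead of on every loss call; a minimal sketch, assuming one-hot labels and a softmax model output as in the question:
from tensorflow.python.keras import backend as K

def make_weighted_loss(class_weights):
    class_weights_tensor = K.constant(class_weights)  # built once, reused every batch
    def _loss_fn(y_true, y_pred):
        # Per-sample weight = the class weight of the true label.
        weights = K.gather(class_weights_tensor, K.argmax(y_true, axis=1))
        return K.categorical_crossentropy(y_true, y_pred) * weights
    return _loss_fn
Compiling the Keras model with this loss before calling model_to_estimator gives the estimator the same effect class_weight would have had in model.fit.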