How to join an encoder and a decoder - python

I have built the following encoder-decoder architecture, and the encoder and decoder both work fine separately:
from tensorflow.keras.layers import LSTM, Input, Reshape, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
WORD_TO_INDEX = {"foo": 0, "bar": 1}
MAX_QUERY_WORD_COUNT = 10
QUERY_ENCODING_SIZE = 15
# ENCODER
query_encoder_input = Input(shape=(None, len(WORD_TO_INDEX)), name="query_encoder_input")
query_encoder_output = LSTM(QUERY_ENCODING_SIZE, name="query_encoder_lstm")(query_encoder_input)
query_encoder = Model(inputs=query_encoder_input, outputs=query_encoder_output)
# DECODER
query_decoder_input = Input(shape=(QUERY_ENCODING_SIZE,), name="query_decoder_input")
query_decoder_reshape = Reshape((1, QUERY_ENCODING_SIZE), name="query_decoder_reshape")(query_decoder_input)
query_decoder_lstm = LSTM(QUERY_ENCODING_SIZE, name="query_decoder_lstm", return_sequences=True, return_state=True)
recurrent_input, state_h, state_c = query_decoder_lstm(query_decoder_reshape)
states = [state_h, state_c]
query_decoder_outputs = []
for _ in range(MAX_QUERY_WORD_COUNT):
    recurrent_input, state_h, state_c = query_decoder_lstm(recurrent_input, initial_state=states)
    query_decoder_outputs.append(recurrent_input)
    states = [state_h, state_c]
query_decoder_output = Lambda(lambda x: K.concatenate(x, axis=1), name="query_decoder_concat")(query_decoder_outputs)
query_decoder = Model(inputs=query_decoder_input, outputs=query_decoder_output)
But when I try to join them together to create an autoencoder, I get an odd error and I don't know why.
# AUTOENCODER
# apply the reshape layer to the output of the encoder
query_autoencoder_output = query_decoder.layers[1](query_encoder_output)
# rebuild the autoencoder by applying each layer of the decoder to the output of the encoder
for decoder_layer in query_decoder.layers[2:]:
    # this fails and I don't know why
    query_autoencoder_output = decoder_layer(query_autoencoder_output)
# the code never gets here
query_autoencoder = Model(inputs=query_encoder_input, outputs=query_autoencoder_output)
This throws the error:
ValueError: Shape must be rank 3 but is rank 2 for '{{node
query_decoder_concat/concat_1}} = ConcatV2[N=3, T=DT_FLOAT,
Tidx=DT_INT32](query_decoder_lstm/PartitionedCall_11:1,
query_decoder_lstm/PartitionedCall_11:2,
query_decoder_lstm/PartitionedCall_11:3,
query_decoder_concat/concat_1/axis)' with input shapes: [?,1,15],
[?,15], [?,15], [].
This is the template I used for my decoder. (See the "What if I don't want to use teacher forcing for training?" section.)
I relied on these StackOverflow questions (especially the last one) to figure out how to combine the models together.
What does this error mean and how can I fix it?

You can essentially treat a model as a layer. With an autoencoder, it'll be as straightforward as something like this:
autoencoder = Sequential([encoder, decoder])
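For the models in the question, that composition would look something like this (a minimal sketch, assuming the query_encoder and query_decoder models defined above):
from tensorflow.keras.models import Sequential

query_autoencoder = Sequential([query_encoder, query_decoder])
query_autoencoder.summary()  # each sub-model shows up as a single nested layer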

If you want some extra flexibility you can subclass tf.keras.Model:
class AutoEncoder(tf.keras.Model):
    def __init__(self, encoder, decoder):
        super(AutoEncoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def call(self, inputs, training=None, **kwargs):
        x = self.encoder(inputs)
        x = self.decoder(x)
        return x

ae = AutoEncoder(encoder, decoder)
ae.fit(...
Full reproducible example:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from tensorflow import keras
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)
import numpy as np
(xtrain, ytrain), (xtest, ytest) = keras.datasets.cifar10.load_data()
train_ix = np.where(ytrain.ravel() == 1)
test_ix = np.where(ytest.ravel() == 1)
cars_train = xtrain[train_ix]
cars_test = xtest[test_ix]
cars = np.vstack([cars_train, cars_test]).astype(np.float32)/255
X = tf.data.Dataset.from_tensor_slices(cars).batch(8)
class Encoder(keras.Model):
    def __init__(self):
        super(Encoder, self).__init__()
        self.flat = keras.layers.Flatten(input_shape=(32, 32, 3))
        self.dense1 = keras.layers.Dense(128)
        self.dense2 = keras.layers.Dense(32)

    def call(self, inputs, training=None, **kwargs):
        x = self.flat(inputs)
        x = keras.activations.selu(self.dense1(x))
        x = keras.activations.selu(self.dense2(x))
        return x


class Decoder(keras.Model):
    def __init__(self):
        super(Decoder, self).__init__()
        self.dense1 = keras.layers.Dense(128, input_shape=[32])
        self.dense2 = keras.layers.Dense(32 * 32 * 3)
        self.reshape = keras.layers.Reshape([32, 32, 3])

    def call(self, inputs, training=None, **kwargs):
        x = keras.activations.selu(self.dense1(inputs))
        x = keras.activations.sigmoid(self.dense2(x))
        x = self.reshape(x)
        return x


class AutoEncoder(keras.Model):
    def __init__(self, encoder, decoder):
        super(AutoEncoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def call(self, inputs, training=None, **kwargs):
        x = self.encoder(inputs)
        x = self.decoder(x)
        return x
ae = AutoEncoder(Encoder(), Decoder())
loss_object = keras.losses.BinaryCrossentropy()
reconstruction_loss = keras.metrics.Mean(name='reconstruction_loss')
optimizer = keras.optimizers.Adam()
@tf.function
def reconstruct(inputs):
    with tf.GradientTape() as tape:
        out = ae(inputs)
        loss = loss_object(inputs, out)
    gradients = tape.gradient(loss, ae.trainable_variables)
    optimizer.apply_gradients(zip(gradients, ae.trainable_variables))
    reconstruction_loss(loss)
if __name__ == '__main__':
    template = 'Epoch {:2} Reconstruction Loss {:.4f}'
    for epoch in range(50):
        reconstruction_loss.reset_states()
        for input_batches in X:
            reconstruct(input_batches)
        print(template.format(epoch + 1, reconstruction_loss.result()))
Output:
Epoch 35 Reconstruction Loss 0.5794
Epoch 36 Reconstruction Loss 0.5793
Epoch 37 Reconstruction Loss 0.5792
Epoch 38 Reconstruction Loss 0.5791
Epoch 39 Reconstruction Loss 0.5790
Epoch 40 Reconstruction Loss 0.5789
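After training, the sub-models can be used directly; a small usage sketch (not part of the original output above):
sample = cars[:8]                    # a batch of images scaled to [0, 1]
codes = ae.encoder(sample)           # latent codes with shape (8, 32)
reconstructions = ae.decoder(codes)  # reconstructed images with shape (8, 32, 32, 3)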

Based on M Z's answer, but without using Sequential, you can do this:
query_autoencoder = Model(inputs=query_encoder_input, outputs=query_decoder(query_encoder_output))
query_autoencoder.summary()
The summary also breaks the model down into more layers than it does in M Z's answer.
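Purely as an illustration (dummy data, not from the original answer) that the joined model trains end to end with shapes matching the models in the question:
import numpy as np

query_autoencoder.compile(optimizer="adam", loss="mse")

queries = np.random.rand(32, MAX_QUERY_WORD_COUNT, len(WORD_TO_INDEX)).astype("float32")
# The decoder emits MAX_QUERY_WORD_COUNT steps of width QUERY_ENCODING_SIZE, so the
# dummy targets are shaped to match the model's output.
targets = np.random.rand(32, MAX_QUERY_WORD_COUNT, QUERY_ENCODING_SIZE).astype("float32")

query_autoencoder.fit(queries, targets, epochs=2, batch_size=8)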

For anyone using Javascript and Tensorflow JS, here is an example of how to make deeper networks.
I thought I would include this since most Javascript examples of autoencoders show only one encoding and decoding layer.
function buildModel (layers)
{
    const model = tf.sequential();
    layers.forEach(layer =>
    {
        model.add(layer);
    });
    return model;
}
Then you can make a deeper network with more encoding and decoding layers with:
// [ encoder ] [ decoder ]
// Model layers: 20 -> 10 -> 2 -> 10 -> 20
// Encoding Layers
const encoder = [
    tf.layers.dense({ inputShape: [20], units: 10, activation: 'relu' }),
    tf.layers.dense({ units: 2, activation: 'relu' })
];
// Decoding Layers
const decoder = [
    tf.layers.dense({ units: 10, activation: 'relu' }),
    tf.layers.dense({ units: 20, activation: 'relu' })
];
// Build model
const model = buildModel([...encoder, ...decoder]);
// ... Then compile and Train ...
After training, predictions can be made with:
const predictionModel = buildModel([...encoder]);
predictionModel.predict(data);

Related

A question about a CGAN that gives me "InvalidArgumentError"

I was training on the MNIST dataset from Keras, copying the example from Keras. When I first hit the problem I tried to log each variable being concatenated, but I couldn't find the mismatched shape that the error reports. Here is the code:
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
import numpy as np
batch_size = 64
num_channels = 1
num_classes = 10
image_size = 28
latent_dim = 128
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
all_digits = np.concatenate([x_train, x_test])
all_labels = np.concatenate([y_train, y_test])
all_digits = all_digits.astype("float32") / 255.0
# print(all_digits.shape)
all_digits = np.reshape(all_digits, (-1, 28, 28, 1))
# print(all_digits.shape)
all_labels = keras.utils.to_categorical(all_labels, 10)
dataset = tf.data.Dataset.from_tensor_slices((all_digits, all_labels))
dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)
generator_in_channels = latent_dim + num_classes
discriminator_in_channels = num_channels + num_classes
discriminator = keras.Sequential(
    [
        keras.layers.InputLayer((28,28,discriminator_in_channels)),
        layers.Conv2D(64,(3,3),strides=(2,2),padding='same'),
        layers.LeakyReLU(alpha=0.2),
        layers.Conv2D(128,(3,3),strides=(2,2),padding="same"),
        layers.LeakyReLU(alpha=0.2),
        layers.GlobalMaxPool2D(),
        layers.Dense(1)
    ],
    name='discriminator'
)
generator = keras.Sequential(
    [
        keras.layers.InputLayer((generator_in_channels,)),
        layers.Dense(7*7*generator_in_channels),
        layers.LeakyReLU(alpha=0.2),
        layers.Reshape((7,7,generator_in_channels)),
        layers.Conv2DTranspose(8,(4,4),strides=(2,2),padding='same'),
        layers.LeakyReLU(alpha=0.2),
        layers.Conv2DTranspose(8,(4,4),strides=(2,2),padding='same'),
        layers.LeakyReLU(alpha=0.2),
        layers.Conv2D(1,(3,3),padding='same',activation='sigmoid')
    ],
    name='generator'
)
generator.summary()
discriminator.summary()
class ConditionalGan(keras.Model):
    def __init__(self,discriminator,generator,latent_dim):
        super(ConditionalGan,self).__init__()
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim
        self.gen_loss_tracker = keras.metrics.Mean(name='generator_loss')
        self.disc_loss_tracker = keras.metrics.Mean(name='discriminator_loss')

    @property
    def metrics(self):
        return [self.gen_loss_tracker,self.disc_loss_tracker]

    def compile(self,d_optimizer,g_optimizer,loss_fn):
        super(ConditionalGan,self).compile()
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        self.loss_fn = loss_fn
    def train_step(self,data):
        real_image , one_hot_labels = data
        image_one_hot_labels = one_hot_labels[:,:,None,None]
        image_one_hot_labels = tf.repeat(image_one_hot_labels,repeats=[image_size*image_size])
        image_one_hot_labels = tf.reshape(image_one_hot_labels,shape=(-1,image_size,image_size,num_classes))
        # Discriminator
        random_latent_vector = tf.random.normal(shape=(batch_size,latent_dim))
        print(random_latent_vector.shape)
        print(one_hot_labels.shape)
        random_vector_labels = tf.concat((random_latent_vector,one_hot_labels),axis=1)
        generator_image = self.generator(random_vector_labels)
        fake_image_and_labels = tf.concat([generator_image,image_one_hot_labels],-1)
        real_image_and_labels = tf.concat([real_image,image_one_hot_labels],-1)
        print(generator_image.shape)
        print(real_image.shape)
        combine_images = tf.concat([real_image_and_labels,fake_image_and_labels],0)
        labels = tf.concat([tf.ones((batch_size,1)),tf.zeros((batch_size,1))],0)
        with tf.GradientTape() as tape:
            pred = self.discriminator(combine_images)
            d_loss = self.loss_fn(labels,pred)
        grads = tape.gradient(d_loss,self.discriminator.trainable_weights)
        self.d_optimizer.apply_gradients(zip(grads,self.discriminator.trainable_weights))
        # Generator
        random_latent_vector = tf.random.normal(shape=(batch_size,latent_dim))
        random_vector_labels = tf.concat((random_latent_vector,one_hot_labels),-1)
        print(random_latent_vector.shape)
        print(one_hot_labels.shape)
        misleading_labels = tf.zeros((batch_size,1))
        with tf.GradientTape() as tape:
            fake_image = self.generator(random_vector_labels)
            print(fake_image.shape)
            print(image_one_hot_labels.shape)
            fake_image_and_labels = tf.concat([fake_image,image_one_hot_labels],-1)
            pred = self.discriminator(fake_image_and_labels)
            g_loss = self.loss_fn(misleading_labels,pred)
        grads = tape.gradient(g_loss,self.generator.trainable_weights)
        self.g_optimizer.apply_gradients(zip(grads,self.generator.trainable_weights))
        # Monitor loss.
        self.gen_loss_tracker.update_state(g_loss)
        self.disc_loss_tracker.update_state(d_loss)
        return {
            'g_loss': self.gen_loss_tracker.result(),
            'd_loss': self.disc_loss_tracker.result()
        }
cond_gan = ConditionalGan(
    discriminator=discriminator, generator=generator, latent_dim=latent_dim
)
cond_gan.compile(
    d_optimizer=keras.optimizers.Adam(learning_rate=0.0003),
    g_optimizer=keras.optimizers.Adam(learning_rate=0.0003),
    loss_fn=keras.losses.BinaryCrossentropy(from_logits=True),
)
cond_gan.fit(dataset, epochs=20)
The error is below. I have searched for answers; some solve this kind of problem by editing the batch_size, which I tried without success. The traceback doesn't point to the right line: this code runs in a Kaggle notebook, so the reported location may not be accurate.
InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [64,128] vs. shape[1] = [48,10]
[[node concat (defined at tmp/ipykernel_34/2191800439.py:95) ]] [Op:__inference_train_function_38770]
Errors may have originated from an input operation.
Input Source operations connected to node concat:
IteratorGetNext (defined at tmp/ipykernel_34/2191800439.py:156)
random_normal (defined at tmp/ipykernel_34/2191800439.py:90)
Function call stack:
train_function
Thank you for answering!
The problem is this line:
random_vector_labels = tf.concat((random_latent_vector,one_hot_labels),-1)
It throws InvalidArgumentError because the shapes don't match: random_latent_vector has shape [64, 128] while one_hot_labels has shape [48, 10]. random_latent_vector is created with the hard-coded batch_size of 64, but one_hot_labels comes from the current batch, and the last batch of your dataset only has 48 examples (70,000 images do not divide evenly by 64). Two tensors can only be concatenated along the feature axis if they have the same number of rows, so make sure the batch dimension of random_latent_vector matches that of one_hot_labels.
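One way to do that (a sketch, not a drop-in tested patch): derive the batch size from the incoming batch inside train_step instead of using the batch_size constant, or drop the incomplete final batch when building the dataset.
# Inside ConditionalGan.train_step, use the actual size of the incoming batch:
real_image, one_hot_labels = data
current_batch_size = tf.shape(real_image)[0]  # 64 for full batches, 48 for the last one
random_latent_vector = tf.random.normal(shape=(current_batch_size, self.latent_dim))
random_vector_labels = tf.concat([random_latent_vector, one_hot_labels], axis=1)
# ...the other places that use batch_size inside train_step (e.g. the real/fake
# label tensors) need the same change.

# Alternatively, drop the incomplete final batch when building the dataset so that
# every batch really has batch_size examples:
dataset = dataset.shuffle(buffer_size=1024).batch(batch_size, drop_remainder=True)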

Validation accuracy initially high then low

I am training a model for text sentiment classification with a CNN. The validation accuracy is initially higher than the training accuracy, and then it decreases. Is this behavior acceptable? If not, what could be the reason and how can I solve it?
My model:
class hyper():
    def __init__(self,embedding_dim,filter_sizes,num_filters,dropout_prob,hidden_dims,batch_size,num_epochs):
        # Model Hyperparameters
        self.embedding_dim = embedding_dim
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.dropout_prob = dropout_prob
        self.hidden_dims = hidden_dims
        # Training parameters
        self.batch_size = batch_size
        self.num_epochs = num_epochs

class prep_hyper():
    def __init__(self,sequenceLength,max_words):
        # Preprocessing parameters
        self.sequenceLength = sequenceLength
        self.max_words = max_words

m_hyper=hyper(embedding_dim=embed_dim,filter_sizes=(3,4,5,6,8),num_filters=80,dropout_prob=(0.2,0.5),
              hidden_dims=50,batch_size=128,num_epochs= 30)
pr_hyper = prep_hyper(sequenceLength=sequence_length,max_words=vocab_size)
model architecture:
def build_model(pr_hyper,m_hyper):
    # Convolutional block
    model_input = Input(shape=(pr_hyper.sequenceLength))
    # use a random embedding for the text
    x = Embedding(pr_hyper.max_words, m_hyper.embedding_dim,weights=[emb],trainable=False)(model_input)
    # x = SpatialDropout1D(m_hyper.dropout_prob[0])(x)
    conv_kern_reg = regularizers.l2(0.0001)
    conv_bias_reg = regularizers.l2(0.0001)
    conv_blocks = []
    for sz in m_hyper.filter_sizes:
        conv = Convolution1D(filters=m_hyper.num_filters,
                             kernel_size=sz,
                             # padding="same",
                             activation="relu",
                             strides=1,
                             kernel_regularizer=conv_kern_reg,
                             bias_regularizer=conv_bias_reg
                             )(x)
        conv = GlobalMaxPooling1D()(conv)
        conv_blocks.append(conv)
    # merge
    x = Concatenate()(conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0]
    x = Dense(m_hyper.hidden_dims, activation="relu")(x)
    x = Dropout(m_hyper.dropout_prob[1])(x)
    x = Dense(100, activation="relu")(x)
    x = Dropout(m_hyper.dropout_prob[1])(x)
    model_output = Dense(3, activation="softmax")(x)
    model = Model(model_input, model_output)
    model.compile(loss="categorical_crossentropy", optimizer=keras.optimizers.Adam(learning_rate=0.00005), metrics=["accuracy"])  # categorical_crossentropy
    print(model.summary())
    tf.keras.utils.plot_model(model, show_shapes=True)  # , to_file='multichannel.png')
    return model
INITIAL EPOCHS:
There are several reasons this can happen; for example, dropout layers are disabled during validation. For more information I would suggest you see this, which describes several possible reasons.
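If you want to check how much of the gap is just the dropout effect, one rough sketch (model, x_train, y_train, x_val and y_val stand in for your own objects) is to evaluate both splits in inference mode, where dropout is off for both:
# Both calls run in inference mode, so dropout is disabled in both cases and the
# two accuracies are directly comparable.
train_loss, train_acc = model.evaluate(x_train, y_train, verbose=0)
val_loss, val_acc = model.evaluate(x_val, y_val, verbose=0)
print(f"inference-mode train acc: {train_acc:.4f}, val acc: {val_acc:.4f}")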

Unable to call model.build() . "ValueError: You cannot build your model by calling `build` if your layers do not support float type inputs."

I've been trying to implement the "An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" paper by Alexey Dosovitskiy et al. using TensorFlow. While implementing the model, I'm running into the following error:
ValueError: You cannot build your model by calling `build` if your layers do not support float type inputs. Instead, in order to instantiate and build your model, `call` your model on real tensor data (of the correct dtype).
My code is split into two files called model.py and train.py
The train.py file is as below
import tensorflow_addons as tfa
from tensorflow import keras
from model import VisionTransformer
num_classes = 10
inputshape = (32, 32, 3)
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
learning_rate = 0.001
weight_decay = 0.0001
batch_size = 256
num_epochs = 1
image_size = 72
patch_size = 6
num_patches = (image_size // patch_size) ** 2
projection_dim = 64
num_heads = 4
transformer_units = [
    projection_dim * 2,
    projection_dim,
]
transformer_layers = 8
mlp_head_units = [2048, 1024]
model = VisionTransformer(
    inputshape,
    patch_size,
    num_patches,
    projection_dim,
    transformer_layers,
    num_heads,
    transformer_units,
    mlp_head_units,
    num_classes
)
optimizer = tfa.optimizers.AdamW(
    learning_rate=learning_rate, weight_decay=weight_decay
)
model.compile(optimizer=optimizer,
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[
                  keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
                  keras.metrics.SparseTopKCategoricalAccuracy(5, name="top-5-accuracy"),
              ],
              )
model.build(inputshape)
model.summary()
The model.py file is as below
import keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class Patches(layers.Layer):
    def __init__(self, patch_size):
        super(Patches, self).__init__()
        self.patch_size = patch_size

    def call(self, images, **kwargs):
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches
class PatchEncoder(layers.Layer):
    def __init__(self, num_patches, projection_dim):
        super(PatchEncoder, self).__init__()
        self.num_patches = num_patches
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch, **kwargs):
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded
def mlp(x, hidden_units, dropout_rate):
    for units in hidden_units:
        x = layers.Dense(units, activation=tf.nn.gelu)(x)
        x = layers.Dropout(dropout_rate)(x)
    return x
class VisionTransformer(tf.keras.Model):
    def __init__(self, inputshape, patch_size, num_patches, projection_dim,
                 transformer_layers, num_heads, transformer_units,
                 mlp_head_units, num_classes):
        super(VisionTransformer, self).__init__()
        self.inputshape = inputshape
        self.patch_size = patch_size
        self.num_patches = num_patches
        self.projection_dim = projection_dim
        self.transformer_layers = transformer_layers
        self.num_heads = num_heads
        self.transformer_units = transformer_units
        self.mlp_head_units = mlp_head_units
        self.num_classes = num_classes

    def call(self, input, training):
        inputs = layers.Input(shape=self.inputshape)
        patches = Patches(self.patch_size)(inputs)
        encoded_patches = PatchEncoder(self.num_patches, self.projection_dim)(patches)
        for _ in range(self.transformer_layers):
            x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
            attention_output = layers.MultiHeadAttention(
                num_heads=self.num_heads, key_dim=self.projection_dim,
                dropout=0.1
            )(x1, x1)
            x2 = layers.Add()([attention_output, encoded_patches])
            x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
            x3 = mlp(x3, hidden_units=self.transformer_units, dropout_rate=0.1)
            encoded_patches = layers.Add()([x3, x2])
        representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        representation = layers.Flatten()(representation)
        representation = layers.Dropout(0.5)(representation)
        features = mlp(representation, hidden_units=self.mlp_head_units,
                       dropout_rate=0.5)
        logits = layers.Dense(self.num_classes)(features)
        model = keras.Model(inputs=inputs, outputs=logits)
        return model
Today I ran into this problem too, and I solved it by deleting the Input layer.
I think the reason is that the model already builds its input when build() is called, so there's no need to create a new Input layer inside call(); otherwise you get the error.
def call(self, input, training):
    # comment out the Input layer and try again
    # inputs = layers.Input(shape=self.inputshape)
    patches = Patches(self.patch_size)(input)
    encoded_patches = PatchEncoder(self.num_patches, self.projection_dim)(patches)
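As the error message itself suggests, another option is to skip model.build(inputshape) entirely and build the model by calling it once on real tensor data. This is only a sketch, and it assumes call() has also been changed to return the logits tensor directly rather than constructing and returning a new keras.Model:
import tensorflow as tf

# One dummy batch with the correct shape and dtype builds every layer, after which
# summary() works.
dummy_batch = tf.random.normal((1, 32, 32, 3))  # matches inputshape = (32, 32, 3)
_ = model(dummy_batch)
model.summary()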

Tensorflow Prune Layer Not Supported

I am trying to prune a model in TensorFlow, but I am coming across an error I don't know how to tackle. The error is: ValueError: Please initialize "Prune" with a supported layer. Layers should either be a "PrunableLayer" instance, or should be supported by the PruneRegistry. You passed: <class 'base_transformer_tf.TransformerEncoder'>
The model is created using following
def transformer_encoder(num_columns, num_labels, num_layers, d_model, num_heads, dff, window_size, dropout_rate, weight_decay, label_smoothing, learning_rate):
    inp = tf.keras.layers.Input(shape = (window_size, num_columns))
    x = tf.keras.layers.BatchNormalization()(inp)
    x = tf.keras.layers.Dense(d_model)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('swish')(x)
    x = tf.keras.layers.SpatialDropout1D(dropout_rate)(x)
    x = TransformerEncoder(num_layers, d_model, num_heads, dff, window_size, dropout_rate)(x)
    out = tf.keras.layers.Dense(num_labels, activation = 'sigmoid', dtype=tf.float32)(x[:, -1, :])
    model = tf.keras.models.Model(inputs = inp, outputs = out)
    model.compile(optimizer = tfa.optimizers.AdamW(weight_decay = weight_decay, learning_rate = learning_rate),
                  loss = tf.keras.losses.BinaryCrossentropy(label_smoothing = label_smoothing),
                  metrics = tf.keras.metrics.AUC(name = 'AUC'),
                  )
    return model
The pruning portion of code is following
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.00,
                                                             final_sparsity=0.50,
                                                             begin_step=0,
                                                             end_step=end_step)
}
model_for_pruning = prune_low_magnitude(model, **pruning_params)
# `prune_low_magnitude` requires a recompile.
model_for_pruning.compile(optimizer='adam',
                          loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                          metrics=['accuracy'])
logdir = tempfile.mkdtemp()
callbacks = [
    tfmot.sparsity.keras.UpdatePruningStep(),
    tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]
model_for_pruning.fit(np.concatenate((X_tr2, X_val)), np.concatenate((y_tr2, y_val)),
                      batch_size=batch_size, epochs=epochs, validation_split=validation_split,
                      callbacks=callbacks)
Any help would be appreciated
Tensorflow does not know how to prune your custom TransformerEncoder Keras layer. You should specify which weights to sparsify, as in this example: Prune custom Keras layer or modify parts of layer to prune.
That would look like:
class TransformerEncoder(tf.keras.layers.Layer, tfmot.sparsity.keras.PrunableLayer):
    def get_prunable_weights(self):
        return [self.my_weight, ..]
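For a concrete (purely hypothetical) illustration of the same idea, here is a toy layer that subclasses Dense and exposes only its kernel for pruning; your TransformerEncoder would instead return the kernels of its own attention and feed-forward sublayers:
import tensorflow as tf
import tensorflow_model_optimization as tfmot

class PrunableDense(tf.keras.layers.Dense, tfmot.sparsity.keras.PrunableLayer):
    def get_prunable_weights(self):
        # Prune only the kernel and leave the bias dense.
        return [self.kernel]

# Usage sketch: the layer can then be wrapped like any built-in layer.
layer = tfmot.sparsity.keras.prune_low_magnitude(PrunableDense(64))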

Transformer model not able to be saved

I'm trying to follow this tutorial https://colab.research.google.com/github/tensorflow/examples/blob/master/community/en/transformer_chatbot.ipynb. However, when I tried to save the model in order to load it again without training, I got the error mentioned here: NotImplementedError: Layers with arguments in `__init__` must override `get_config`
I understood from the answer that I need to make the encoder and decoder classes and customise them (instead of leaving them as functions like in the Colab tutorial), so I went back to the TensorFlow documentation of this model here: https://www.tensorflow.org/tutorials/text/transformer#encoder_layer and tried to edit it. I made the encoder layer as:
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, rate=0.1, **kwargs):
        # super(EncoderLayer, self).__init__()
        super().__init__(**kwargs)
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            # 'vocab_size': self.vocab_size,
            # 'num_layers': self.num_layers,
            # 'units': self.units,
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dropout': self.dropout,
        })
        return config

    def call(self, x, training, mask):
        attn_output, _ = self.mha(x, x, x, mask)  # (batch_size, input_seq_len, d_model)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)
        ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)
        return out2
and the same for the decoder layer class. Then I used the same Encoder as in the TensorFlow documentation:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
                 maximum_position_encoding, rate=0.1):
        super(Encoder, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding,
                                                self.d_model)
        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate)
                           for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask):
        seq_len = tf.shape(x)[1]
        # adding embedding and position encoding.
        x = self.embedding(x)  # (batch_size, input_seq_len, d_model)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[:, :seq_len, :]
        x = self.dropout(x, training=training)
        for i in range(self.num_layers):
            x = self.enc_layers[i](x, training, mask)
        return x  # (batch_size, input_seq_len, d_model)
The function that builds the model:
def transformer(vocab_size,
                num_layers,
                units,
                d_model,
                num_heads,
                dropout,
                name="transformer"):
    inputs = tf.keras.Input(shape=(None,), name="inputs")
    dec_inputs = tf.keras.Input(shape=(None,), name="dec_inputs")
    enc_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='enc_padding_mask')(inputs)
    # mask the future tokens for decoder inputs at the 1st attention block
    look_ahead_mask = tf.keras.layers.Lambda(
        create_look_ahead_mask,
        output_shape=(1, None, None),
        name='look_ahead_mask')(dec_inputs)
    # mask the encoder outputs for the 2nd attention block
    dec_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='dec_padding_mask')(inputs)
    enc_outputs = Encoder(
        num_layers=num_layers, d_model=d_model, num_heads=num_heads,
        input_vocab_size=vocab_size,
    )(inputs=[inputs, enc_padding_mask])
    dec_outputs = Decoder(
        num_layers=num_layers, d_model=d_model, num_heads=num_heads,
        target_vocab_size=vocab_size,
    )(inputs=[dec_inputs, enc_outputs, look_ahead_mask, dec_padding_mask])
    outputs = tf.keras.layers.Dense(units=vocab_size, name="outputs")(dec_outputs)
    return tf.keras.Model(inputs=[inputs, dec_inputs], outputs=outputs, name=name)
and calling the model:
# the model itself with its parameters:
# Hyper-parameters
NUM_LAYERS = 3
D_MODEL = 256
#D_MODEL=tf.cast(D_MODEL, tf.float32)
NUM_HEADS = 8
UNITS = 512
DROPOUT = 0.1
model = transformer(
    vocab_size=VOCAB_SIZE,
    num_layers=NUM_LAYERS,
    units=UNITS,
    d_model=D_MODEL,
    num_heads=NUM_HEADS,
    dropout=DROPOUT)
However, I got this error:
TypeError: __init__() missing 2 required positional arguments: 'dff' and 'maximum_position_encoding'
I am really confused. I don't understand what dff and maximum position encoding mean in the documentation, and when I removed them from the encoder and decoder classes I got another error, because the positional_encoding function takes the maximum position as input and dff is also used inside the class. I am not sure what I should do, as I don't know whether I am following the right steps.
If you get this error while calling transformer then your problem is with creating the model, not saving it.
Other than that, I see several issues with your get_config:
You defined dropout instead of rate.
The attributes you reference (self.d_model etc.) are never defined or assigned in __init__.
get_config doesn't exist at all for your Encoder class.
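A hedged sketch of how the first two points could look for EncoderLayer (it also accepts dff, since your Encoder passes it; MultiHeadAttention and point_wise_feed_forward_network are the helpers from the tutorial):
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # store the constructor arguments so get_config can report them
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.rate = rate
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def get_config(self):
        config = super().get_config()
        config.update({
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dff': self.dff,
            'rate': self.rate,  # key name matches the __init__ argument
        })
        return config
The Encoder (and Decoder) classes then need an analogous get_config of their own.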
