I use Tensorflow 2.0 and want to extract all weights and biases from a trained model. Here is what I did so far:
I create a model class:
class MyModel(Model):
    """Small image classifier: Conv2D -> Flatten -> Dense(64) -> Dense(10, softmax)."""

    def __init__(self):
        # Initialise the Keras base class before attaching any layer.
        super().__init__()
        self.conv1 = Conv2D(
            filters=32, kernel_size=3, strides=[2, 2], activation='relu'
        )
        self.flatten = Flatten()
        self.d1 = Dense(units=64, activation="relu")
        self.d2 = Dense(units=10, activation="softmax")

    def call(self, x):
        """Pass `x` through conv1, flatten, d1 and d2 (in that order) and return the result."""
        flat_features = self.flatten(self.conv1(x))
        return self.d2(self.d1(flat_features))
During and after the training, I save my model:
# Target path handed to save_weights().
checkpoint_path = "./logs/model.ckpt"
# Target directory handed to save().
checkpoint_dir = "./logs/"
# Write the model's weights to `checkpoint_path`.
self.model.save_weights(checkpoint_path)
# Write the model itself to `checkpoint_dir`.
self.model.save(checkpoint_dir)
At this point I am already wondering how to save the model correctly: should I use save_weights or just save? I want to be able to
retrain the model if necessary
extract the model's weights for further analysis
Currently I load my trained model (in a new file) by doing:
model = MyModel()
model.load_weights(checkpoint_path)
But how can I access the network's weights? I already tried tf.compat.v1.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) which did not work.
I highly appreciate any help!
Firstly the difference between two saving methods:
model.save_weights(): You save only weights. So, you need the model code to reconstruct the model as model = MyModel() with initial weights. Then you replace weights by .load_weights()
model.save(): It saves the whole model including the architecture, optimizer states and weights. So, you can reproduce the entire model without the code that defines MyModel().
By the way, another option in TF2 is to use checkpoint manager.
In your case, I would go with .save_weights() or checkpoint manager
Next, you can analyze the weights by:
# `tf` is only an alias created by `import tensorflow as tf`; it is not an
# importable module name, so every import has to go through `tensorflow`.
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Conv2D, Flatten, Dense


class MyModel(tf.keras.Model):
    """Small image classifier: Conv2D -> Flatten -> Dense(64) -> Dense(10, softmax)."""

    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = Conv2D(filters=32, kernel_size=3, strides=[2, 2], activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(units=64, activation="relu")
        self.d2 = Dense(units=10, activation="softmax")

    def call(self, x):
        """Apply conv1, flatten, d1 and d2 in sequence and return the result."""
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        x = self.d2(x)
        return x


m = MyModel()
# For example, 64x64 images with one channel and an arbitrary batch size.
input_shape = tf.TensorShape([None, 64, 64, 1])
# Build the model for that input shape before reading `layer.weights` below.
m.build(input_shape)

# Train
# Save weights
# Load weights

# Analyze weights: each layer's `weights` list is unpacked into its
# weight tensor and its bias tensor.
conv1_weights, conv1_bias = m.conv1.weights
d1_weights, d1_bias = m.d1.weights
d2_weights, d2_bias = m.d2.weights
Related
I have trained a TensorFlow model and saved it to a local disk. When I load it and run inference, how can I get the output of an intermediate layer?
I use the example in the tutorial as a demo.
The model is:
class MyModel(Model):
    """Tutorial classifier: Conv2D(32, 3) -> Flatten -> Dense(128) -> Dense(10)."""

    def __init__(self):
        super().__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        # The last layer is created without an activation argument.
        self.d2 = Dense(10)

    def call(self, x):
        """Feed `x` through conv1, flatten, d1 and d2 and return d2's output."""
        hidden = self.d1(self.flatten(self.conv1(x)))
        return self.d2(hidden)
# Create an instance of the model
model = MyModel()
I save the model to local and load it in another place.
# Export the model to the local directory ./saved_model.
tf.saved_model.save(model, export_dir="./saved_model")
# Load it back from the same directory.
loaded = tf.saved_model.load("./saved_model")
# Pick the "serving_default" entry of the loaded object's signatures.
concrete_fun = loaded.signatures["serving_default"]
# Run inference on an all-zero input of shape (2, 28, 28, 1).
out = concrete_fun(tf.zeros((2, 28, 28, 1)))
# The result is indexed by name; inspect the shape stored under "output_1".
out["output_1"].shape
As far as I know, the concrete function is specific to its input and output.
How can I get the output, weights and bias of intermediate layers, for example self.conv1?
You can try running:
print([var for var in concrete_fun.trainable_variables])
to get each layer's weights and biases. To access the graph of your model, you can run concrete_fun.graph. See here for more details.
To access the output of intermediate layers, it would be easiest to save the model like this:
model.save('your_model', save_format='tf')
and then load it:
model.save('your_model', save_format='tf')
model = tf.keras.models.load_model('your_model')
conv_layer = model.get_layer(index=0)
print(conv_layer(tf.random.normal((1, 28, 28, 1))).shape)
You can access any layer in your model with the .get_layer() method (Model class). You just need the name of the layer:
conv1_layer = model.get_layer("name_of_the_layer")
From this you can access the weights (Layer class ):
conv1_layer.get_weights()
the output:
conv1_layer.output
and the bias (https://stackoverflow.com/a/42053507/13469674):
conv1_layer.get_weights()[1]
I recently created a Tensorflow/Keras model with Keras Transformers. To do this, the custom PositionalEmbedding & TransformerEncoder classes were created and used to build the model architecture. There are created as such:
class PositionalEmbedding(layers.Layer):
    """Adds an embedding of each position index to the inputs."""

    def __init__(self, sequence_length, output_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments around as plain attributes.
        self.sequence_length = sequence_length
        self.output_dim = output_dim
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=output_dim
        )

    def call(self, inputs):
        """Return `inputs` plus the embedding of positions 0..length-1 along axis 1."""
        # The inputs are of shape: `(batch_size, frames, num_features)`
        num_frames = tf.shape(inputs)[1]
        position_ids = tf.range(start=0, limit=num_frames, delta=1)
        return inputs + self.position_embeddings(position_ids)

    def compute_mask(self, inputs, mask=None):
        """Return a mask that is True where any value along the last axis is non-zero."""
        # The incoming `mask` argument is not consulted; the mask is derived
        # from `inputs` alone.
        return tf.reduce_any(tf.cast(inputs, "bool"), axis=-1)
class TransformerEncoder(layers.Layer):
    """Self-attention followed by a two-layer projection, each with a residual LayerNormalization."""

    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim, dropout=0.3
        )
        # Projection stack: widen to `dense_dim` with GELU, then back to `embed_dim`.
        projection_layers = [
            layers.Dense(dense_dim, activation=tf.nn.gelu),
            layers.Dense(embed_dim),
        ]
        self.dense_proj = keras.Sequential(projection_layers)
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs, mask=None):
        """Apply attention and projection to `inputs`; `mask`, if given, gains an axis at position 1."""
        attention_mask = None if mask is None else mask[:, tf.newaxis, :]
        attended = self.attention(inputs, inputs, attention_mask=attention_mask)
        normed = self.layernorm_1(inputs + attended)
        projected = self.dense_proj(normed)
        return self.layernorm_2(normed + projected)
At first, I was unable to even save this model using the typical model.save() method. However, I was able to solve for this by updating the config for the classes like so:
### FOR THE PositionalEmbedding CLASS
def get_config(self):
    """Return the constructor arguments needed to rebuild this layer.

    Only the plain values accepted by ``__init__`` (``sequence_length`` and
    ``output_dim``) are added to the base config. The ``position_embeddings``
    sub-layer is deliberately left out: ``__init__`` re-creates it, and
    putting the layer object into the config makes loading fail with
    "Keyword argument not understood: position_embeddings" because it is
    forwarded to the constructor as an unknown keyword.
    """
    config = super().get_config().copy()
    config.update({
        'sequence_length': self.sequence_length,
        'output_dim': self.output_dim,
    })
    return config
### FOR THE TransformerEncoder CLASS
def get_config(self):
    """Return the constructor arguments needed to rebuild this layer.

    Only ``embed_dim``, ``dense_dim`` and ``num_heads`` -- the values that
    ``__init__`` accepts -- are added to the base config. The sub-layers
    (``attention``, ``dense_proj``, ``layernorm_1``, ``layernorm_2``) are
    deliberately left out: ``__init__`` re-creates them, and layer objects
    in the config would be forwarded to the constructor as unknown keyword
    arguments when the model is loaded.
    """
    config = super().get_config().copy()
    config.update({
        'embed_dim': self.embed_dim,
        'dense_dim': self.dense_dim,
        'num_heads': self.num_heads,
    })
    return config
However, when I try to load the model using the keras load_model() method without the custom_objects argument, I get the following error:
ValueError: Unknown layer: PositionalEmbedding. Please ensure this object is passed to the `custom_objects` argument.
And if I use the load_model() method without initializing the classes, passing the two classes through the custom_objects argument, i.e. load_model('my_model.h5', custom_objects={'PositionalEmbedding': PositionalEmbedding, 'TransformerEncoder': TransformerEncoder}), I get the following error:
NameError: name 'PositionalEmbedding' is not defined
And finally, if I do initialize the classes with the updated configs before loading, and use the load_model() method as shown in the previous example, I get the following error:
TypeError: ('Keyword argument not understood:', 'position_embeddings')
Anyone know what might be causing this issue and how I can resolve them to load this model? Any help is appreciated!
Thanks!
Sam
So I was actually able to solve this problem with a workaround. Instead of saving the model and loading it the old-fashioned way, I saved a checkpoint for the model while training, then loaded it by creating a new model from scratch and loading the checkpoint as the weights.
The code for that is below:
### SAVING THE MODEL WITH CHECKPOINT
# Location the checkpoint callback writes to.
filepath = "/content/drive/MyDrive/tmp/model_checkpoint.ckpt"
# Checkpoint callback configured with save_weights_only=True and
# save_best_only=True; verbose=1 turns on its log output.
checkpoint = keras.callbacks.ModelCheckpoint(
filepath, save_weights_only=True, save_best_only=True, verbose=1
)
# Train with 30% of the data held out for validation; the checkpoint
# callback is attached to the run.
history = model.fit(
train_data,
train_labels,
validation_split=0.3,
epochs=250,
batch_size=256,
callbacks=[checkpoint],
)
### CREATING NEW MODEL & LOADING CHECKPOINT AS WEIGHTS
def get_compiled_model():
    """Build the positional-embedding + transformer classifier and compile it.

    Reads MAX_SEQ_LENGTH, NUM_FEATURES and `label_processor` from the
    enclosing scope and returns the compiled `keras.Model`.
    """
    sequence_length, embed_dim = MAX_SEQ_LENGTH, NUM_FEATURES
    dense_dim, num_heads = 4, 1
    num_classes = len(label_processor.get_vocabulary())

    inputs = keras.Input(shape=(None, None))
    hidden = PositionalEmbedding(
        sequence_length, embed_dim, name="frame_position_embedding"
    )(inputs)
    hidden = TransformerEncoder(
        embed_dim, dense_dim, num_heads, name="transformer_layer"
    )(hidden)
    hidden = layers.GlobalMaxPooling1D()(hidden)
    hidden = layers.Dropout(0.5)(hidden)
    outputs = layers.Dense(num_classes, activation="softmax")(hidden)

    classifier = keras.Model(inputs, outputs)
    classifier.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return classifier
model = get_compiled_model()
model.load_weights("/content/drive/MyDrive/tmp/model_checkpoint.ckpt")
I have the code sequence below. I tried to wrap that code by subclassing the TensorFlow Model class. However, I get the following errors. Any help in solving these errors is appreciated. Thank you in advance.
code sequence
# Symbolic input of shape (720, 540, 2).
input_tensor = Input(shape=(720, 540, 2))
# ResNet50V2 built on that input, without its top and without preloaded weights.
base_model = ResNet50V2(input_tensor=input_tensor, include_top=False, weights=None, classes=4)
x = base_model.output
# Pool the base model's output, then map it to 4 sigmoid outputs.
x = GlobalAveragePooling2D()(x)
predictions = Dense(4, activation= 'sigmoid')(x)
# Functional model from the base model's input to the predictions.
model = Model(inputs = base_model.input, outputs = predictions)
Attempted model class
class StreoModel(tf.keras.Model):
    """Subclassed wrapper attempt around ResNet50V2 with a 4-unit sigmoid head."""

    def __init__(self):
        super().__init__()
        self.dense1 = Dense(4, activation='sigmoid')

    def call(self, inputs):
        # NOTE(review): `inputs` is never read here; every call creates a new
        # symbolic Input and a new ResNet50V2 and works on that network's
        # symbolic output instead of the data passed in.
        input_tensor = Input(shape=(720, 540, 2))
        backbone = ResNet50V2(input_tensor=input_tensor, include_top=False, weights=None, classes=4)
        pooled = GlobalAveragePooling2D()(backbone.output)
        return self.dense1(pooled)
Error log:
TypeError: Cannot convert a symbolic Keras input/output to a numpy array. This error may indicate that you're trying to pass a symbolic value to a NumPy call, which is not supported. Or, you may be trying to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model.
I think the problem lies in the way you pass data to your ResNet50V2. Try defining a simple Subclassing model like this:
class StreoModel(tf.keras.Model):
    """Holds a functional ResNet50V2 + pooling + softmax model in `self.model`."""

    def __init__(self):
        super().__init__()
        self.resnet_v2 = tf.keras.applications.resnet_v2.ResNet50V2(
            include_top=False, weights=None, classes=4, input_shape=(720, 540, 2)
        )
        self.resnet_v2.trainable = True
        # Wire the head onto the backbone's symbolic output (functional API).
        pooled = tf.keras.layers.GlobalAveragePooling2D()(self.resnet_v2.output)
        probabilities = tf.keras.layers.Dense(4, activation='softmax')(pooled)
        # The complete network is exposed as `self.model`.
        self.model = tf.keras.Model(self.resnet_v2.input, probabilities)
Note that I removed your input layer and added an input shape to ResNet50V2. According to the docs, you should specify the input_shape if include_top=False. I also changed your output activation function to softmax, since you are dealing with 4 classes.
And then using it:
sm = StreoModel()
sm.model(np.random.random((1, 720, 540, 2)))
# <tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[0.25427648, 0.25267935, 0.23970276, 0.2533414 ]], dtype=float32)>
If you want to define your Model with a call method, then you can do it like this:
class StreoModel(tf.keras.Model):
    """ResNet50V2 backbone followed by global average pooling and a 4-way softmax."""

    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(4, activation='softmax')
        self.resnet = tf.keras.applications.resnet_v2.ResNet50V2(
            include_top=False, weights=None, classes=4, input_shape=(720, 540, 2)
        )
        self.pooling = tf.keras.layers.GlobalAveragePooling2D()

    def call(self, inputs):
        """Run `inputs` through the backbone, the pooling layer and the dense head."""
        pooled = self.pooling(self.resnet(inputs))
        return self.dense(pooled)
And use it like this:
sm = StreoModel()
sm(np.random.random((1, 720, 540, 2)))
# <tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[0.25062975, 0.2428435 , 0.25178066, 0.25474608]], dtype=float32)>
I am trying to prune a model in tensorflow but coming across an error I don't know how to tackle. The error is ValueError: Please initialize "Prune" with a supported layer. Layers should either be a "PrunableLayer" instance, or should be supported by the PruneRegistry. You passed: <class 'base_transformer_tf.TransformerEncoder'>
The model is created using following
def transformer_encoder(num_columns, num_labels, num_layers, d_model, num_heads, dff, window_size, dropout_rate, weight_decay, label_smoothing, learning_rate):
    """Build and compile the windowed transformer classifier.

    The input has shape (window_size, num_columns); the output is a sigmoid
    Dense layer with `num_labels` units applied to the last step of the
    encoder output. Returns the compiled model.
    """
    inp = tf.keras.layers.Input(shape=(window_size, num_columns))

    # Stem: normalise, project to d_model, normalise again, swish, dropout.
    hidden = tf.keras.layers.BatchNormalization()(inp)
    hidden = tf.keras.layers.Dense(d_model)(hidden)
    hidden = tf.keras.layers.BatchNormalization()(hidden)
    hidden = tf.keras.layers.Activation('swish')(hidden)
    hidden = tf.keras.layers.SpatialDropout1D(dropout_rate)(hidden)

    encoded = TransformerEncoder(num_layers, d_model, num_heads, dff, window_size, dropout_rate)(hidden)
    # Only the last position along axis 1 feeds the output layer.
    last_step = encoded[:, -1, :]
    out = tf.keras.layers.Dense(num_labels, activation='sigmoid', dtype=tf.float32)(last_step)

    network = tf.keras.models.Model(inputs=inp, outputs=out)
    optimizer = tfa.optimizers.AdamW(weight_decay=weight_decay, learning_rate=learning_rate)
    loss = tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing)
    auc_metric = tf.keras.metrics.AUC(name='AUC')
    network.compile(optimizer=optimizer, loss=loss, metrics=auc_metric)
    return network
The pruning portion of code is following
# Pruning schedule: sparsity goes from 0.00 to 0.50 between step 0 and `end_step`.
pruning_params = {
'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.00,
final_sparsity=0.50,
begin_step=0,
end_step=end_step)
}
# Wrap the whole model for pruning with the schedule above.
model_for_pruning = prune_low_magnitude(model, **pruning_params)
# `prune_low_magnitude` requires a recompile.
# NOTE(review): this recompile uses SparseCategoricalCrossentropy(from_logits=True)
# and 'accuracy', while transformer_encoder() compiles the same model with
# BinaryCrossentropy and AUC on a sigmoid output -- confirm which is intended.
model_for_pruning.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
# Fresh temporary directory, passed to PruningSummaries as its log_dir.
logdir = tempfile.mkdtemp()
callbacks = [
tfmot.sparsity.keras.UpdatePruningStep(),
tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]
# Fit on the training and validation arrays concatenated together.
model_for_pruning.fit(np.concatenate((X_tr2, X_val)), np.concatenate((y_tr2, y_val)),
batch_size=batch_size, epochs=epochs, validation_split=validation_split,
callbacks=callbacks)
Any help would be appreciated
Tensorflow does not know how to prune your custom TransformerEncoder Keras layer. You should specify which weights to sparsify, as in this example: Prune custom Keras layer or modify parts of layer to prune.
That would look like:
class TransformerEncoder(tf.keras.layers.Layer, tfmot.sparsity.keras.PrunableLayer):
    """Custom layer that tells the pruning wrapper which of its weights to sparsify."""

    def get_prunable_weights(self):
        """Return the list of weight tensors that pruning should act on."""
        # `my_weight` is a placeholder name: list here every weight of this
        # layer that should be pruned, e.g. [self.w1, self.w2].
        return [self.my_weight]
Following a couple of tutorials and Stackoverflow questions, I managed to get my Double Dueling DQN Agent learning in my custom Environment.
Now I am unable to save and load my Agent's network model together with its advantage() method. How do I save & load such custom serialized networks?
Python 3.8.5
Tensorflow 2.3.1
Keras 2.4.3
My approach so far was simply:
class DuelingDeepQNAgent:
    """Agent that owns a Q-network and a target network (other members elided in the post)."""

    ...  # remainder of the agent is not shown

    def load_model(self, model_name):
        """Replace both networks with models freshly loaded from `model_name`."""
        # One separate load per network, online network first.
        for attribute in ("q_network", "target_network"):
            setattr(self, attribute, load_model(model_name))

    def save_model(self, model_name):
        """Save the Q-network to `model_name` in the 'tf' save format."""
        network = self.q_network
        network.save(model_name, save_format='tf')
Loading the saved model and continuing on learning results in that error:
agent = MyAgent.DuelingDeepQNAgent()
agent.load_model(model_name)
....
# within learning process
q_values = self.q_network.advantage(state)
AttributeError: 'DuelingDeepQNetwork' object has no attribute 'advantage'
Network:
class DuelingDeepQNetwork(keras.Model):
    """Dueling Q-network: a shared Conv1D/Dense trunk feeding a value head `V` and an advantage head `A`."""

    def __init__(self, input_shape, output_shape, n_filter=64, kernel_size=3, n_neurons1=256, n_neurons2=128):
        super().__init__()
        self.input_layer = keras.layers.InputLayer(input_shape=(None, *input_shape))
        # self.lstm1 = keras.layers.LSTM(units=n_neurons1, activation='relu', stateful=True, return_state=True)
        # Both convolutions share the same hyper-parameters.
        conv_options = dict(filters=n_filter, kernel_size=kernel_size, padding='same', activation='relu')
        self.conv1d1 = keras.layers.Conv1D(**conv_options)
        self.pool1 = keras.layers.AvgPool1D(pool_size=2)
        self.conv1d2 = keras.layers.Conv1D(**conv_options)
        self.pool2 = keras.layers.AvgPool1D(pool_size=2)
        self.flat1 = keras.layers.Flatten()
        self.dens_1 = keras.layers.Dense(n_neurons1, activation='relu')
        self.dens_2 = keras.layers.Dense(n_neurons2, activation='relu')
        self.V = keras.layers.Dense(1, activation=None)  # Value layer
        self.A = keras.layers.Dense(output_shape, activation=None)  # Advantage layer

    def _shared_features(self, state):
        """Run `state` through every layer that precedes the two heads."""
        trunk = (
            self.input_layer,
            self.conv1d1,
            self.pool1,
            self.conv1d2,
            self.pool2,
            self.flat1,
            self.dens_1,
            self.dens_2,
        )
        hidden = state
        for layer in trunk:
            hidden = layer(hidden)
        return hidden

    def call(self, state):
        """Return Q = V + (A - mean(A)), with the mean taken over axis 1."""
        hidden = self._shared_features(state)
        advantages = self.A(hidden)
        value = self.V(hidden)
        centred = advantages - tf.math.reduce_mean(advantages, axis=1, keepdims=True)
        return value + centred

    def advantage(self, state):
        """Return only the advantage head's output for `state`."""
        return self.A(self._shared_features(state))
EDIT:
My previous answer was making a hypothesis about the missing part of the code, but I believe the below is actually the right answer.
I actually worked a bit more on a similar issue, the problem you probably have is that when you load the saved H5 model, it doesn't load the associated functions of the custom class.
What you need to do is to save/load only the weights to ensure you keep the class methods.
def save_model(self, model_name="model_name"):
    """Save only the Q-network's weights under `model_name`.

    Args:
        model_name: Target path for the weights. It defaults to the literal
            "model_name" so that calls without an argument keep working.
            Pass the name without a *.h5 extension.
    """
    self.q_network.save_weights(model_name)  # no *.h5 extension
def load_model(self, model_name="model_name"):
    """Load weights from `model_name` into the already-constructed networks.

    Args:
        model_name: Path the weights were saved under. It defaults to the
            literal "model_name" so that calls without an argument keep working.

    The Q-network is always restored. If the agent also has a
    `target_network` attribute, the same weights are loaded into it so both
    networks start out in step.
    """
    self.q_network.load_weights(model_name)
    target_network = getattr(self, "target_network", None)
    if target_network is not None:
        target_network.load_weights(model_name)