I'm trying to load a Keras RNN model that has a custom RNN cell. Here's my model code:
class RLSCell(keras.layers.Layer):
    def __init__(self, units, filters, kernel, **kwargs):
        self.units = filters
        self.state_size = None
        self.conv3d_1 = layers.Conv3D(filters, kernel, activation='relu', padding='SAME')
        self.conv3d_2 = layers.Conv3D(1, kernel, activation='linear', padding='SAME')
        super(RLSCell, self).__init__(**kwargs)

    def build(self, input_shapes):
        self.state_size = tf.TensorShape([input_shapes[1], input_shapes[2],
                                          input_shapes[3], input_shapes[4]])

    def call(self, inputs, states):
        prev_output = states[0]
        conc = tf.concat([inputs, prev_output], axis=4)
        conv = self.conv3d_1(conc)
        output = self.conv3d_2(conv)
        return output, [output]

cell = RLSCell(img_size, 32, 3)
layer = layers.RNN(cell)
y = layer(inp, initial_state=x)
To save the model, I used Keras' predefined tf.keras.callbacks.ModelCheckpoint callback.
Here's my code for loading the model:
model = tf.keras.models.load_model(
    'RLS results/checkpoint ep11-loss0.02',
    custom_objects={
        'RLSCell': RLSCell,
        'RNN': layers.RNN(RLSCell(IMG_SIZE, 32, 3))})
Whether or not I include the RNN entry, I get the following error:
RuntimeError: Unable to restore object of class 'RNN' likely due to name conflict with built-in Keras class '<class 'keras.layers.recurrent.RNN'>'.
To override the built-in Keras definition of the object, decorate your class with `@keras.utils.register_keras_serializable` and include that file in your program,
or pass your class in a `keras.utils.CustomObjectScope` that wraps this load call
I'm not sure how to tell it that this is the actual Keras RNN class and only the cell is custom. Please help.
Thanks in advance.
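For reference, the two remedies the error message suggests look roughly like this (a sketch, not a verified fix for this exact model; note that custom_objects expects classes, not layer instances such as layers.RNN(...)):

import tensorflow as tf

# Option 1: register the custom cell so the loader can resolve it by name.
@tf.keras.utils.register_keras_serializable(package="Custom")
class RLSCell(tf.keras.layers.Layer):
    ...  # same implementation as shown above

# Option 2: wrap the load call in a custom object scope, mapping the
# class name to the class itself rather than an RNN instance.
with tf.keras.utils.custom_object_scope({'RLSCell': RLSCell}):
    model = tf.keras.models.load_model('RLS results/checkpoint ep11-loss0.02')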
I have trained a TensorFlow model and saved it to local disk. When I load it and run inference, how can I get the output of an intermediate layer?
I use the example from the tutorial as a demo.
The model is:
from tensorflow.keras import Model
from tensorflow.keras.layers import Conv2D, Dense, Flatten

class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

# Create an instance of the model
model = MyModel()
I save the model locally and load it elsewhere.
# save model to local disk
tf.saved_model.save(model, export_dir="./saved_model")

# load model from local disk
loaded = tf.saved_model.load("./saved_model")
concrete_fun = loaded.signatures["serving_default"]

# run inference
out = concrete_fun(tf.zeros((2, 28, 28, 1)))
out["output_1"].shape
As far as I know, the concrete function is tied to fixed inputs and outputs.
How can I get the output, weights and bias of intermediate layers, for example self.conv1?
You can try running:
print([var for var in concrete_fun.trainable_variables])
to get each layer's weights and biases. To access the graph of your model, you can run concrete_fun.graph. See here for more details.
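A slightly more informative variant (a sketch; it uses the same trainable_variables attribute as above):

for var in concrete_fun.trainable_variables:
    print(var.name, var.shape)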
To access the output of intermediate layers, it would be easiest to save the model like this:
model.save('your_model', save_format='tf')
and then load it:
model = tf.keras.models.load_model('your_model')
conv_layer = model.get_layer(index=0)
print(conv_layer(tf.random.normal((1, 28, 28, 1))).shape)
You can access any layer in your model with the .get_layer() method (Model class). You just need the name of the layer:
conv1_layer = model.get_layer("name_of_the_layer")
From this you can access the weights (Layer class):
conv1_layer.get_weights()
the output:
conv1_layer.output
and the bias (https://stackoverflow.com/a/42053507/13469674):
conv1_layer.get_weights()[1]
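Putting those pieces together, here is a minimal sketch for pulling an intermediate activation out of the reloaded model (it assumes the save/load shown above, and "name_of_the_layer" is a hypothetical layer name; a sub-model like this works for models with a defined input graph, i.e. functional or Sequential models):

import tensorflow as tf

model = tf.keras.models.load_model('your_model')
conv1_layer = model.get_layer("name_of_the_layer")  # hypothetical name

# Sub-model from the original inputs to the intermediate output.
feature_extractor = tf.keras.Model(inputs=model.input, outputs=conv1_layer.output)
intermediate = feature_extractor(tf.zeros((2, 28, 28, 1)))
print(intermediate.shape)

For a subclassed model like MyModel above, model.input may not exist; calling the layer directly on a tensor, as in the earlier snippet, is the fallback.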
I recently created a TensorFlow/Keras model with Keras Transformers. To do this, I created custom PositionalEmbedding and TransformerEncoder classes and used them to build the model architecture. They are created as such:
class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=output_dim
        )
        self.sequence_length = sequence_length
        self.output_dim = output_dim

    def call(self, inputs):
        # The inputs are of shape: `(batch_size, frames, num_features)`
        length = tf.shape(inputs)[1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_positions = self.position_embeddings(positions)
        return inputs + embedded_positions

    def compute_mask(self, inputs, mask=None):
        mask = tf.reduce_any(tf.cast(inputs, "bool"), axis=-1)
        return mask
class TransformerEncoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim, dropout=0.3
        )
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation=tf.nn.gelu), layers.Dense(embed_dim)]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs, mask=None):
        if mask is not None:
            mask = mask[:, tf.newaxis, :]
        attention_output = self.attention(inputs, inputs, attention_mask=mask)
        proj_input = self.layernorm_1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm_2(proj_input + proj_output)
At first, I was unable to even save this model using the typical model.save() method. However, I was able to get past that by updating the config for the classes like so:
### FOR THE PositionalEmbedding CLASS
def get_config(self):
    config = super().get_config().copy()
    config.update({
        'position_embeddings': self.position_embeddings,
        'sequence_length': self.sequence_length,
        'output_dim': self.output_dim
    })
    return config

### FOR THE TransformerEncoder CLASS
def get_config(self):
    config = super().get_config().copy()
    config.update({
        'embed_dim': self.embed_dim,
        'dense_dim': self.dense_dim,
        'num_heads': self.num_heads,
        'attention': self.attention,
        'dense_proj': self.dense_proj,
        'layernorm_1': self.layernorm_1,
        'layernorm_2': self.layernorm_2
    })
    return config
However, when I try to load the model using the keras load_model() method without the custom_objects argument, I get the following error:
ValueError: Unknown layer: PositionalEmbedding. Please ensure this object is passed to the `custom_objects` argument.
And if I use the load_model() method without initializing the classes, passing the custom_objects argument for the two classes, as in load_model('my_model.h5', custom_objects={'PositionalEmbedding': PositionalEmbedding, 'TransformerEncoder': TransformerEncoder}), I get the following error:
NameError: name 'PositionalEmbedding' is not defined
And finally, if I do initialize the classes with the updated configs before loading, and use the load_model() method as shown in the previous example, I get the following error:
TypeError: ('Keyword argument not understood:', 'position_embeddings')
Anyone know what might be causing these issues and how I can resolve them so I can load this model? Any help is appreciated!
Thanks!
Sam
So I was actually able to solve this problem with a workaround. Instead of saving the model and loading it the old-fashioned way, I saved a checkpoint while training, then rebuilt the model from scratch and loaded the checkpoint as its weights.
The code for that is below:
### SAVING THE MODEL WITH A CHECKPOINT
filepath = "/content/drive/MyDrive/tmp/model_checkpoint.ckpt"
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath, save_weights_only=True, save_best_only=True, verbose=1
)
history = model.fit(
    train_data,
    train_labels,
    validation_split=0.3,
    epochs=250,
    batch_size=256,
    callbacks=[checkpoint],
)
### CREATING NEW MODEL & LOADING CHECKPOINT AS WEIGHTS
def get_compiled_model():
    sequence_length = MAX_SEQ_LENGTH
    embed_dim = NUM_FEATURES
    dense_dim = 4
    num_heads = 1
    classes = len(label_processor.get_vocabulary())

    inputs = keras.Input(shape=(None, None))
    x = PositionalEmbedding(
        sequence_length, embed_dim, name="frame_position_embedding"
    )(inputs)
    x = TransformerEncoder(embed_dim, dense_dim, num_heads, name="transformer_layer")(x)
    x = layers.GlobalMaxPooling1D()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(classes, activation="softmax")(x)
    model = keras.Model(inputs, outputs)

    model.compile(
        optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
    )
    return model

model = get_compiled_model()
model.load_weights("/content/drive/MyDrive/tmp/model_checkpoint.ckpt")
I want to perform a transformation on the input batch during training. For example, given a batch of images of shape (number of samples, width, height, channels), I want to replace the third channel with the difference of the first two channels, then resize the images and finally normalize them.
I tried to define a custom layer:
class CustomLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(CustomLayer, self).__init__()

    def build(self, input_shape):
        pass

    def call(self, input_):
        # Loaded images
        self.img_tr = []
        for image in input_:
            img_input = resize(image, (267, 400))  # from skimage.transform import resize
            img_diff = (img_input[:,:,1]/np.max(img_input[:,:,1])) - ((img_input[:,:,0]+img_input[:,:,2])/np.max(img_input[:,:,0]+img_input[:,:,2]))
            img_temp = np.zeros((267, 400, 3))
            img_temp[:,:,0] = img_input[:,:,0]/np.max(img_input[:,:,0])
            img_temp[:,:,1] = img_input[:,:,1]/np.max(img_input[:,:,1])
            img_temp[:,:,2] = img_diff/np.max(img_diff)
            self.img_tr.append(img_temp)
        self.img_tr = np.asarray(self.img_tr)
        return self.img_tr
Then I used:
input_0 = tf.keras.Input(shape=(None, None, 3))
clayer = CustomLayer()
input_1 = clayer(input_0)
x = tf.keras.layers.Conv2D(filters=16, kernel_size=(7,7), activation=tf.keras.activations.relu)(input_1)
x = tf.keras.layers.MaxPool2D(pool_size=(2,2))(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(units=64, activation=tf.keras.activations.relu)(x)
output = tf.keras.layers.Dense(units=12)(x)

model = tf.keras.Model(inputs=input_0, outputs=output)
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=tf.keras.metrics.SparseCategoricalAccuracy()
)
model.summary()
I get an error that says:
AttributeError: 'Tensor' object has no attribute 'ndim'
I think the issue is related to the fact that my custom layer expects a 4D numpy array, but the input has this format:
<KerasTensor: shape=(None, None, None, 3) dtype=float32 (created by layer 'input_20')>
How can I resolve the issue? I cannot find a way to convert KerasTensor to a numpy array inside my custom layer.
Edit
I tried to avoid for loops and numpy, so I tried:
class CustomLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(CustomLayer, self).__init__()

    def build(self, input_shape):
        pass

    def call(self, input_):
        input_ = tf.Variable(input_)
        img_input = tf.image.resize(input_, (267, 400))
        img_diff = (img_input[:,:,:,1]) - ((img_input[:,:,:,0] + img_input[:,:,:,2]))
        img_input[:,:,:,2] = img_diff
        output_img = tf.image.per_image_standardization(img_input)
        return input_
However, when I use the custom layer in the functional API I get the error:
ValueError: Tensor-typed variable initializers must either be wrapped in an init_scope or callable (e.g., `tf.Variable(lambda : tf.truncated_normal([10, 40]))`) when building functions. Please file a feature request if this restriction inconveniences you.
It seems to have something to do with tf.Variable. Even if I set validate_shape to False, I still get the same error.
Simply removing tf.Variable does the job. Below is the full layer:
class CustomLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(CustomLayer, self).__init__()

    def build(self, input_shape):
        pass

    def call(self, inp):
        img_input = tf.image.resize(inp, (267, 400))
        img_diff = (img_input[:,:,:,1]) - ((img_input[:,:,:,0] + img_input[:,:,:,2]))
        img_diff = tf.expand_dims(img_diff, -1)
        img_input = tf.keras.layers.Concatenate()([img_input[:,:,:,:-1], img_diff])
        output_img = tf.image.per_image_standardization(img_input)
        return output_img
I used tf.keras.layers.Concatenate to replace the last channel of img_input with img_diff.
Here is the running notebook.
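A quick standalone sanity check of the layer (a sketch; the input shape is an arbitrary choice):

import tensorflow as tf

layer = CustomLayer()
out = layer(tf.random.normal((2, 64, 64, 3)))
print(out.shape)  # (2, 267, 400, 3): resized, last channel replaced by the difference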
I fine-tuned the pretrained model by freezing all layers except the classifier layers, and I saved the weights with PyTorch in .bin format.
Now, instead of loading the 400 MB pre-trained model, is there a way to load just the parameters of the classifier layer I retrained? To be clear, I know that I have to load the original pretrained model; I just don't want to load the entire fine-tuned model, due to memory concerns.
I can access the last layer's parameters from the state_dict as below, but how can I save them in a separate file to use later with less memory usage?
model = PosTaggingModel(num_pos_tag=num_pos_tag)
state_dict = torch.load("model.bin")
print("state dictionary:", state_dict)
with torch.no_grad():
    model.out_pos_tag.weight.copy_(state_dict['out_pos_tag.weight'])
    model.out_pos_tag.bias.copy_(state_dict['out_pos_tag.bias'])
Here is the model class:
class PosTaggingModel(nn.Module):
    def __init__(self, num_pos_tag):
        super(PosTaggingModel, self).__init__()
        self.num_pos_tag = num_pos_tag
        self.model = AutoModel.from_pretrained("dbmdz/bert-base-turkish-cased")
        for name, param in self.model.named_parameters():
            if 'classifier' not in name:  # classifier layer
                param.requires_grad = False
        self.bert_drop = nn.Dropout(0.3)
        self.out_pos_tag = nn.Linear(768, self.num_pos_tag)

    def forward(self, ids, mask, token_type_ids, target_pos_tag):
        o1, _ = self.model(ids, attention_mask=mask, token_type_ids=token_type_ids)
        bo_pos_tag = self.bert_drop(o1)
        pos_tag = self.out_pos_tag(bo_pos_tag)
        loss = loss_fn(pos_tag, target_pos_tag, mask, self.num_pos_tag)
        return pos_tag, loss
I don't know if this is possible, but I'm just looking for a way to save and reuse the last layer's parameters without the parameters of the frozen layers. I couldn't find it in the documentation.
Thanks in advance to those who will help.
You can do it like this:
import torch

# creating a dummy model
class Classifier(torch.nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.first = torch.nn.Linear(10, 10)
        self.second = torch.nn.Linear(10, 20)
        self.last = torch.nn.Linear(20, 1)

    def forward(self, x):
        pass

# Creating its object
model = Classifier()

# Extracting the layer to save
to_save = model.last

# Saving the state dict of that layer
torch.save(to_save.state_dict(), './classifier.bin')

# Recreating the object of that model
model = Classifier()

# Updating the saved layer of model
model.last.load_state_dict(torch.load('./classifier.bin'))
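Applied to the PosTaggingModel from the question, the same pattern would look roughly like this (a sketch; it assumes the model class and num_pos_tag defined above):

import torch

# After fine-tuning: save only the retrained head.
torch.save(model.out_pos_tag.state_dict(), './out_pos_tag.bin')

# Later: rebuild the model (the pretrained backbone loads as usual)
# and restore just the head's parameters.
model = PosTaggingModel(num_pos_tag=num_pos_tag)
model.out_pos_tag.load_state_dict(torch.load('./out_pos_tag.bin'))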
I use TensorFlow 2.0 and want to extract all weights and biases from a trained model. Here is what I have done so far:
I create a model class:
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()  # MyModel comes from a base class
        self.conv1 = Conv2D(filters=32, kernel_size=3, strides=[2,2], activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(units=64, activation="relu")
        self.d2 = Dense(units=10, activation="softmax")

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        x = self.d2(x)
        return x
During and after the training, I save my model:
checkpoint_path = "./logs/model.ckpt"
checkpoint_dir = "./logs/"
self.model.save_weights(checkpoint_path)
self.model.save(checkpoint_dir)
At this point I already ask myself: how do I save the model correctly? Do I use save_weights or just save? I want to be able to:
retrain the model if necessary
extract the model's weights for further analysis
Currently I load my trained model (in a new file) by doing:
model = MyModel()
model.load_weights(checkpoint_path)
But how can I access the network's weights? I already tried tf.compat.v1.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES), which did not work.
I highly appreciate any help!
First, the difference between the two saving methods (a short sketch follows the list):
model.save_weights(): saves only the weights. You need the model code to reconstruct the model as model = MyModel() with initial weights, then you replace the weights via .load_weights().
model.save(): saves the whole model, including the architecture, optimizer state, and weights. You can reproduce the entire model without the code that defines MyModel().
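A minimal sketch of the two styles (the paths are arbitrary choices):

import tensorflow as tf

# Weights only: the model-defining code is required at load time.
model.save_weights('./logs/my_weights')
restored = MyModel()
restored.load_weights('./logs/my_weights')

# Whole model: architecture + optimizer state + weights.
model.save('./logs/full_model')
restored = tf.keras.models.load_model('./logs/full_model')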
By the way, another option in TF2 is the checkpoint manager (tf.train.CheckpointManager), sketched below.
In your case, I would go with .save_weights() or the checkpoint manager.
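A minimal sketch of the checkpoint-manager option (the directory and max_to_keep are arbitrary choices):

import tensorflow as tf

ckpt = tf.train.Checkpoint(model=model)
manager = tf.train.CheckpointManager(ckpt, directory='./tf_ckpts', max_to_keep=3)

manager.save()                           # call during/after training
ckpt.restore(manager.latest_checkpoint)  # restore the most recent checkpoint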
Next, you can analyze the weights by:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Dense, Flatten

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()  # MyModel comes from a base class
        self.conv1 = Conv2D(filters=32, kernel_size=3, strides=[2,2], activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(units=64, activation="relu")
        self.d2 = Dense(units=10, activation="softmax")

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        x = self.d2(x)
        return x
m = MyModel()
input_shape = tf.TensorShape([None, 64, 64, 1])  # for example, 64x64 images with arbitrary batch size
m.build(input_shape)
# Train
# Save weights
# Load weights
# Analyze weights
conv1_weights, conv1_bias = m.conv1.weights
d1_weights, d1_bias = m.d1.weights
d2_weights, d2_bias = m.d2.weights
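Each of those entries is a tf.Variable; .numpy() turns them into plain arrays for analysis (a small sketch; the kernel shape assumes the 64x64x1 build above):

import numpy as np

kernel = conv1_weights.numpy()  # shape (3, 3, 1, 32)
bias = conv1_bias.numpy()       # shape (32,)
print(np.mean(kernel), np.std(kernel))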