How to set layer weights during training tensorflow - python

In every forward pass of the model, I want to implement l2 normalization on the softmax layer's columns, then set the weights back as per the imprinted weights paper and this pytorch implementation. I am using layer.set_weights() to set the normalized weights during the call() function of the model, but this implementation only works with eager execution, as something goes wrong with layer.set_weights() when building the graph.
here is the implementation of the model in tf 1.15:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense
class Extractor(Model):
    """Feature extractor built on an ImageNet-pretrained ResNet50 without its top."""

    def __init__(self, input_shape):
        super(Extractor, self).__init__()
        # pooling="avg" asks the application model for globally average-pooled
        # features instead of the raw convolutional map.
        backbone_options = dict(include_top=False, weights="imagenet",
                                pooling="avg", input_shape=input_shape)
        self.basenet = ResNet50(**backbone_options)

    def call(self, x):
        """Return the ResNet50 features for the batch ``x``."""
        return self.basenet(x)
class Embedding(Model):
    """Dense ReLU projection whose output is optionally L2-normalized.

    Args:
        num_nodes: Width of the dense projection.
        norm: When true, each embedding vector is scaled to unit L2 norm.
    """

    def __init__(self, num_nodes, norm=True):
        super(Embedding, self).__init__()
        self.fc = Dense(num_nodes, activation="relu")
        self.norm = norm

    def call(self, x):
        """Project ``x`` and, if enabled, normalize every embedding vector."""
        x = self.fc(x)
        if self.norm:
            # Normalize along the last axis so every sample is scaled on its
            # own. Without an explicit axis the reduction runs over the whole
            # tensor, which makes one sample's embedding depend on the rest of
            # the batch.
            x = tf.nn.l2_normalize(x, axis=-1)
        return x
class Classifier(Model):
    """Softmax head that re-normalizes its Dense weights on every forward pass.

    NOTE(review): the value handed to ``set_weights`` is the tensor returned by
    ``tf.nn.l2_normalize``. The TypeError quoted in the question names that
    ``l2_normalize`` tensor, so this appears to be the call that breaks outside
    eager execution.
    """

    def __init__(self, n_classes, norm=True, bias=False):
        super(Classifier, self).__init__()
        self.n_classes = n_classes
        self.norm = norm
        # Stored only: the Dense layer in build() is always created with
        # use_bias=False.
        self.bias = bias

    def build(self, inputs_shape):
        self.prediction = Dense(self.n_classes, activation="softmax",
                                use_bias=False)

    def call(self, x):
        if self.norm:
            kernels = self.prediction.trainable_weights
            # Presumably empty until the Dense layer has created its weights,
            # in which case normalization is skipped.
            if kernels:
                normalized = tf.nn.l2_normalize(kernels, axis=2)
                self.prediction.set_weights(normalized)
        return self.prediction(x)
class Net(Model):
    """Full pipeline: Extractor -> Embedding -> Classifier."""

    def __init__(self, input_shape, n_classes, num_nodes, norm=True,
                 bias=False):
        super(Net, self).__init__()
        self.n_classes, self.num_nodes = n_classes, num_nodes
        self.norm, self.bias = norm, bias
        self.extractor = Extractor(input_shape)
        self.embedding = Embedding(num_nodes, norm=norm)
        self.classifier = Classifier(n_classes, norm=norm, bias=bias)

    def call(self, x):
        """Chain the three sub-models and return the classifier output."""
        features = self.extractor(x)
        embedded = self.embedding(features)
        return self.classifier(embedded)
The weight normalization can be found in the call step of the Classifier class, where I call .set_weights() after normalizing it.
Creating the model with model = Net(input_shape,n_classes, num_nodes) and using model(x) works, but model.predict() and model.fit() give me errors about .get_weights(). I can train the model in eager mode with gradient tape, but it is extremely slow.
Is there another way I can set the weights of a Dense layer during training in each forward call but lets me use the model outside of eager mode? When I say eager mode I mean with tf.enable_eager_execution() at the start of the program.
Here is the error I get when I use model.predict(x) instead:
TypeError: len is not well defined for symbolic Tensors. (imprint_net_1/classifier/l2_normalize:0) Please call `x.shape` rather than `len(x)` for shape information.

Related

PyTorch Lightning Transfer Learning with Stacked LSTM

I have trained a stacked LSTM on PyTorch Lightning with the following layers:
# Constructor of the LSTM classifier (the enclosing class header is not part of
# this excerpt). Builds a stacked LSTM followed by ReLU, a 2-unit linear layer
# and a sigmoid.
def __init__(self, n_features, hidden_size, batch_size, num_layers, dropout, learning_rate):
super(LSTMClassifier, self).__init__()
# Elided in the original post. forward() reads self.batch_size, so it is
# presumably assigned here (TODO confirm).
...
# Architecture Baseline
self.lstm = nn.LSTM(input_size=n_features,
hidden_size=hidden_size,
num_layers=num_layers,
dropout=dropout,
batch_first=True)
self.relu = nn.ReLU()
# Two output units.
self.fc = nn.Linear(hidden_size, 2)
self.sigmoid = nn.Sigmoid()
and architecture:
def forward(self, x):
    """Run LSTM -> ReLU -> linear -> sigmoid and return ``self.batch_size`` rows.

    Every row of ``x`` is passed to the LSTM as its own sequence of length 1
    rather than all rows together as one sequence.
    """
    # The LSTM needs a 3D tensor, so insert a singleton middle dimension.
    sequences = x.view(len(x), 1, -1)
    hidden, _ = self.lstm(sequences)
    scores = self.sigmoid(self.fc(self.relu(hidden)))
    # Reshape back so the result is compatible with the true values' shape.
    return scores.reshape(self.batch_size, -1)
Now, I want to use this pre-trained model for transfer learning according to the PyTorch Lightning tutorial:
import torchvision.models as models
# Transfer-learning module: a pretrained feature extractor that is run without
# gradient tracking, followed by a freshly created linear classifier.
class ImagenetTransferLearning(LightningModule):
def __init__(self):
super().__init__()
# init a pretrained resnet
# NOTE: the right-hand side is left out in the original post; it stands for
# loading the previously trained model from file.
backbone = # loading the pretrained model from file
num_filters = backbone.fc.in_features
# Keep every child module of the backbone except the last three.
layers = list(backbone.children())[:-3]
self.feature_extractor = nn.Sequential(*layers)
# use the pretrained model for binary classification
num_target_classes = 2
self.classifier = nn.Linear(num_filters, num_target_classes)
def forward(self, x):
# The extractor is switched to eval mode and evaluated under no_grad.
self.feature_extractor.eval()
with torch.no_grad():
representations = self.feature_extractor(x).flatten(1)
x = self.classifier(representations)
# Remainder elided in the original post.
...
However, LSTM layers cannot be entered into an nn.Sequential() as per this question. So, what sort of module can I use as feature_extractor instead of nn.Sequential()?
Ultimately, based on the linked question, I changed the feature_extractor definition as below (assuming you know the layers you want to freeze):
# GetLSTMOutput() sits between the two kept layers inside nn.Sequential.
# It is defined in the linked question, not here; presumably it passes on only
# the LSTM's output tensor (TODO confirm).
self.feature_extractor = nn.Sequential(self.layers[0], GetLSTMOutput(), self.layers[1])
Hopefully, this helps someone as I've been baffled for a while.

TF2.6: ValueError: Model cannot be saved because the input shapes have not been set

I want to create a custom model using transfer learning in Google Colab.
import tensorflow as tf
from tensorflow.keras.layers import Conv2D
from tensorflow.python.keras.applications.xception import Xception
class MyModel(tf.keras.Model):
    """Xception backbone followed by a 1x1 softmax convolution.

    ``dropout_rate`` is accepted by the constructor but not used in this block.
    """

    def __init__(self, input_shape, num_classes=5, dropout_rate=0.5):
        super().__init__()
        backbone = Xception(input_shape=input_shape, weights='imagenet', include_top=False)
        head = Conv2D(num_classes, (1, 1), padding="same", activation="softmax")
        self.weight_dict = {'backbone': backbone, 'outputs': head}
        # Build right away with an unspecified batch dimension.
        self.build((None,) + input_shape)

    def call(self, inputs, training=False):
        backbone = self.weight_dict['backbone']
        # Re-applied on every call so the backbone stays non-trainable.
        backbone.trainable = False
        features = backbone(inputs)
        return self.weight_dict['outputs'](features)
# Instantiate the model for inputs of shape (256, 256, 3).
model = MyModel(input_shape=(256, 256, 3))
# This is the call that raises the ValueError quoted below.
model.save('./saved')
However, I encounter this error:
ValueError: Model `<__main__.MyModel object at 0x7fc66134bdd0>` cannot be saved because the input shapes have not been set. Usually, input shapes are automatically determined from calling `.fit()` or `.predict()`. To manually set the shapes, call `model.build(input_shape)`.
Yes, there is no call to .fit() or .predict(). But there is a call to .build in the __init__() method of the class. What am I to do?
If the layer has not been built, compute_output_shape will call build on the layer. This assumes that the layer will later be used with inputs that match the input shape provided.
Working code as shown below
import tensorflow as tf
print(tf.__version__)
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.applications.xception import Xception
class MyModel(tf.keras.Model):
    """Xception feature extractor with a per-pixel softmax head (1x1 convolution).

    ``dropout_rate`` is part of the signature but is not read anywhere in this
    block.
    """

    def __init__(self, input_shape, num_classes=5, dropout_rate=0.5):
        super().__init__()
        self.weight_dict = dict(
            backbone=Xception(input_shape=input_shape, weights='imagenet', include_top=False),
            outputs=Conv2D(num_classes, (1, 1), padding="same", activation="softmax"),
        )
        # The batch dimension is left open.
        self.build((None,) + input_shape)

    def call(self, inputs, training=False):
        layers = self.weight_dict
        # The backbone is marked non-trainable on each forward pass.
        layers['backbone'].trainable = False
        return layers['outputs'](layers['backbone'](inputs))
input_shape=(256, 256, 3)
model=MyModel(input_shape)
# Calling compute_output_shape with the full shape (batch dimension included)
# before saving is the one difference from the failing snippet.
model.compute_output_shape(input_shape=(None, 256, 256, 3))
model.save('./saved')
Output:
2.6.0
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
83689472/83683744 [==============================] - 1s 0us/step
INFO:tensorflow:Assets written to: ./saved/assets
For more information you can refer here.
If you build e.g. a GNN with multiple inputs of variable size, the proposal of TFer2 won't work. Specifying the TensorSpecs in the decorator AND using tf.saved_model.save instead works:
import keras.layers
import tensorflow as tf
import unittest
class TestModel(keras.Model):
    """Minimal two-input model with a single 10x10 weight matrix."""

    def __init__(self):
        super(TestModel, self).__init__()
        self.w = tf.Variable(initial_value=tf.initializers.he_normal()(shape=[10, 10]))

    # The input signature defines the sizes of the two inputs of the network
    # call: the first dimension is fixed to 10, the second is left variable.
    @tf.function(input_signature=[tf.TensorSpec([10, None], tf.float32),
                                  tf.TensorSpec([10, None], tf.float32)])
    def __call__(self, x, y):
        """Return ``w @ x + w @ y``."""
        return tf.matmul(self.w, x) + tf.matmul(self.w, y)
class SaveAndLoadTest(unittest.TestCase):
    """Round-trips TestModel through tf.saved_model; all checks run in __init__."""

    def __init__(self):
        super(SaveAndLoadTest, self).__init__()
        export_dir = "/tmp/test_model/"
        x = tf.ones([10, 5])
        y = tf.ones([10, 5])
        model = TestModel()
        expected = model(x, y)

        # Saving the model this way works.
        tf.saved_model.save(model, export_dir)
        # Saving through Model.save is expected to fail, regardless of the
        # TensorSpecs assigned to tf.function.
        with self.assertRaises(ValueError):
            model.save(export_dir)

        restored = tf.saved_model.load(export_dir)
        actual = restored(x, y)
        # The reloaded model must reproduce the original outputs.
        self.assertTrue((actual.numpy() == expected.numpy()).all())
if __name__ == "__main__":
    # Constructing the test case is what runs the checks (they live in __init__).
    SaveAndLoadTest()
    print("Success.")

Tensorflow 2.0: flat_map() to flatten Dataset of Dataset returns cardinality -2

I am trying to run the following code (as given in Tensorflow documentation) to create windows of my data and then flatten the dataset of datasets.
window_size = 5
# Each element of `windows` is itself a dataset: one window, shifted by one.
windows = range_ds.window(window_size, shift=1)
for sub_ds in windows.take(5):
    print(sub_ds)
# Flatten the dataset of datasets back into a single dataset.
flat_windows = windows.flat_map(lambda x: x)
The problem is that flat_windows.cardinality().numpy() returns -2, which causes problems for me during training. I looked for a way to set the cardinality of a dataset but couldn't find anything. I also tried other ways of flattening a dataset of datasets, but again without success.
Edit-1: The problem with the training is that the shape is unknown (at Linear and Dense layers) when I am training a subclass model (given below). The model trains well when I train the model eagerly (through tf.config.run_functions_eagerly(True)) but that is slow. Therefore I want the input data to be known for the model training.
Neural Network
class NeuralNetworkModel(tf.keras.Model):
    """Trains an Encoder by minimizing the distance between encodings of X and Y.

    Each batch is a pair ``(X, Y)``; the loss is the Frobenius norm of
    ``encoder(Y) - encoder(X)``. ``loss_tracker`` is a module-level metric that
    is defined outside this excerpt.
    """

    def __init__(self):
        super(NeuralNetworkModel, self).__init__()
        self.encoder = Encoder()

    def _encoding_gap(self, inputs):
        """Return the Frobenius norm of ``encoder(Y) - encoder(X)``."""
        X = inputs[0]
        Y = inputs[1]
        enc_X = self.encoder(X)
        enc_Y = self.encoder(Y)
        return tf.norm(enc_Y - enc_X, axis=[0, 1], ord='fro')

    def train_step(self, inputs):
        """Run one optimization step and return the tracked loss."""
        with tf.GradientTape() as tape:
            loss = self._encoding_gap(inputs)
        # Compute gradients
        trainable_vars = self.encoder.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Compute our own metrics
        loss_tracker.update_state(loss)
        # Return a dict mapping metric names to current value.
        return {"loss": loss_tracker.result()}

    @property
    def metrics(self):
        # We list our `Metric` objects here so that `reset_states()` can be
        # called automatically at the start of each epoch
        # or at the start of `evaluate()`.
        # If you don't implement this property, you have to call
        # `reset_states()` yourself at the time of your choosing.
        return [loss_tracker]

    def test_step(self, inputs):
        """Evaluate the loss on one batch and return the tracked loss."""
        loss = self._encoding_gap(inputs)
        # Compute our own metrics
        loss_tracker.update_state(loss)
        # Return a dict mapping metric names to current value.
        return {"loss": loss_tracker.result()}
class Encoder(tf.keras.Model):
    """float64 stack of four DenseLayers (128, 128, 64, 64) and a LinearLayer (64)."""

    def __init__(self):
        super(Encoder, self).__init__(dtype='float64', name='Encoder')
        self.input_layer = DenseLayer(128)
        self.hidden_layer1 = DenseLayer(128)
        self.hidden_layer2 = DenseLayer(64)
        self.hidden_layer3 = DenseLayer(64)
        self.output_layer = LinearLayer(64)

    def call(self, input_data, training):
        """Feed ``input_data`` through the layers in order; ``training`` is unused."""
        stack = (
            self.input_layer,
            self.hidden_layer1,
            self.hidden_layer2,
            self.hidden_layer3,
            self.output_layer,
        )
        fx = input_data
        for layer in stack:
            fx = layer(fx)
        return fx
class LinearLayer(tf.keras.layers.Layer):
    """Affine layer ``inputs @ w + b`` created with dtype float64, no activation."""

    def __init__(self, units):
        super(LinearLayer, self).__init__(dtype='float64')
        self.units = units

    def build(self, input_shape):
        # The weight matrix maps the last input dimension to `units`.
        fan_in = input_shape[-1]
        self.w = self.add_weight(shape=(fan_in, self.units),
                                 initializer="random_normal",
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer=tf.zeros_initializer(),
                                 trainable=True)

    def call(self, inputs):
        projected = tf.matmul(inputs, self.w)
        return projected + self.b
class DenseLayer(tf.keras.layers.Layer):
    """Affine layer followed by ELU, created with dtype float64."""

    def __init__(self, units):
        super(DenseLayer, self).__init__(dtype='float64')
        self.units = units

    def build(self, input_shape):
        # Weights are sized from the last dimension of the incoming shape.
        n_inputs = input_shape[-1]
        self.w = self.add_weight(shape=(n_inputs, self.units),
                                 initializer="random_normal",
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer=tf.zeros_initializer(),
                                 trainable=True)

    def call(self, inputs):
        return tf.nn.elu(tf.matmul(inputs, self.w) + self.b)
I was wondering about this as well. Turns out that -2 is tf.data.UNKNOWN_CARDINALITY (https://www.tensorflow.org/api_docs/python/tf/data#UNKNOWN_CARDINALITY), which represents that TF doesn't know how many elements the flat_map returns per item.
I just asked Windowing a TensorFlow dataset without losing cardinality information? to see if anyone knows a way to window datasets without losing cardinality.

AttributeError: 'SparseCategoricalCrossentropy' object has no attribute '_id'

I've been trying to recreate a simple DNN using just the base Keras layer and writing everything from scratch. Everything seems to work just fine, but during the training loop I get this error:
AttributeError: 'SparseCategoricalCrossentropy' object has no attribute '_id'
I've tried changing the loss function to either CategoricalCrossentropy and SparseCategoricalCrossentropy (with from_logits True or False), but the error always pops up.
Here's the code:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from utils import plot_image, plot_mnist_results, plot_value_array
class Flatten(keras.layers.Layer):
    """Reshapes its input to ``(-1, output_size)``."""

    def __init__(self):
        super(Flatten, self).__init__()

    def build(self, input_shape):
        # Product of every entry of input_shape, the leading one included.
        self.output_size = np.prod(input_shape)

    def call(self, X):
        target_shape = (-1, self.output_size)
        return tf.reshape(X, shape=target_shape)
class Dense(keras.layers.Layer):
    """Hand-written dense layer: ``activation(X @ kernel + bias)`` in float64."""

    def __init__(self, units, activation):
        super(Dense, self).__init__()
        self.units = units
        self.activation = activation

    def build(self, input_shape):
        fan_in = input_shape[-1]
        self.kernel = self.add_weight(
            name='kernel',
            shape=(fan_in, self.units),
            dtype=tf.float64,
            initializer='glorot_normal',
            trainable=True,
        )
        # The bias is a single row of `units` values, each starting at 0.1.
        self.bias = self.add_weight(
            name='bias',
            shape=(1, self.units),
            dtype=tf.float64,
            initializer=keras.initializers.Constant(0.1),
            trainable=True,
        )

    def call(self, X):
        pre_activation = tf.matmul(X, self.kernel) + self.bias
        return self.activation(pre_activation)
class DNN(keras.models.Model):
    """Flatten -> hidden Dense -> 10-way softmax Dense.

    Args:
        units: Width of the hidden layer.
        activation: Activation callable applied by the hidden layer.
    """

    def __init__(self, units, activation):
        super(DNN, self).__init__()
        self.units = units
        self.activation = activation

    def build(self, input_shape):
        self.flatten = Flatten()
        # Honour the activation passed to the constructor instead of a
        # hard-coded relu.
        self.hidden_layer = Dense(self.units, self.activation)
        self.output_layer = Dense(10, tf.nn.softmax)

    def call(self, X):
        """Run the forward pass once, printing the intermediate and final shapes."""
        hidden = self.hidden_layer(self.flatten(X))
        print(hidden.shape)
        output = self.output_layer(hidden)
        print(output.shape)
        return output
# @tf.function
def train(model, loss, opt, X, y):
    """Apply one optimizer step using the gradient of ``loss(model(X), y)``.

    NOTE(review): ``loss`` is invoked as ``loss(model(X), y)`` and whatever it
    returns is differentiated as-is, so it has to be a tensor.
    """
    with tf.GradientTape() as tape:
        # The forward pass runs before the variables are read.
        loss_value = loss(model(X), y)
        gradients = tape.gradient(loss_value, model.trainable_variables)
    opt.apply_gradients(zip(gradients, model.trainable_variables))
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Divide the pixel values by 255.
train_images = train_images / 255.0
test_images = test_images / 255.0

model = DNN(units=128, activation=tf.nn.relu)
opt = tf.optimizers.Adam(learning_rate=1e-3)

for epoch in range(3):
    # One training step per individual sample.
    for step in range(train_labels.shape[0]):
        # NOTE(review): this binds the SparseCategoricalCrossentropy class
        # itself; it is never instantiated.
        loss = keras.losses.SparseCategoricalCrossentropy
        train(model, loss, opt, train_images[step, :, :], train_labels[step])
    train_loss = loss(model(train_images), train_labels)
    template = 'Epoch {}, Train loss: {:.5f}'
    print(template.format(epoch + 1, train_loss.numpy()))
I would expect for the model to train successfully, but it doesn't seem to be the case. What am I doing wrong?
From the given code, I can see that you are mixing tf and keras in places such as the ones below.
opt = tf.optimizers.Adam(learning_rate=1e-3)
loss = keras.losses.SparseCategoricalCrossentropy
This could raise issues like this. For TensorFlow 2.0, you can use tf.keras uniformly in all places wherever you use keras directly.
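Also, I noticed that you are creating the loss object inside the batch loop, which is not correct. You should instantiate it once, before the epoch loop starts. Note that it must actually be instantiated, i.e. keras.losses.SparseCategoricalCrossentropy() with parentheses; otherwise loss is the class itself rather than a loss object.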
Rest all seems fine. Hope this helps!!!

How can I deal with this code error which happens in my custom layer in Keras?

I want to make a custom layer in Keras.
In this example, I use a variable to multiply the tensor, but i get the error which is
in /keras/engine/training_arrays.py, line 304, in predict_loop
outs[i][batch_start:batch_end] = batch_out ValueError: could not broadcast input array from shape (36) into shape (2).
I have checked this file but found nothing. Is there something wrong in my custom layer?
#the definition of mylayer.
from keras import backend as K
import keras
from keras.engine.topology import Layer
class mylayer(Layer):
    """Custom layer that multiplies by a single trainable weight of shape (1,).

    NOTE(review): ``call`` multiplies ``inputs[0]`` rather than ``inputs``; the
    answer further down attributes the reported shape mismatch to this.
    """

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(mylayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.kernel = self.add_weight(
            name='kernel',
            shape=(1,),
            dtype='float32',
            trainable=True,
            initializer='uniform',
        )
        super(mylayer, self).build(input_shape)

    def call(self, inputs, **kwargs):
        first = inputs[0]
        return self.kernel * first

    def compute_output_shape(self, input_shape):
        # Report the first two entries of the input shape unchanged.
        leading, width = input_shape[0], input_shape[1]
        return (leading, width)
#the test of mylayer.
from mylayer import mylayer
from tensorflow import keras as K
import numpy as np
from keras.layers import Input, Dense, Flatten
from keras.models import Model
# Two random samples of shape (3, 4, 3); each target row has 36 values.
x_train = np.random.random((2, 3, 4, 3))
y_train = np.random.random((2, 36))
print(x_train)
x = Input(shape=(3, 4, 3))
y = Flatten()(x)
# The custom layer is applied to the flattened tensor.
output = mylayer((36, ))(y)
model = Model(inputs=x, outputs=output)
model.summary()
model.compile(optimizer='Adam',loss='categorical_crossentropy',metrics=['accuracy'])
model.fit(x_train, y_train, epochs=2)
# Per the question, training succeeds and this predict call is what fails.
hist = model.predict(x_train,batch_size=2)
print(hist)
print(model.get_layer(index=1).get_weights())
#So is there some wrong in my custom error?
In particular, training this net works fine, but calling "predict" fails.
Your shape of self.kernel * inputs[0] is (36,), but your expectation is (?,36). Change it:
def call(self, inputs, **kwargs):
    """Scale the whole input by the layer's kernel weight."""
    scaled = self.kernel * inputs
    return scaled
If you want to output the weight of mylayer, you should set index=2.

Categories