I have trained a stacked LSTM on PyTorch Lightning with the following layers:
# Constructor of the LSTMClassifier module (the class header is not part of this excerpt).
def __init__(self, n_features, hidden_size, batch_size, num_layers, dropout, learning_rate):
super(LSTMClassifier, self).__init__()
# Architecture Baseline
# NOTE(review): the nn.LSTM(...) call below is cut off in this excerpt; only input_size is visible.
self.lstm = nn.LSTM(input_size=n_features,
self.relu = nn.ReLU()
# Linear head mapping hidden_size features to 2 outputs.
self.fc = nn.Linear(hidden_size, 2)
self.sigmoid = nn.Sigmoid()
and architecture:
def forward(self, x):
    """Run a batch through the LSTM, ReLU, linear head and sigmoid.

    Args:
        x: Input tensor whose first dimension indexes the rows of the
            batch; all remaining dimensions are flattened together.

    Returns:
        Tensor with one row per input row, holding the sigmoid outputs of
        the linear head.
    """
    # Take the row count from the input itself rather than from
    # self.batch_size, so a final, smaller batch still reshapes correctly.
    n_rows = len(x)
    # reshape to pass each element through the lstm, and not all together;
    # the LSTM needs a 3D tensor
    x = x.view(n_rows, 1, -1)
    out, _ = self.lstm(x)
    out = self.relu(out)
    out = self.fc(out)
    out = self.sigmoid(out)
    # reshape back to be compatible with the true values' shape
    return out.reshape(n_rows, -1)
Now, I want to use this pre-trained model for transfer learning according to the PyTorch Lightning tutorial:
import torchvision.models as models
# Transfer-learning template: a backbone with its last child modules removed feeds a new linear classifier.
class ImagenetTransferLearning(LightningModule):
def __init__(self):
# init a pretrained resnet
# NOTE(review): placeholder line — the right-hand side still has to be replaced by code that loads the model.
backbone = # loading the pretrained model from file
# in_features of backbone.fc is reused as the input size of the new classifier.
num_filters = backbone.fc.in_features
# Keep every child module of the backbone except the last three.
layers = list(backbone.children())[:-3]
self.feature_extractor = nn.Sequential(*layers)
# use the pretrained model for binary classification
num_target_classes = 2
self.classifier = nn.Linear(num_filters, num_target_classes)
def forward(self, x):
# The feature extractor is evaluated under torch.no_grad().
with torch.no_grad():
representations = self.feature_extractor(x).flatten(1)
x = self.classifier(representations)
# NOTE(review): no return statement is visible in this excerpt.
However, LSTM layers cannot be entered into an nn.Sequential() as per this question. So, what sort of module can I use as feature_extractor instead of nn.Sequential()?
Ultimately, based on the linked question, I changed the feature_extractor definition as below (assuming you know the layers you want to freeze):
# Chains layers[0], a GetLSTMOutput() step and layers[1]. GetLSTMOutput is defined outside this excerpt
# (presumably it keeps only the output tensor of the tuple an LSTM returns — TODO confirm).
self.feature_extractor = nn.Sequential(self.layers[0], GetLSTMOutput(), self.layers[1])
Hopefully, this helps someone as I've been baffled for a while.
For example, I define a model for two tasks in a multi-task way.
class BertMy(nn.Module):
    """Two-headed model: a masked-LM BERT followed by two linear heads.

    The BERT logits are flattened per sample, then batch-normalised and
    passed through dropout separately for each head.
    """

    def __init__(self, segment_size, output_size, dropout):
        super().__init__()
        self.bert = AutoModelForMaskedLM.from_pretrained("cl-tohoku/bert-base-japanese")
        self.bert_vocab_size = 32000
        # Size of one sample once its logits are flattened.
        flat_features = segment_size * self.bert_vocab_size
        self.bn = nn.BatchNorm1d(flat_features)
        self.fc1 = nn.Linear(flat_features, output_size)
        self.fc2 = nn.Linear(flat_features, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        logits = self.bert(x).logits
        flat = logits.view(logits.shape[0], -1)
        # Each head gets its own batch-norm + dropout pass over the same input.
        first = self.fc1(self.dropout(self.bn(flat)))
        second = self.fc2(self.dropout(self.bn(flat)))
        return [first, second]
I want to add two individual LSTM layers, one for each task, in order to predict the two tasks individually.
How should I define this in PyTorch?
Problem description:
I have a model based on BERT, with a classifier layer on top. I want to export it to ONNX, but to avoid issues on the side of the 'user' of the ONNX model, I want to export the entire pipeline, including tokenization, as an ONNX model. However, this requires a basic string as the input type, which I believe ONNX does not support.
The Model:
class BertClassifier(nn.Module):
    """Classifier model with a BERT encoder and a single fully connected classifier layer.

    Args:
        dropout: Dropout probability applied to BERT's pooled output.
        num_labels: Number of outputs of the linear classifier layer.
    """

    def __init__(self, dropout=0.5, num_labels=24):
        super(BertClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(dropout)
        # 768 is the input width the linear layer expects from BERT's pooled output.
        self.linear = nn.Linear(768, num_labels)
        self.relu = nn.ReLU()
        self.best_score = 0

    def forward(self, input_id, mask):
        """Return the ReLU-activated class scores for a batch.

        Args:
            input_id: Token ids, passed to BERT as ``input_ids``.
            mask: Attention mask, passed to BERT as ``attention_mask``.
        """
        # With return_dict=False the encoder returns a tuple; only its
        # second element (the pooled output) is used here.
        _, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
        output = self.relu(self.linear(self.dropout(pooled_output)))
        return output
The Tokenizer:
def get_tokenizer(chosen_model):
    """Load the pretrained tokenizer identified by ``chosen_model``."""
    # e.g. chosen_model = 'bert_base_uncased'
    tokenizer = AutoTokenizer.from_pretrained(chosen_model)
    return tokenizer
Combined Pipeline:
# Wrapper that bundles the tokenizer with the BERT classifier so that forward() takes a raw string.
class OnnxBertModel(nn.Module):
Model wrapper for onnx. Allows user to only provide a string as input. Output is a list of class probabilities
def __init__(self, dropout=0.5, num_labels=24):
super(OnnxBertModel, self).__init__()
self.bert = BertModel.from_pretrained('bert-base-uncased')
self.dropout = nn.Dropout(dropout)
self.linear = nn.Linear(768, num_labels)
self.relu = nn.ReLU()
self.best_score = 0
# The tokenizer is stored on the module so forward() can tokenize its string input itself.
self.tokenizer = get_tokenizer('bert-base-uncased')
def forward(self, input_string):
# NOTE(review): the tokenizer call below is cut off in this excerpt (its closing arguments are missing).
input_tokens = self.tokenizer(input_string,
padding='max_length', max_length=512, truncation=True,
mask = input_tokens['attention_mask']
# squeeze(1) drops dimension 1 of the token ids when it has size 1.
input_id = input_tokens['input_ids'].squeeze(1)
# Only the second element returned by BERT (the pooled output) is used.
_, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
output = self.relu(self.linear(self.dropout(pooled_output)))
return output
Additional code to export:
# Build the wrapper with one output per label (labels is defined outside this excerpt).
model = OnnxBertModel(num_labels=len(labels))
# Export attempt: ex_string (defined outside this excerpt) is passed as the example input.
torch.onnx.export(model, ex_string, 'tryout.onnx', export_params=True, do_constant_folding=False)
The last call does not work due to the string typing.
In every forward pass of the model, I want to implement l2 normalization on the softmax layer's columns, then set the weights back as per the imprinted weights paper and this pytorch implementation. I am using layer.set_weights() to set the normalized weights during the call() function of the model, but this implementation only works with eager execution, as something goes wrong with layer.set_weights() when building the graph.
Here is the implementation of the model in TF 1.15:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense
class Extractor(Model):
    """ResNet50 feature extractor with ImageNet weights and average pooling.

    Args:
        input_shape: Shape forwarded to ``ResNet50`` as ``input_shape``.
    """

    def __init__(self, input_shape):
        super(Extractor, self).__init__()
        self.basenet = ResNet50(include_top=False, weights="imagenet",
                                pooling="avg", input_shape=input_shape)

    def call(self, x):
        """Return the base network's output for ``x``."""
        # The result has to be returned; otherwise call() yields None and
        # the next layer in the pipeline has nothing to consume.
        return self.basenet(x)
class Embedding(Model):
    """Dense ReLU layer whose output is optionally L2-normalised."""

    def __init__(self, num_nodes, norm=True):
        super(Embedding, self).__init__()
        self.fc = Dense(num_nodes, activation="relu")
        self.norm = norm

    def call(self, x):
        embedded = self.fc(x)
        # Normalisation is skipped entirely when norm is falsy.
        return tf.nn.l2_normalize(embedded) if self.norm else embedded
# Final prediction layer of the network; its Dense layer is created lazily in build().
class Classifier(Model):
def __init__(self, n_classes, norm=True, bias=False):
super(Classifier, self).__init__()
self.n_classes = n_classes
self.norm = norm
self.bias = bias
def build(self, inputs_shape):
# NOTE(review): the Dense(...) call below is cut off in this excerpt; only its first argument is visible.
self.prediction = Dense(self.n_classes,
def call(self, x):
if self.norm:
# trainable_weights is a list; it is empty until the Dense layer has created its variables.
w = self.prediction.trainable_weights
if w:
# NOTE(review): the normalised value is assigned to w but not used by any later line visible here.
w = tf.nn.l2_normalize(w, axis=2)
x = self.prediction(x)
return x
# Full model: Extractor -> Embedding -> Classifier, applied in that order in call().
class Net(Model):
# NOTE(review): the signature below is cut off in this excerpt (parameters after norm are missing).
def __init__(self, input_shape, n_classes, num_nodes, norm=True,
super(Net, self).__init__()
self.n_classes = n_classes
self.num_nodes = num_nodes
self.norm = norm
# bias is not among the visible parameters; presumably it is declared in the truncated part of the signature.
self.bias = bias
self.extractor = Extractor(input_shape)
self.embedding = Embedding(self.num_nodes, norm=self.norm)
# NOTE(review): the Classifier(...) call below is cut off in this excerpt.
self.classifier = Classifier(self.n_classes, norm=self.norm,
def call(self, x):
x = self.extractor(x)
x = self.embedding(x)
x = self.classifier(x)
return x
The weight normalization can be found in the call step of the Classifier class, where I call .set_weights() after normalizing it.
Creating the model with model = Net(input_shape,n_classes, num_nodes) and using model(x) works, but model.predict() and model.fit() give me errors about .get_weights(). I can train the model in eager mode with gradient tape, but it is extremely slow.
Is there another way I can set the weights of a Dense layer during training in each forward call that still lets me use the model outside of eager mode? When I say eager mode, I mean with tf.enable_eager_execution() at the start of the program.
Here is the error I get when I use model.predict(x) instead:
TypeError: len is not well defined for symbolic Tensors. (imprint_net_1/classifier/l2_normalize:0) Please call `x.shape` rather than `len(x)` for shape information.
I've been trying to recreate a simple DNN using just the base Keras layer and writing everything from scratch. Everything seems to work just fine, but during the training loop I get this error:
AttributeError: 'SparseCategoricalCrossentropy' object has no attribute '_id'
I've tried changing the loss function to either CategoricalCrossentropy or SparseCategoricalCrossentropy (with from_logits set to True or False), but the error always pops up.
Here's the code:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from utils import plot_image, plot_mnist_results, plot_value_array
class Flatten(keras.layers.Layer):
    """Layer that reshapes its input to ``(-1, output_size)``."""

    def __init__(self):
        super().__init__()

    def build(self, input_shape):
        # Product over every entry of the shape that build() receives.
        self.output_size = np.prod(input_shape)

    def call(self, X):
        target_shape = (-1, self.output_size)
        return tf.reshape(X, shape=target_shape)
# Hand-written fully connected layer: activation(X @ kernel + bias).
class Dense(keras.layers.Layer):
def __init__(self, units, activation):
super(Dense, self).__init__()
self.units = units
self.activation = activation
def build(self, input_shape):
# NOTE(review): both add_weight(...) calls below are cut off in this excerpt; only the shape arguments are visible.
# Kernel shape: (last input dimension, units).
self.kernel = self.add_weight(
shape=(input_shape[-1], self.units)
# Bias shape: (1, units).
self.bias = self.add_weight(
shape=(1, self.units)
def call(self, X):
return self.activation(tf.matmul(X, self.kernel) + self.bias)
class DNN(keras.models.Model):
    """Flatten -> hidden Dense -> 10-way softmax Dense.

    Args:
        units: Number of units in the hidden layer.
        activation: Activation callable for the hidden layer. ``None``
            falls back to ``tf.nn.relu``, the value that used to be
            hard-coded.
    """

    def __init__(self, units, activation):
        super(DNN, self).__init__()
        self.units = units
        self.activation = activation

    def build(self, input_shape):
        self.flatten = Flatten()
        # Honour the activation passed to the constructor instead of
        # silently ignoring it in favour of a hard-coded ReLU.
        hidden_activation = tf.nn.relu if self.activation is None else self.activation
        self.hidden_layer = Dense(self.units, hidden_activation)
        self.output_layer = Dense(10, tf.nn.softmax)

    def call(self, X):
        return self.output_layer(self.hidden_layer(self.flatten(X)))
# @tf.function
def train(model, loss, opt, X, y):
    """Run one optimisation step of ``model`` on the batch ``(X, y)``.

    Args:
        model: Callable as ``model(X)`` and exposing ``trainable_variables``.
        loss: Callable invoked as ``loss(y_true, y_pred)``.
        opt: Optimizer exposing ``apply_gradients``.
        X: Input batch.
        y: Targets for ``X``.
    """
    with tf.GradientTape() as tape:
        # The forward pass has to run inside the tape so it is recorded.
        # Keras losses take the targets first and the predictions second.
        loss_value = loss(y, model(X))
    gradients = tape.gradient(loss_value, model.trainable_variables)
    # Without this step the gradients are computed but the weights never change.
    opt.apply_gradients(zip(gradients, model.trainable_variables))
# Load MNIST and scale the pixel values by 1/255.
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images / 255.0
test_images = test_images / 255.0
model = DNN(units=128, activation=tf.nn.relu)
opt = tf.optimizers.Adam(learning_rate=1e-3)
for epoch in range(3):
# One train() call per training sample.
for step in range(train_labels.shape[0]):
# NOTE(review): this binds the loss class itself (no parentheses), and does so again on every step.
loss = keras.losses.SparseCategoricalCrossentropy
train(model, loss, opt, train_images[step, :, :], train_labels[step])
# Loss over the whole training set, printed below.
train_loss = loss(model(train_images), train_labels)
template = 'Epoch {}, Train loss: {:.5f}'
print(template.format(epoch + 1, train_loss.numpy()))
I would expect for the model to train successfully, but it doesn't seem to be the case. What am I doing wrong?
From the given code, I can see that you are mixing tf and keras in places such as those shown below.
opt = tf.optimizers.Adam(learning_rate=1e-3)
loss = keras.losses.SparseCategoricalCrossentropy
This could cause issues like the one you are seeing. For TensorFlow 2.0, you can use tf.keras uniformly wherever you currently use keras directly.
I also noticed that you are instantiating the loss object inside the batch loop, which is not correct. You have to instantiate it once, before the start of your epoch loop.
The rest seems fine. Hope this helps!
I want to make a custom layer in Keras.
In this example, I multiply the tensor by a variable, but I get the following error:
in /keras/engine/training_arrays.py, line 304, in predict_loop
outs[i][batch_start:batch_end] = batch_out ValueError: could not broadcast input array from shape (36) into shape (2).
I have actually checked this file, but found nothing. Is there something wrong in my custom layer?
#the definition of mylayer.
from keras import backend as K
import keras
from keras.engine.topology import Layer
# Custom Keras layer that multiplies its input by a trainable kernel.
class mylayer(Layer):
def __init__(self, output_dim, **kwargs):
self.output_dim = output_dim
super(mylayer, self).__init__(**kwargs)
def build(self, input_shape):
# NOTE(review): the add_weight(...) call below is cut off in this excerpt; only the name argument is visible.
self.kernel = self.add_weight(name = 'kernel',
super(mylayer, self).build(input_shape)
def call(self, inputs, **kwargs):
# inputs[0] selects the first entry along the leading axis of inputs before multiplying.
return self.kernel * inputs[0]
def compute_output_shape(self, input_shape):
# The reported output shape is the first two entries of the input shape.
return (input_shape[0], input_shape[1])
#the test of mylayer.
from mylayer import mylayer
from tensorflow import keras as K
import numpy as np
from keras.layers import Input, Dense, Flatten
from keras.models import Model
# Two random samples of shape (3, 4, 3), each with a 36-value target.
x_train = np.random.random((2, 3, 4, 3))
y_train = np.random.random((2, 36))
x = Input(shape=(3, 4, 3))
# Flattening a (3, 4, 3) sample gives 3 * 4 * 3 = 36 values.
y = Flatten()(x)
output = mylayer((36, ))(y)
model = Model(inputs=x, outputs=output)
# NOTE(review): no model.compile(...) call is visible before fit() in this excerpt.
model.fit(x_train, y_train, epochs=2)
hist = model.predict(x_train,batch_size=2)
#So is there something wrong in my custom layer?
In particular, training this net works fine, but when I try to use "predict", it fails.
The shape of self.kernel * inputs[0] is (36,), but the expected shape is (?, 36). Change it to:
def call(self, inputs, **kwargs):
    """Scale ``inputs`` by the layer's kernel and return the product."""
    scaled = self.kernel * inputs
    return scaled
If you want to output the weight of mylayer, you should set index=2.