My input data and target data are different types of data and have different dimensions: the input data is 1D and the target data is 2D. I don't know how to modify the model to fit this situation.
I have built an Encoder/Decoder model with tokenized MIDI data as input, and coordinate data in CSV format as output.
The input dimension is
(num_of_data, sequence_length, data_dimension) = (22, 1000~3000, 1)
The target dimension is
(num_of_data, sequence_length, data_dimension) = (22, 1000~3000, 102)
The Encoder/Decoder model architecture I found:
class Encoder(nn.Module):
    def __init__(self,
                 input_size = 2,
                 embedding_size = 128,
                 hidden_size = 256,
                 n_layers = 4,
                 dropout = 0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.linear = nn.Linear(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, n_layers,
                           dropout = dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        embedded = self.dropout(F.relu(self.linear(x)))
        output, (hidden, cell) = self.rnn(embedded)
        return hidden, cell


class Decoder(nn.Module):
    def __init__(self,
                 output_size = 2,
                 embedding_size = 128,
                 hidden_size = 256,
                 n_layers = 4,
                 dropout = 0.5):
        super().__init__()
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.embedding = nn.Linear(output_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, n_layers, dropout = dropout)
        self.linear = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, hidden, cell):
        x = x.unsqueeze(0)
        embedded = self.dropout(F.relu(self.embedding(x)))
        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        prediction = self.linear(output.squeeze(0))
        return prediction, hidden, cell


class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        assert encoder.hidden_size == decoder.hidden_size, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"

    def forward(self, x, y, teacher_forcing_ratio = 0.5):
        batch_size = x.shape[1]
        target_len = y.shape[0]
        outputs = torch.zeros(y.shape).to(self.device)
        hidden, cell = self.encoder(x)
        decoder_input = x[-1, :, :]
        for i in range(target_len):
            output, hidden, cell = self.decoder(decoder_input, hidden, cell)
            outputs[i] = output
            teacher_forcing = random.random() < teacher_forcing_ratio
            decoder_input = y[i] if teacher_forcing else output
        return outputs
I set both the input data and the target data to a length of 900, since they need the same sequence_length before I convert them to tensors:
tokenized_data shape: (22, n)
target_data shape: (22, m, 102)
↓
tokenized_data shape: (22, 900)
target_data shape: (22, 900, 102)
input_tensor = torch.Tensor(input_data)
target_tensor = torch.Tensor(target_data)
torch.Size([22, 900])
torch.Size([22, 900, 102])
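Since nn.LSTM with the default batch_first=False expects tensors shaped (sequence_length, batch_size, feature_dim), my current guess (and it is only a guess) for arranging these tensors, treating the 22 pieces as the batch, is:

input_tensor = torch.Tensor(input_data).permute(1, 0).unsqueeze(-1)   # (900, 22, 1)
target_tensor = torch.Tensor(target_data).permute(1, 0, 2)            # (900, 22, 102)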
The parameters and model are listed below:
source = input_tensor.to(device)
target = target_tensor.to(device)
input_size = 900 # I am not sure if this is correct
output_size = (900,102) # I am not sure if this is correct
print('Input : {} Output : {}'.format(input_size, output_size))
embed_size = 256
hidden_size = 512
num_layers = 3
num_iteration = 100
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
encoder = Encoder(input_size, hidden_size, embed_size, num_layers, ENC_DROPOUT)
decoder = Decoder(output_size, hidden_size, embed_size, num_layers, DEC_DROPOUT)
model = Seq2Seq(encoder, decoder, device).to(device)
I then tried to train the model with:
model = trainModel(model, source, target, num_iteration)
The above results in the error message shown below:
TypeError: empty(): argument 'size' must be tuple of ints, but found element of type tuple at pos 2
I then revised output_size = (900, 102) to output_size = 900, but I got the error below:
IndexError: too many indices for tensor of dimension 2
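My current (unconfirmed) guess is that input_size and output_size in this architecture are the per-timestep feature dimensions rather than the sequence length, which would make the setup look roughly like this (argument order matching the constructors above):

input_size = 1      # one feature per input timestep (the token value)
output_size = 102   # 102 coordinate values per target timestep
encoder = Encoder(input_size, embed_size, hidden_size, num_layers, ENC_DROPOUT)
decoder = Decoder(output_size, embed_size, hidden_size, num_layers, DEC_DROPOUT)
model = Seq2Seq(encoder, decoder, device).to(device)

but I have not been able to confirm whether this is the intended usage.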
The core problem is that my Encoder/Decoder model cannot deal with input and target tensors that have different dimensions. Any help or advice on how to create an Encoder/Decoder model that takes input and target tensors of different dimensions would be greatly appreciated.
Thank you in advance for your input.
Related
I'm getting a runtime error when trying to sequentialise my linear layer with a DINO backbone from torch.hub.
import torch
import torch.distributed as dist
class LinearClassifier(torch.nn.Module):
    def __init__(self, dim, num_labels=1000):
        super(LinearClassifier, self).__init__()
        self.num_labels = num_labels
        self.linear = torch.nn.Linear(dim, num_labels)
        self.linear.weight.data.normal_(mean=0.0, std=0.01)
        self.linear.bias.data.zero_()

    def forward(self, x):
        # flatten
        x = x.view(x.size(0), -1)
        # linear layer
        return self.linear(x)
dist.init_process_group('gloo', init_method='file:///tmp/somefile', rank=0, world_size=1)
# load backbone
model = torch.hub.load('facebookresearch/dino:main', 'dino_vits8')
#Setup linear layer
linear_classifier = LinearClassifier(1536, 1000)
linear_classifier = linear_classifier.cuda()
linear_classifier = torch.nn.parallel.DistributedDataParallel(linear_classifier)
state_dict = torch.hub.load_state_dict_from_url(url="https://dl.fbaipublicfiles.com/dino/dino_deitsmall8_pretrain/dino_deitsmall8_linearweights.pth")['state_dict']
linear_classifier.load_state_dict(state_dict, strict=True)
#Sequentialise
model = torch.nn.Sequential(model,
linear_classifier)
x = torch.ones((1, 3, 224, 224))
out = model(x)
print("out: " + out)
Here is the print of the last layers of my sequentialised model:
[screenshot: last layers of the sequentialised model]
It looks like the output of model(x) (as defined by model = torch.hub...) has shape 1 x 384, but your linear_classifier expects something of shape _ x 1536, which is why you get this error. So you can just adjust the number of inputs by setting
linear_classifier = LinearClassifier(384, 1000)
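If you want to double-check the feature size yourself, a quick sanity check (using the backbone you already loaded from torch.hub, before wrapping it in Sequential) is:

with torch.no_grad():
    feat = model(torch.ones(1, 3, 224, 224))
print(feat.shape)  # should print something like torch.Size([1, 384]) for dino_vits8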
I tried to reproduce https://github.com/munhouiani/Deep-Packet and ran into an error.
This program uses a CNN to classify network traffic. I decided to rewrite the program because I could not run the original on my computer. I am new to neural networks, so I cannot give a detailed description of the problem.
TypeError: conv1d() received an invalid combination of arguments - got (list, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (!list!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (!list!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
code:
import torch
from pathlib import Path
from torch import nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from argparse import Namespace
from pytorch_lightning import Trainer
import pytorch_lightning as pl
import numpy as np
class CNN(pl.LightningModule):
    def __init__(self, hparams):
        super().__init__()
        # config
        self.save_hyperparameters(hparams)
        self.data_path = self.hparams.data_path
        # two convolution, then one max pool
        self.conv1 = nn.Sequential(
            nn.Conv1d(
                in_channels=1,
                out_channels=self.hparams.c1_output_dim,
                kernel_size=self.hparams.c1_kernel_size,
                stride=self.hparams.c1_stride
            ),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(
                in_channels=self.hparams.c1_output_dim,
                out_channels=self.hparams.c2_output_dim,
                kernel_size=self.hparams.c2_kernel_size,
                stride=self.hparams.c2_stride
            ),
            nn.ReLU()
        )
        self.max_pool = nn.MaxPool1d(
            kernel_size=2
        )
        # flatten, calculate the output size of max pool
        # use a dummy input to calculate
        dummy_x = torch.rand(1, 1, self.hparams.signal_length, requires_grad=False)
        dummy_x = self.conv1(dummy_x)
        dummy_x = self.conv2(dummy_x)
        dummy_x = self.max_pool(dummy_x)
        max_pool_out = dummy_x.view(1, -1).shape[1]
        # followed by 5 dense layers
        self.fc1 = nn.Sequential(
            nn.Linear(
                in_features=max_pool_out,
                out_features=200
            ),
            nn.Dropout(p=0.05),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(
                in_features=200,
                out_features=100
            ),
            nn.Dropout(p=0.05),
            nn.ReLU()
        )
        self.fc3 = nn.Sequential(
            nn.Linear(
                in_features=100,
                out_features=50
            ),
            nn.Dropout(p=0.05),
            nn.ReLU()
        )
        # finally, output layer
        self.out = nn.Linear(
            in_features=50,
            out_features=self.hparams.output_dim
        )

    def forward(self, x):
        # make sure the input is in [batch_size, channel, signal_length]
        # where channel is 1
        # signal_length is 1500 by default
        #batch_size = x.shape[0]
        batch_size = 16
        # 2 conv 1 max
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.max_pool(x)
        x = x.reshape(batch_size, -1)
        # 3 fc
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        # output
        x = self.out(x)
        return x

    def train_dataloader(self):
        reader = self.data_path
        dataloader = DataLoader(reader, batch_size=16)
        return dataloader

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters())

    def training_step(self, batch, batch_idx):
        x = batch
        y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        if (batch_idx % 50) == 0:
            self.logger.log_metrics(loss, step=batch_idx)
        return loss
num_epochs = 6
num_classes = 10
batch_size = 100
learning_rate = 0.001
train_dataset = "D:\Deep-Packet-master\Deep-Packet-master\processed_data"
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
hparams = Namespace(**{
    'c1_kernel_size': 4,
    'c1_output_dim': 200,
    'c1_stride': 3,
    'c2_kernel_size': 5,
    'c2_output_dim': 200,
    'c2_stride': 1,
    'output_dim': 17,
    'data_path': train_dataset,
    'signal_length': 1500,
    'epoch': 6
})
model = CNN(hparams).float()
gpus = None
trainer = Trainer(val_check_interval=4, max_epochs=1)
trainer.fit(model)
trainer.save_checkpoint(str(train_dataset.absolute()))
Please help.
I'm going to guess that your training_step is incorrect:
def training_step(self, batch, batch_idx):
    x = batch[0]
    y = batch[1]
    y_hat = self(x)
    loss = F.cross_entropy(y_hat, y)
    if (batch_idx % 50) == 0:
        self.logger.log_metrics(loss, step=batch_idx)
    return loss
In your code, you set both x and y to batch, which is a tuple or a list, and conv1d cannot interpret a list where it expects a Tensor input.
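For that to work, the DataLoader also needs to be built on a Dataset that actually yields (input, label) pairs; passing a plain directory string as the dataset will not give you that. As a rough illustration only (the tensor names and shapes here are made up to show the structure, not the Deep-Packet format):

from torch.utils.data import Dataset

class PacketDataset(Dataset):
    def __init__(self, features, labels):
        # features: float tensor of shape (N, 1, signal_length)
        # labels: long tensor of shape (N,) with class indices
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

Each batch then arrives in training_step as an (x, y) pair that the code above can unpack.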
I was building a self-defined encoder-decoder tf.keras.Model and saved a checkpoint. After closing my Jupyter notebook and opening it again to restore my encoder-decoder parameters, I was surprised to find that it did not work. I am not sure whether I misunderstood the usage or made a mistake in my steps. Here is my code.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

tf.config.experimental_run_functions_eagerly(True)
EPOCHS = 10
TOLERANCE = 0.08
start = time.time()

for epoch in range(1, EPOCHS + 1):
    epoch_start = time.time()

    # train the encoder-decoder model
    batch = 0
    total_loss = 0
    total_accuracy = 0
    for inp, targ in dataset.take(STEP_PER_EPOCH):
        batch += 1
        batch_loss, batch_accuracy = train_step(inp, targ, phoneme_tokenizer)
        total_loss += batch_loss
        total_accuracy += batch_accuracy
        print("Epoch: {}/{} Batch: {} Loss: {:.4f} Accuracy: {:.4f} Time: {:.0f}s".
              format(epoch, EPOCHS, batch, batch_loss.numpy(), batch_accuracy.numpy(), time.time()-epoch_start),
              end="\r")
        if batch % 1000 == 0:
            print()
    print()

    # saving (checkpoint) the model when total loss is less than 0.9
    checkpoint.save(file_prefix=checkpoint_prefix)

    # validation process
    total_val_loss = 0
    total_val_acc = 0
    for val_inp, val_targ in dataset_val.take(VAL_WAV_SIZE):
        val_loss, val_acc = validate_step(val_inp, val_targ, phoneme_tokenizer)
        total_val_loss += val_loss
        total_val_acc += val_acc

    # print out the epoch results
    mean_total_acc = total_accuracy / STEP_PER_EPOCH
    mean_total_loss = total_loss / STEP_PER_EPOCH
    mean_val_acc = total_val_acc / VAL_WAV_SIZE
    mean_val_loss = total_val_loss / VAL_WAV_SIZE
    print("\n================================")
    print("Epoch {}/{}".format(epoch, EPOCHS))
    print('Accuracy: {:.4f} Loss: {:.4f} val_acc: {:.4f} val_loss: {:.4f}'.format(
        mean_total_acc,
        mean_total_loss,
        mean_val_acc,
        mean_val_loss))
    print('Time taken for epoch {}: {:.2f} min'.format(epoch, (time.time() - epoch_start)/60))
    print('Total Time taken: {:.2f} min'.format((time.time() - start)/60))
    print("================================\n")

    if mean_total_loss < TOLERANCE and mean_val_acc > 0.5:
        break
The code above ran without errors and produced checkpoint files in my directory. I then closed my Jupyter notebook, rebuilt all the objects (my encoder and decoder) without training, and typed
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
hoping that the parameters would come back so I could start predicting, but the results were quite poor, unlike the predictions made right after training. Should I add more lines to restore all the parameters, or do something else?
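For completeness, the restore code I run after rebuilding everything looks roughly like this (the hyper-parameter variables hold the same values I used for training; the status check is only there as a sanity check):

# rebuilt with the same constructor arguments as during training
encoder = Encoder(lstm_units, batch_sz, dropout_rate, units, squeeze_time)
decoder = Decoder(target_sz, embedding_dim, lstm_units, batch_sz, dropout_rate)
optimizer = tf.keras.optimizers.Adam()

checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)
status = checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
# status.assert_existing_objects_matched() can be called once the model
# variables have been built (e.g. after one forward pass) to verify the restore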
Below are more details about my encoder-decoder structure; my input shape is (batch_size, 99, 13).
class ResnetIdentityBlock(tf.keras.layers.Layer):
    def __init__(self, kernel_size, filters):
        super(ResnetIdentityBlock, self).__init__()
        self.filters1, self.filters2, self.filters3 = filters
        self.conv1 = tf.keras.layers.Conv1D(self.filters1, 1, padding='valid')
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv1D(self.filters2, kernel_size, padding='same')
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.conv3 = tf.keras.layers.Conv1D(self.filters3, 1, padding='valid')
        self.bn3 = tf.keras.layers.BatchNormalization()

    def call(self, input_tensor, training=False):
        x = self.conv1(input_tensor)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv3(x)
        x = self.bn3(x, training=training)
        x += input_tensor
        return tf.nn.relu(x)


class Encoder(tf.keras.Model):
    '''
    Encoder for MFCC transformed wave data
    '''
    def __init__(self,
                 lstm_units,
                 batch_sz,
                 dropout_rate,
                 units,
                 squeeze_time):
        '''
        Args:
            lstm_units: LSTM units number
            batch_sz: batch size
            dropout_rate: layer dropout ratio
            rnn_initial_weight: type of weight initialization
        '''
        super(Encoder, self).__init__()
        self.lstm_units = lstm_units
        self.squeeze_time = squeeze_time
        # conv1d
        self.feat_extract = tf.keras.layers.Dense(units=units, activation="relu")
        self.feat_dropout = tf.keras.layers.Dropout(dropout_rate)
        # ResNet
        self.resnet1 = ResnetIdentityBlock(kernel_size=11, filters=[units, units, units])
        units *= squeeze_time
        self.resnet2 = ResnetIdentityBlock(kernel_size=7, filters=[units, units, units])
        units *= squeeze_time
        self.resnet3 = ResnetIdentityBlock(kernel_size=3, filters=[units, units, units])
        # Encoder lstm
        self.enc_lstm = tf.keras.layers.LSTM(units=lstm_units,
                                             return_sequences=True,
                                             return_state=True,
                                             kernel_initializer="lecun_normal",
                                             activation='tanh',
                                             recurrent_activation='sigmoid',
                                             recurrent_initializer='orthogonal',
                                             dropout=dropout_rate)

    def call(self, inputs):
        '''
        call pyramidal LSTM neural network encoder
        Args:
            inputs: wave input
        '''
        x = self.feat_extract(inputs)
        x = self.feat_dropout(x)
        # ResNet
        x = self.resnet1(x)
        x = self.reshape_pyramidal(x)
        x = self.resnet2(x)
        x = self.reshape_pyramidal(x)
        x = self.resnet3(x)
        # encoder output layer
        fw_outputs, fw_state_h, fw_state_c = self.enc_lstm(x)
        return fw_outputs, fw_state_h, fw_state_c

    def reshape_pyramidal(self, outputs):
        '''
        After concatenating forward and backward outputs
        return the reshaped output
        Args:
            outputs: outputs from LSTM
            squeeze_time: time step one would like to squeeze in pyramidal LSTM
        '''
        batch_size, time_steps, num_units = outputs.shape
        return tf.reshape(outputs, (batch_size, -1, num_units * self.squeeze_time))


class LuongAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(LuongAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units, activation="relu")
        self.W2 = tf.keras.layers.Dense(units, activation="relu")
        self.V = tf.keras.layers.Dense(1, activation="relu")

    def call(self, query, values):
        # query hidden state shape == (batch_size, hidden size)
        # query_with_time_axis shape == (batch_size, 1, hidden size)
        # values shape == (batch_size, max_len, hidden size)
        # we are doing this to broadcast addition along the time axis to calculate the score
        query_with_time_axis = tf.expand_dims(query, 1)
        # score shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we are applying score to self.V
        # the shape of the tensor before applying self.V is (batch_size, max_length, units)
        score = self.V(tf.nn.tanh(
            self.W1(query_with_time_axis) + self.W2(values)))
        # attention_weights shape == (batch_size, max_length, 1)
        attention_weights = tf.nn.softmax(score, axis=1)
        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights


class Decoder(tf.keras.Model):
    '''
    Decoder for output phonemes
    '''
    def __init__(self,
                 target_sz,
                 embedding_dim,
                 lstm_units,
                 batch_sz,
                 dropout_rate):
        '''
        Args:
            target_sz: target size, total phoneme size in this case
            embedding_dim: embedding dimension
            lstm_units: LSTM units number
            batch_sz: batch size
            dropout_rate: dropout ratio
            rnn_initial_weight: type of weight initialization
        '''
        super(Decoder, self).__init__()
        self.batch_sz = batch_sz
        self.target_sz = target_sz
        self.lstm_units = lstm_units
        self.embedding = tf.keras.layers.Embedding(target_sz, embedding_dim)
        # attention model
        self.attention = LuongAttention(lstm_units)
        # decoder rnn
        self.lstm1 = tf.keras.layers.LSTM(units=lstm_units,
                                          return_sequences=True,
                                          return_state=True,
                                          kernel_initializer="lecun_normal",
                                          activation='tanh',
                                          recurrent_activation='sigmoid',
                                          recurrent_initializer='orthogonal',
                                          dropout=dropout_rate)
        # Fully-connected
        self.fc1 = tf.keras.layers.Dense(64, activation="relu")
        self.fc1_dropout = tf.keras.layers.Dropout(dropout_rate)
        self.fc2 = tf.keras.layers.Dense(target_sz, activation="softmax")
        # build layer info dictionary
        self.layer_info = dict()

    def call(self, inputs, enc_hidden_h, enc_hidden_c, enc_output):
        '''
        call LSTM decoder
        Args:
            inputs: target output, following phoneme for wave data input in this case
            enc_hidden_h: encoder hidden state h
            enc_hidden_c: encoder hidden state c
            enc_output: encoder outputs
        '''
        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(inputs)
        # enc_output shape == (batch_size, max_length, hidden_size)
        context_vector, attention_weights = self.attention(enc_hidden_h, enc_output)
        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        # passing the concatenated vector to the 2-layer LSTM (Decoder)
        x, state_h, state_c = self.lstm1(x)
        # dense layer before final predict output dense layer
        x = tf.reshape(x, (-1, x.shape[-1]))
        x = self.fc1(x)
        x = self.fc1_dropout(x)
        # output shape == (batch_size, phoneme size)
        x = self.fc2(x)
        return x, (state_h, state_c), attention_weights
I'm trying to follow this tutorial: https://colab.research.google.com/github/tensorflow/examples/blob/master/community/en/transformer_chatbot.ipynb. However, when I tried to save the model in order to load it again without training, I got the error mentioned here: NotImplementedError: Layers with arguments in `__init__` must override `get_config`.
I understood from the answer that I need to make the encoder and decoder into classes and customise them (instead of leaving them as functions like in the Colab tutorial), so I went back to the TensorFlow documentation of this model here: https://www.tensorflow.org/tutorials/text/transformer#encoder_layer and tried to edit it. I made the encoder layer as:
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, rate=0.1, **kwargs):
        #super(EncoderLayer, self).__init__()
        super().__init__(**kwargs)
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            #'vocab_size': self.vocab_size,
            #'num_layers': self.num_layers,
            #'units': self.units,
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dropout': self.dropout,
        })
        return config

    def call(self, x, training, mask):
        attn_output, _ = self.mha(x, x, x, mask)  # (batch_size, input_seq_len, d_model)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)
        ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)
        return out2
and the same for the decoder layer class. I then used the same Encoder as in the TensorFlow documentation:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
                 maximum_position_encoding, rate=0.1):
        super(Encoder, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding,
                                                self.d_model)
        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate)
                           for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask):
        seq_len = tf.shape(x)[1]
        # adding embedding and position encoding.
        x = self.embedding(x)  # (batch_size, input_seq_len, d_model)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[:, :seq_len, :]
        x = self.dropout(x, training=training)
        for i in range(self.num_layers):
            x = self.enc_layers[i](x, training, mask)
        return x  # (batch_size, input_seq_len, d_model)
and the function that builds the model as:
def transformer(vocab_size,
                num_layers,
                units,
                d_model,
                num_heads,
                dropout,
                name="transformer"):
    inputs = tf.keras.Input(shape=(None,), name="inputs")
    dec_inputs = tf.keras.Input(shape=(None,), name="dec_inputs")

    enc_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='enc_padding_mask')(inputs)
    # mask the future tokens for decoder inputs at the 1st attention block
    look_ahead_mask = tf.keras.layers.Lambda(
        create_look_ahead_mask,
        output_shape=(1, None, None),
        name='look_ahead_mask')(dec_inputs)
    # mask the encoder outputs for the 2nd attention block
    dec_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='dec_padding_mask')(inputs)

    enc_outputs = Encoder(
        num_layers=num_layers, d_model=d_model, num_heads=num_heads,
        input_vocab_size=vocab_size,
    )(inputs=[inputs, enc_padding_mask])

    dec_outputs = Decoder(
        num_layers=num_layers, d_model=d_model, num_heads=num_heads,
        target_vocab_size=vocab_size,
    )(inputs=[dec_inputs, enc_outputs, look_ahead_mask, dec_padding_mask])

    outputs = tf.keras.layers.Dense(units=vocab_size, name="outputs")(dec_outputs)

    return tf.keras.Model(inputs=[inputs, dec_inputs], outputs=outputs, name=name)
and calling the model:
# the model itself with its parameters:
# Hyper-parameters
NUM_LAYERS = 3
D_MODEL = 256
#D_MODEL=tf.cast(D_MODEL, tf.float32)
NUM_HEADS = 8
UNITS = 512
DROPOUT = 0.1

model = transformer(
    vocab_size=VOCAB_SIZE,
    num_layers=NUM_LAYERS,
    units=UNITS,
    d_model=D_MODEL,
    num_heads=NUM_HEADS,
    dropout=DROPOUT)
However, I got this error:
TypeError: __init__() missing 2 required positional arguments: 'dff' and 'maximum_position_encoding'
I am really confused. I don't understand what dff and maximum_position_encoding mean in the documentation, and when I removed them from the encoder and decoder classes I got another error, since the positional_encoding function takes the maximum position as input and dff is also used inside the class. I am not sure what I should do, because I don't know whether I am following the right steps.
If you get this error while calling transformer, then your problem is with creating the model, not saving it.
Other than that, I see several issues with your get_config:
You defined dropout instead of rate.
The attributes you address (self.d_model etc.) are not defined or assigned at __init__.
It doesn't exist for your Encoder class. A sketch of a get_config that addresses these points is shown below.
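For illustration only (keeping your layer structure and showing just the config plumbing; the sub-layer construction is unchanged):

class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # store the constructor arguments so get_config can return them
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.rate = rate
        # ... build self.mha, self.ffn, the LayerNorms and Dropouts as before ...

    def get_config(self):
        config = super().get_config()
        config.update({
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dff': self.dff,
            'rate': self.rate,
        })
        return config

The same pattern (store the __init__ arguments as attributes and return them from get_config) would also need to be added to your Encoder and Decoder classes.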
I have a BucketIterator from torchtext that I feed to a model in PyTorch. An example of how the iterator is constructed:
train_iter, val_iter = BucketIterator.splits((train, val),
                                             batch_size=batch_size,
                                             sort_within_batch=True,
                                             device=device,
                                             shuffle=True,
                                             sort_key=lambda x: (len(x.src), len(x.trg)))
The data is then fed to a model like this, where I use the nn.Embedding layer.
class encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.input_dim = input_dim
        self.emb_dim = emb_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.dropout = dropout
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        #src = [src sent len, batch size]
        embedded = self.dropout(self.embedding(src))
        #embedded = [src sent len, batch size, emb dim]
        hidden_enc = []
        outputs, hidden = self.rnn(embedded[0,:,:].unsqueeze(0))
        for i in range(1, len(embedded[:,1,1])):
            outputs, hidden = self.rnn(embedded[i,:,:].unsqueeze(0), hidden)
            hidden_cpu = []
            for k in range(len(hidden)):
                hidden_cpu.append(hidden[k])
                hidden_cpu[k] = hidden[k].cpu()
            hidden_enc.append(tuple(hidden_cpu))

        #outputs, hidden = self.rnn(embedded)
        #outputs = [src sent len, batch size, hid dim * n directions]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        None
        #outputs are always from the top hidden layer
        return hidden, hidden_enc
But what if I wanted the embedding to be one-hot encoded? I work on formal languages and it would be nice to preserve orthogonality between tokens. PyTorch and torchtext don't seem to have a ready-made embedding layer for this, so I started writing my own conversion function:
import numpy as np
import torch

def get_one_hot_torch_tensor(in_tensor):
    """
    Function converts a 1d or 2d torch tensor to one-hot encoded
    """
    n_channels = int(torch.max(in_tensor)) + 1  # maximum number of channels
    if in_tensor.ndim == 2:
        out_one_hot = torch.zeros((n_channels, in_tensor.shape[0], in_tensor.shape[1]))
        # print(out_one_hot)
        index = np.indices((in_tensor.shape[0], in_tensor.shape[1]))  # create an array of indices
        x, y = index[0], index[1]
        print(x, y)
        out_one_hot[in_tensor, x, y] = 1
        print(out_one_hot)
        return out_one_hot
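The closest built-in I have found so far is torch.nn.functional.one_hot, which does the index-to-one-hot conversion itself (though not as a drop-in nn.Embedding replacement) and puts the channel axis last rather than first, for example:

import torch
import torch.nn.functional as F

src = torch.tensor([[1, 0, 2], [2, 1, 0]])       # (seq_len, batch) of token indices
one_hot = F.one_hot(src, num_classes=3).float()  # (seq_len, batch, n_tokens)
print(one_hot.shape)

so it would still need a permute if the channel-first layout of my function above is required.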