One-hot encoding in PyTorch/torchtext

I have a BucketIterator from torchtext that I feed to a model in PyTorch. Here is an example of how the iterator is constructed:
train_iter, val_iter = BucketIterator.splits((train, val),
                                             batch_size=batch_size,
                                             sort_within_batch=True,
                                             device=device,
                                             shuffle=True,
                                             sort_key=lambda x: (len(x.src), len(x.trg)))
The data is then fed to a model like this, where I use the nn.Embedding layer.
class encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()

        self.input_dim = input_dim
        self.emb_dim = emb_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.dropout = dropout

        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout=dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        # src = [src sent len, batch size]
        embedded = self.dropout(self.embedding(src))
        # embedded = [src sent len, batch size, emb dim]

        hidden_enc = []
        outputs, hidden = self.rnn(embedded[0, :, :].unsqueeze(0))
        for i in range(1, len(embedded[:, 1, 1])):
            outputs, hidden = self.rnn(embedded[i, :, :].unsqueeze(0), hidden)
            hidden_cpu = []
            for k in range(len(hidden)):
                hidden_cpu.append(hidden[k])
                hidden_cpu[k] = hidden[k].cpu()
            hidden_enc.append(tuple(hidden_cpu))

        # outputs, hidden = self.rnn(embedded)
        # outputs = [src sent len, batch size, hid dim * n directions]
        # hidden = [n layers * n directions, batch size, hid dim]
        # cell = [n layers * n directions, batch size, hid dim]
        # outputs are always from the top hidden layer
        return hidden, hidden_enc
But what if I wanted the embedding to be one-hot encoded? I work on formal languages and it would be nice to preserve orthogonality between tokens. It doesn't seem like pytorch or torchtext has any functionality for doing this.

import numpy as np
import torch

def get_one_hot_torch_tensor(in_tensor):
    """
    Convert a 1d or 2d integer torch tensor to a one-hot encoded tensor.
    """
    n_channels = int(torch.max(in_tensor)) + 1  # number of classes
    if in_tensor.ndim == 2:
        out_one_hot = torch.zeros((n_channels, in_tensor.shape[0], in_tensor.shape[1]))
        index = np.indices((in_tensor.shape[0], in_tensor.shape[1]))  # grids of row and column indices
        x, y = torch.from_numpy(index[0]), torch.from_numpy(index[1])
        out_one_hot[in_tensor, x, y] = 1
        return out_one_hot
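For what it's worth, recent PyTorch releases (roughly 1.1 and later) do expose torch.nn.functional.one_hot, which produces the same encoding directly. A minimal sketch, assuming integer token indices:

import torch
import torch.nn.functional as F

src = torch.tensor([[1, 3, 0], [2, 1, 4]])       # [src sent len, batch size] of token indices
one_hot = F.one_hot(src, num_classes=5).float()  # adds a trailing dimension of size num_classes
print(one_hot.shape)                             # torch.Size([2, 3, 5])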

Related

How to Use Encoder/Decoder Model with Different Dimensions of Input/Target

My input data and target data are different types of data with different dimensions: the input data is 1D and the target data is 2D. I don't know how to modify the model to fit this situation.
I have built an Encoder/Decoder model with tokenized MIDI data as input, and coordinate data in CSV format as output.
The input dimension is
(num_of_data, sequence_length, data_dimension) = (22, 1000~3000, 1)
The target dimension is
(num_of_data, sequence_length, data_dimension) = (22, 1000~3000, 102)
The Encoder/Decoder model architecture I found:
class Encoder(nn.Module):
    def __init__(self,
                 input_size = 2,
                 embedding_size = 128,
                 hidden_size = 256,
                 n_layers = 4,
                 dropout = 0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.linear = nn.Linear(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, n_layers,
                           dropout = dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        embedded = self.dropout(F.relu(self.linear(x)))
        output, (hidden, cell) = self.rnn(embedded)
        return hidden, cell


class Decoder(nn.Module):
    def __init__(self,
                 output_size = 2,
                 embedding_size = 128,
                 hidden_size = 256,
                 n_layers = 4,
                 dropout = 0.5):
        super().__init__()
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.embedding = nn.Linear(output_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, n_layers, dropout = dropout)
        self.linear = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, hidden, cell):
        x = x.unsqueeze(0)
        embedded = self.dropout(F.relu(self.embedding(x)))
        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        prediction = self.linear(output.squeeze(0))
        return prediction, hidden, cell


class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        assert encoder.hidden_size == decoder.hidden_size, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"

    def forward(self, x, y, teacher_forcing_ratio = 0.5):
        batch_size = x.shape[1]
        target_len = y.shape[0]
        outputs = torch.zeros(y.shape).to(self.device)
        hidden, cell = self.encoder(x)
        decoder_input = x[-1, :, :]
        for i in range(target_len):
            output, hidden, cell = self.decoder(decoder_input, hidden, cell)
            outputs[i] = output
            teacher_forcing = random.random() < teacher_forcing_ratio
            decoder_input = y[i] if teacher_forcing else output
        return outputs
I set the input data and target data length to 900 as they need to have the same sequence_length before I convert them to tensor:
tokenized_data shape: (22, n)
target_data shape: (22, m, 102)
↓
tokenized_data shape: (22, 900)
target_data shape: (22, 900, 102)
input_tensor = torch.Tensor(input_data)    # torch.Size([22, 900])
target_tensor = torch.Tensor(target_data)  # torch.Size([22, 900, 102])
The parameters and model are listed below:
source = input_tensor.to(device)
target = target_tensor.to(device)
input_size = 900 # I am not sure if this is correct
output_size = (900,102) # I am not sure if this is correct
print('Input : {} Output : {}'.format(input_size, output_size))
embed_size = 256
hidden_size = 512
num_layers = 3
num_iteration = 100
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
encoder = Encoder(input_size, hidden_size, embed_size, num_layers, ENC_DROPOUT)
decoder = Decoder(output_size, hidden_size, embed_size, num_layers, DEC_DROPOUT)
model = Seq2Seq(encoder, decoder, device).to(device)
I then tried to train the model with:
model = trainModel(model, source, target, num_iteration)
The above results in the error message shown below:
TypeError: empty(): argument 'size' must be tuple of ints, but found element of type tuple at pos 2
I revised output_size = [900,102] to output_size = 900, but I got the error below:
IndexError: too many indices for tensor of dimension 2
I am having problems because the Encoder/Decoder model can't deal with the input and target tensors having different dimensions. Any help or advice on how to create an Encoder/Decoder model that takes input and target tensors of different dimensions would be greatly appreciated.
Thank you in advance for your input.
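A minimal sketch of how the constructor arguments and tensor shapes could line up, under the assumption that input_size and output_size are meant to be per-timestep feature dimensions (1 and 102 here) rather than sequence lengths, and that nn.LSTM with the default batch_first=False expects tensors shaped (seq_len, batch, features):

# Hypothetical sizes: feature dimensions per timestep, not sequence lengths
encoder = Encoder(input_size=1, embedding_size=256, hidden_size=512,
                  n_layers=3, dropout=0.5)
decoder = Decoder(output_size=102, embedding_size=256, hidden_size=512,
                  n_layers=3, dropout=0.5)

# (22, 900) -> (900, 22, 1) and (22, 900, 102) -> (900, 22, 102)
source = input_tensor.unsqueeze(-1).permute(1, 0, 2)
target = target_tensor.permute(1, 0, 2)

With shapes arranged this way, the Seq2Seq forward would also need its initial decoder_input to have 102 features (for example a zero tensor) rather than x[-1, :, :], since the Decoder's first linear layer expects output_size features.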

RuntimeError: mat1 and mat2 shapes cannot be multiplied when using BiGRU?

Can someone figure out what causes this error? I am working on NMT with a BiGRU to transform grammatically incorrect Arabic sentences into grammatically correct ones. I'm not sure how the BiGRU should work, because the original code used a unidirectional GRU only.
The problem started when I converted the GRU to bidirectional.
This is the code:
# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE, ENC_DROPOUT)
encoder.to(device)

# obtain one sample from the data iterator
it = iter(dataset)
x, y, x_len = next(it)
print("Input: ", x.shape)
print("Output: ", y.shape)

# sort the batch first to be able to use it with pack_padded_sequence
xs, ys, lens = sort_batch(x, y, x_len)
enc_output, enc_hidden = encoder(xs.to(device), lens, device)
print("Encoder Output: ", enc_output.shape)  # batch_size X max_length X enc_units
print("Encoder Hidden: ", enc_hidden.shape)  # batch_size X enc_units (corresponds to the last state)

decoder = Decoder(vocab_tar_size, embedding_dim, units, units, BATCH_SIZE, DEC_DROPOUT)
decoder = decoder.to(device)

# print(enc_hidden.squeeze(0).shape)
dec_hidden = enc_hidden  # .squeeze(0)
dec_input = torch.tensor([[targ_lang.word2idx['<start>']]] * BATCH_SIZE)
print("Decoder Input: ", dec_input.shape)
print("--------")

for t in range(1, y.size(1)):
    print(dec_input.shape)
    print(dec_hidden.shape)
    print(enc_output.shape)
    # enc_hidden: 1, batch_size, enc_units
    # output: max_length, batch_size, enc_units
    predictions, dec_hidden, _ = decoder(dec_input.to(device), dec_hidden.to(device), enc_output.to(device))
    print("Prediction: ", predictions.shape)
    print("Decoder Hidden: ", dec_hidden.shape)
    # loss += loss_function(y[:, t].to(device), predictions.to(device))
    dec_input = y[:, t].unsqueeze(1)
    print(dec_input.shape)
    break
And this is the Encoder code:
class Encoder(nn.Module):
    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz, dropout):
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.gru = nn.GRU(self.embedding_dim, self.enc_units, dropout=dropout, bidirectional=True)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, lens, device):
        # x: batch_size, max_length
        # x: batch_size, max_length, embedding_dim
        x = self.embedding(x).to(device)
        # x transformed = max_len X batch_size X embedding_dim
        # x = x.permute(1, 0, 2)
        x = pack_padded_sequence(x, lens)  # pack the padded batch
        self.hidden = self.initialize_hidden_state(device)
        # output: max_length, batch_size, enc_units
        # self.hidden: 1, batch_size, enc_units
        output, self.hidden = self.gru(x, self.hidden)  # gru returns the hidden states of all timesteps as well as the hidden state at the last timestep
        # pad the sequence to the max length in the batch
        output, _ = pad_packed_sequence(output)
        return output, self.hidden

    def initialize_hidden_state(self, device):
        return torch.zeros((2, self.batch_sz, self.enc_units)).to(device)
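One likely source of the shape mismatch is that a bidirectional GRU doubles the feature dimension: the packed output becomes (max_length, batch_size, 2 * enc_units) and the hidden state becomes (2, batch_size, enc_units), so any decoder or attention layer still sized for enc_units will fail in a matrix multiply. A minimal sketch, continuing from the question's variables, of combining the two directions before handing the state to a unidirectional decoder (the reduce_hidden projection is a hypothetical helper, not part of the original code):

# enc_hidden: (2, batch_size, enc_units) -> (batch_size, 2 * enc_units)
enc_hidden_cat = torch.cat((enc_hidden[0], enc_hidden[1]), dim=1)

# project back down to enc_units so the decoder's layer sizes still match
reduce_hidden = nn.Linear(2 * units, units).to(device)
dec_hidden = torch.tanh(reduce_hidden(enc_hidden_cat)).unsqueeze(0)  # (1, batch_size, enc_units)

The encoder output would need the same treatment (or the decoder's attention layers resized to 2 * enc_units) before it is passed to the decoder.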

RuntimeError: mat1 dim 1 must match mat2 dim 0 for Sequential of DINO model

I’m getting the runtime error when trying to Sequentialise my linear layer with a DINO backbone from torch.hub.
import torch
import torch.distributed as dist

class LinearClassifier(torch.nn.Module):
    def __init__(self, dim, num_labels=1000):
        super(LinearClassifier, self).__init__()
        self.num_labels = num_labels
        self.linear = torch.nn.Linear(dim, num_labels)
        self.linear.weight.data.normal_(mean=0.0, std=0.01)
        self.linear.bias.data.zero_()

    def forward(self, x):
        # flatten
        x = x.view(x.size(0), -1)
        # linear layer
        return self.linear(x)

dist.init_process_group('gloo', init_method='file:///tmp/somefile', rank=0, world_size=1)

# load backbone
model = torch.hub.load('facebookresearch/dino:main', 'dino_vits8')

# set up linear layer
linear_classifier = LinearClassifier(1536, 1000)
linear_classifier = linear_classifier.cuda()
linear_classifier = torch.nn.parallel.DistributedDataParallel(linear_classifier)

state_dict = torch.hub.load_state_dict_from_url(url="https://dl.fbaipublicfiles.com/dino/dino_deitsmall8_pretrain/dino_deitsmall8_linearweights.pth")['state_dict']
linear_classifier.load_state_dict(state_dict, strict=True)

# sequentialise
model = torch.nn.Sequential(model,
                            linear_classifier)

x = torch.ones((1, 3, 224, 224))
out = model(x)
print("out:", out)
Here is the print of the last layers of my sequentialised model:
[screenshot: last layers printed]
It looks like the output of model(x) (as defined by model = torch.hub...) has shape 1 x 384, but your linear_classifier expects something of shape _ x 1536, which is why you get this error. So you can just adjust the number of inputs by setting
linear_classifier = LinearClassifier(384, 1000)
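Following that suggestion, a minimal sketch of how the pieces could fit together. This assumes the dino_vits8 backbone really does emit 384-dimensional features, and it leaves aside the DistributedDataParallel wrapper and the pretrained linear weights from the question, which were trained for a 1536-dimensional input:

backbone = torch.hub.load('facebookresearch/dino:main', 'dino_vits8')
linear_classifier = LinearClassifier(384, 1000)

model = torch.nn.Sequential(backbone, linear_classifier)
out = model(torch.ones((1, 3, 224, 224)))
print("out:", out.shape)  # expected: torch.Size([1, 1000])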

conv1d() received an invalid combination of arguments

I tried to reproduce https://github.com/munhouiani/Deep-Packet and came across an error.
The program uses a CNN to classify network traffic. I decided to rewrite the program because I could not run the original on my computer. I am new to neural networks, so I cannot give a detailed description of the problem.
TypeError: conv1d() received an invalid combination of arguments - got (list, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (!list!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (!list!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
code:
import torch
from pathlib import Path
from torch import nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from argparse import Namespace
from pytorch_lightning import Trainer
import pytorch_lightning as pl
import numpy as np
class CNN(pl.LightningModule):
    def __init__(self, hparams):
        super().__init__()
        # config
        self.save_hyperparameters(hparams)
        self.data_path = self.hparams.data_path

        # two convolutions, then one max pool
        self.conv1 = nn.Sequential(
            nn.Conv1d(
                in_channels=1,
                out_channels=self.hparams.c1_output_dim,
                kernel_size=self.hparams.c1_kernel_size,
                stride=self.hparams.c1_stride
            ),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(
                in_channels=self.hparams.c1_output_dim,
                out_channels=self.hparams.c2_output_dim,
                kernel_size=self.hparams.c2_kernel_size,
                stride=self.hparams.c2_stride
            ),
            nn.ReLU()
        )
        self.max_pool = nn.MaxPool1d(
            kernel_size=2
        )

        # flatten, calculate the output size of max pool
        # use a dummy input to calculate
        dummy_x = torch.rand(1, 1, self.hparams.signal_length, requires_grad=False)
        dummy_x = self.conv1(dummy_x)
        dummy_x = self.conv2(dummy_x)
        dummy_x = self.max_pool(dummy_x)
        max_pool_out = dummy_x.view(1, -1).shape[1]

        # followed by 5 dense layers
        self.fc1 = nn.Sequential(
            nn.Linear(
                in_features=max_pool_out,
                out_features=200
            ),
            nn.Dropout(p=0.05),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(
                in_features=200,
                out_features=100
            ),
            nn.Dropout(p=0.05),
            nn.ReLU()
        )
        self.fc3 = nn.Sequential(
            nn.Linear(
                in_features=100,
                out_features=50
            ),
            nn.Dropout(p=0.05),
            nn.ReLU()
        )

        # finally, output layer
        self.out = nn.Linear(
            in_features=50,
            out_features=self.hparams.output_dim
        )

    def forward(self, x):
        # make sure the input is in [batch_size, channel, signal_length]
        # where channel is 1
        # signal_length is 1500 by default
        # batch_size = x.shape[0]
        batch_size = 16

        # 2 conv 1 max
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.max_pool(x)
        x = x.reshape(batch_size, -1)

        # 3 fc
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)

        # output
        x = self.out(x)
        return x

    def train_dataloader(self):
        reader = self.data_path
        dataloader = DataLoader(reader, batch_size=16)
        return dataloader

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters())

    def training_step(self, batch, batch_idx):
        x = batch
        y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        if (batch_idx % 50) == 0:
            self.logger.log_metrics(loss, step=batch_idx)
        return loss
num_epochs = 6
num_classes = 10
batch_size = 100
learning_rate = 0.001
train_dataset = "D:\Deep-Packet-master\Deep-Packet-master\processed_data"
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
hparams = Namespace(**{
    'c1_kernel_size': 4,
    'c1_output_dim': 200,
    'c1_stride': 3,
    'c2_kernel_size': 5,
    'c2_output_dim': 200,
    'c2_stride': 1,
    'output_dim': 17,
    'data_path': train_dataset,
    'signal_length': 1500,
    'epoch': 6
})
model = CNN(hparams).float()
gpus = None
trainer = Trainer(val_check_interval=4, max_epochs=1)
trainer.fit(model)
trainer.save_checkpoint(str(train_dataset.absolute()))
Please help.
I'm going to guess that your training_step is incorrect:
def training_step(self, batch, batch_idx):
    x = batch[0]
    y = batch[1]
    y_hat = self(x)
    loss = F.cross_entropy(y_hat, y)
    if (batch_idx % 50) == 0:
        self.logger.log_metrics(loss, step=batch_idx)
    return loss
In your code, you set both x and y to batch, which is presumably a tuple or a list, and conv1d cannot interpret a list as its input tensor.
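For context, a minimal sketch of why each batch arrives as an (inputs, labels) pair, assuming the data is wrapped in a TensorDataset (the file-path "dataset" in the question would need its own Dataset class to behave this way):

import torch
from torch.utils.data import DataLoader, TensorDataset

signals = torch.rand(64, 1, 1500)     # [N, channel, signal_length]
labels = torch.randint(0, 17, (64,))  # [N]
loader = DataLoader(TensorDataset(signals, labels), batch_size=16, shuffle=True)

x, y = next(iter(loader))             # each batch is an (inputs, labels) tuple
print(x.shape, y.shape)               # torch.Size([16, 1, 1500]) torch.Size([16])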

Transformer model not able to be saved

I'm trying to follow this tutorial: https://colab.research.google.com/github/tensorflow/examples/blob/master/community/en/transformer_chatbot.ipynb. However, when I tried to save the model in order to load it again without training, I got the error mentioned here: NotImplementedError: Layers with arguments in `__init__` must override `get_config`.
I understood from the answer that I need to make the encoder and decoder into classes and customise them (instead of leaving them as functions like in the Colab tutorial), so I went back to the TensorFlow documentation of this model here: https://www.tensorflow.org/tutorials/text/transformer#encoder_layer and tried to edit it. I made the encoder layer as:
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, rate=0.1, **kwargs):
        # super(EncoderLayer, self).__init__()
        super().__init__(**kwargs)
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            # 'vocab_size': self.vocab_size,
            # 'num_layers': self.num_layers,
            # 'units': self.units,
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dropout': self.dropout,
        })
        return config

    def call(self, x, training, mask):
        attn_output, _ = self.mha(x, x, x, mask)  # (batch_size, input_seq_len, d_model)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)

        ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)

        return out2
and the same for the decoder layer class. Then I used the same Encoder as in the TensorFlow documentation:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
                 maximum_position_encoding, rate=0.1):
        super(Encoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding,
                                                self.d_model)

        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate)
                           for _ in range(num_layers)]

        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask):
        seq_len = tf.shape(x)[1]

        # adding embedding and position encoding.
        x = self.embedding(x)  # (batch_size, input_seq_len, d_model)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[:, :seq_len, :]

        x = self.dropout(x, training=training)

        for i in range(self.num_layers):
            x = self.enc_layers[i](x, training, mask)

        return x  # (batch_size, input_seq_len, d_model)
The function that builds the model:
def transformer(vocab_size,
                num_layers,
                units,
                d_model,
                num_heads,
                dropout,
                name="transformer"):
    inputs = tf.keras.Input(shape=(None,), name="inputs")
    dec_inputs = tf.keras.Input(shape=(None,), name="dec_inputs")

    enc_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='enc_padding_mask')(inputs)
    # mask the future tokens for decoder inputs at the 1st attention block
    look_ahead_mask = tf.keras.layers.Lambda(
        create_look_ahead_mask,
        output_shape=(1, None, None),
        name='look_ahead_mask')(dec_inputs)
    # mask the encoder outputs for the 2nd attention block
    dec_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='dec_padding_mask')(inputs)

    enc_outputs = Encoder(
        num_layers=num_layers, d_model=d_model, num_heads=num_heads,
        input_vocab_size=vocab_size,
    )(inputs=[inputs, enc_padding_mask])

    dec_outputs = Decoder(
        num_layers=num_layers, d_model=d_model, num_heads=num_heads,
        target_vocab_size=vocab_size,
    )(inputs=[dec_inputs, enc_outputs, look_ahead_mask, dec_padding_mask])

    outputs = tf.keras.layers.Dense(units=vocab_size, name="outputs")(dec_outputs)

    return tf.keras.Model(inputs=[inputs, dec_inputs], outputs=outputs, name=name)
and calling the model:
# the model itself with its parameters:
# Hyper-parameters
NUM_LAYERS = 3
D_MODEL = 256
# D_MODEL = tf.cast(D_MODEL, tf.float32)
NUM_HEADS = 8
UNITS = 512
DROPOUT = 0.1

model = transformer(
    vocab_size=VOCAB_SIZE,
    num_layers=NUM_LAYERS,
    units=UNITS,
    d_model=D_MODEL,
    num_heads=NUM_HEADS,
    dropout=DROPOUT)
However, I got this error:
TypeError: __init__() missing 2 required positional arguments: 'dff' and 'maximum_position_encoding'
I am really confused. I don't understand what dff and maximum_position_encoding mean in the documentation, and when I removed them from the encoder and decoder classes I got another error, since the positional_encoding function takes the maximum position as input and dff is also used inside the class. I am not sure whether I am following the right steps or not.
If you get this error while calling transformer then your problem is with creating the model, not saving it.
Other than that, I see several issues with your get_config:
You defined dropout instead of rate.
The attributes you address (self.d_model etc.) are not defined or assigned at __init__.
get_config doesn't exist at all for your Encoder class.
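To make those points concrete, here is a minimal sketch of an EncoderLayer whose get_config stays consistent with __init__: every argument that get_config reports is stored as an attribute in the constructor, and dff is passed in rather than taken from a global. It reuses MultiHeadAttention and point_wise_feed_forward_network from the tutorial; the Encoder and Decoder classes would need the same treatment.

class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # store constructor arguments so get_config can report them
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.rate = rate

        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def get_config(self):
        config = super().get_config()
        config.update({
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dff': self.dff,
            'rate': self.rate,
        })
        return config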
