conv1d() received an invalid combination of arguments - python

I tried to repeat https://github.com/munhouiani/Deep-Packet and came across an error
This program uses CNN to classify network traffic. I decided to rewrite the program as I could not run the original on my computer. I am new to neural networks, so I cannot give a detailed description of the problem
TypeError: conv1d() received an invalid combination of arguments - got (list, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (!list!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (!list!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
code:
import torch
from pathlib import Path
from torch import nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from argparse import Namespace
from pytorch_lightning import Trainer
import pytorch_lightning as pl
import numpy as np
class CNN(pl.LightningModule):
    """1-D convolutional classifier implemented as a LightningModule.

    Layer stack: conv1 -> conv2 -> max_pool -> fc1 -> fc2 -> fc3 -> out.
    """

    def __init__(self, hparams):
        """Build every layer from the values stored in `hparams`.

        Attributes read from `hparams`: data_path, signal_length, output_dim,
        c1_output_dim, c1_kernel_size, c1_stride, c2_output_dim,
        c2_kernel_size and c2_stride.
        """
        super().__init__()
        # config
        self.save_hyperparameters(hparams)
        self.data_path = self.hparams.data_path
        # two convolution, then one max pool
        self.conv1 = nn.Sequential(
            nn.Conv1d(
                in_channels=1,
                out_channels=self.hparams.c1_output_dim,
                kernel_size=self.hparams.c1_kernel_size,
                stride=self.hparams.c1_stride
            ),
            nn.ReLU()
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(
                in_channels=self.hparams.c1_output_dim,
                out_channels=self.hparams.c2_output_dim,
                kernel_size=self.hparams.c2_kernel_size,
                stride=self.hparams.c2_stride
            ),
            nn.ReLU()
        )
        self.max_pool = nn.MaxPool1d(
            kernel_size=2
        )
        # flatten, calculate the output size of max pool
        # use a dummy input to calculate
        dummy_x = torch.rand(1, 1, self.hparams.signal_length, requires_grad=False)
        dummy_x = self.conv1(dummy_x)
        dummy_x = self.conv2(dummy_x)
        dummy_x = self.max_pool(dummy_x)
        # number of features per sample after flattening the pooled output
        max_pool_out = dummy_x.view(1, -1).shape[1]
        # followed by 3 dense layers (fc1..fc3), each Linear -> Dropout -> ReLU
        self.fc1 = nn.Sequential(
            nn.Linear(
                in_features=max_pool_out,
                out_features=200
            ),
            nn.Dropout(p=0.05),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(
                in_features=200,
                out_features=100
            ),
            nn.Dropout(p=0.05),
            nn.ReLU()
        )
        self.fc3 = nn.Sequential(
            nn.Linear(
                in_features=100,
                out_features=50
            ),
            nn.Dropout(p=0.05),
            nn.ReLU()
        )
        # finally, output layer
        self.out = nn.Linear(
            in_features=50,
            out_features=self.hparams.output_dim
        )

    def forward(self, x):
        """Run x through the conv stack, flatten it, and return the output-layer logits."""
        # make sure the input is in [batch_size, channel, signal_length]
        # where channel is 1
        # signal_length is 1500 by default
        #batch_size = x.shape[0]
        # NOTE(review): the batch size is fixed at 16 (the shape-derived line
        # above is commented out), so an input whose leading dimension is not
        # 16 is either rejected by reshape or silently regrouped into 16 rows.
        batch_size = 16
        # 2 conv 1 max
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.max_pool(x)
        x = x.reshape(batch_size, -1)
        # 3 fc
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        # output
        x = self.out(x)
        return x

    def train_dataloader(self):
        """Return a DataLoader (batch size 16) built directly over self.data_path."""
        # NOTE(review): self.data_path is whatever hparams.data_path holds and
        # is handed to DataLoader as the dataset itself — confirm it is a
        # Dataset object rather than a filesystem path.
        reader = self.data_path
        dataloader = DataLoader(reader, batch_size=16)
        return dataloader

    def configure_optimizers(self):
        """Use Adam with its default settings over all parameters."""
        return torch.optim.Adam(self.parameters())

    def training_step(self, batch, batch_idx):
        """Compute the cross-entropy loss for one batch; log it every 50th batch."""
        # NOTE(review): x and y are both bound to the whole batch object, so
        # the network is called on the batch container itself and the same
        # object is also used as the target.
        x = batch
        y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        if (batch_idx % 50) == 0:
            # NOTE(review): Lightning loggers document log_metrics as taking a
            # dict of metric name -> value; here the loss tensor is passed
            # directly — TODO confirm.
            self.logger.log_metrics(loss, step=batch_idx)
        return loss
# Training script: build the hyper-parameters, instantiate the CNN and fit it.
# NOTE(review): num_epochs, num_classes and learning_rate are not referenced
# again in this snippet.
num_epochs = 6
num_classes = 10
batch_size = 100
learning_rate = 0.001
# NOTE(review): train_dataset is a plain str (with backslashes in a non-raw
# literal), not a Dataset and not a pathlib.Path.
train_dataset = "D:\Deep-Packet-master\Deep-Packet-master\processed_data"
# NOTE(review): the DataLoader is built over that str; train_loader is not
# passed to trainer.fit below.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
hparams = Namespace(**{
'c1_kernel_size': 4,
'c1_output_dim': 200,
'c1_stride': 3,
'c2_kernel_size': 5,
'c2_output_dim': 200,
'c2_stride': 1,
'output_dim': 17,
'data_path': train_dataset,
'signal_length': 1500,
'epoch': 6
})
model = CNN(hparams).float()
# NOTE(review): gpus is assigned but not used afterwards in this snippet.
gpus = None
trainer = Trainer(val_check_interval=4, max_epochs=1)
trainer.fit(model)
# NOTE(review): str has no .absolute() method (that is a pathlib.Path method),
# so this line raises AttributeError as written.
trainer.save_checkpoint(str(train_dataset.absolute()))
Please, help

I'm going to guess that your training_step is incorrect:
def training_step(self, batch, batch_idx):
    """Run one training step on a single batch and return its loss.

    Args:
        batch: Indexable pair; element 0 holds the inputs and element 1 the
            class targets.
        batch_idx: Index of this batch; used to throttle logging.

    Returns:
        The cross-entropy loss between the model output and the targets.
    """
    # Unpack the batch instead of feeding the whole container to the network.
    x = batch[0]
    y = batch[1]
    y_hat = self(x)
    loss = F.cross_entropy(y_hat, y)
    if (batch_idx % 50) == 0:
        # log_metrics expects a mapping of metric name -> value, so wrap the
        # scalar loss in a dict instead of passing the tensor itself.
        self.logger.log_metrics({"train_loss": loss.item()}, step=batch_idx)
    return loss
In your code, you set both x and y to batch which should be a tuple or a list, which conv1d's forward cannot interpret.

Related

How to Use Encoder/Decoder Model with Different Dimensions of Input/Target

My Input data and target data are different types of data, and in different dimensions. The input data is 1D, and the target data is 2D. I don’t know how to modify the model to fit this situation.
I have built an Encoder/Decoder model with tokenized MIDI data as input, and coordinate data in CSV format as output.
The input dimension is
(num_of_data, sequence_length, data_dimension) = (22, 1000~3000, 1)
The target dimension is
(num_of_data, sequence_length, data_dimension) = (22, 1000~3000, 102)
The Encoder/Decoder model architecture I found :
class Encoder(nn.Module):
    """Project each timestep with a linear layer, then encode the sequence with an LSTM."""

    def __init__(
        self,
        input_size=2,
        embedding_size=128,
        hidden_size=256,
        n_layers=4,
        dropout=0.5,
    ):
        """Create the linear projection, the LSTM and the dropout layer.

        `input_size` is the feature count fed to the linear layer; the same
        `dropout` value is used inside the LSTM and after the projection.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.linear = nn.Linear(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, n_layers, dropout=dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return the LSTM's final (hidden, cell) state for `x`; per-step outputs are discarded."""
        projected = F.relu(self.linear(x))
        _, final_state = self.rnn(self.dropout(projected))
        hidden, cell = final_state
        return hidden, cell
class Decoder(nn.Module):
    """Single-step LSTM decoder: embeds one step, advances the LSTM, and predicts an output vector."""

    def __init__(
        self,
        output_size=2,
        embedding_size=128,
        hidden_size=256,
        n_layers=4,
        dropout=0.5,
    ):
        """Create the input embedding, the LSTM, the output projection and dropout.

        `output_size` is both the feature count the embedding accepts and the
        feature count the final linear layer produces.
        """
        super().__init__()
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.embedding = nn.Linear(output_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, n_layers, dropout=dropout)
        self.linear = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, hidden, cell):
        """Advance the decoder by one step and return (prediction, hidden, cell)."""
        # The LSTM is fed a sequence of length 1, hence the extra leading dim.
        step = x.unsqueeze(0)
        step = self.dropout(F.relu(self.embedding(step)))
        rnn_out, (next_hidden, next_cell) = self.rnn(step, (hidden, cell))
        prediction = self.linear(rnn_out.squeeze(0))
        return prediction, next_hidden, next_cell
class Seq2Seq(nn.Module):
    """Encoder/decoder wrapper that decodes step by step with optional teacher forcing."""

    def __init__(self, encoder, decoder, device):
        """Store the two sub-models and the device on which outputs are allocated.

        Raises:
            AssertionError: if encoder and decoder disagree on hidden_size or n_layers.
        """
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        assert encoder.hidden_size == decoder.hidden_size, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"

    def forward(self, x, y, teacher_forcing_ratio=0.5):
        """Decode y.shape[0] steps and return a tensor shaped like `y`.

        Args:
            x: Source tensor, indexed as x[-1, :, :] (3-D, step axis first).
            y: Target tensor; y[i] is the ground truth for step i.
            teacher_forcing_ratio: Probability of feeding y[i] rather than the
                model's own output as the next decoder input.
        """
        target_len = y.shape[0]
        # Allocate directly on the target device instead of creating the
        # buffer on the CPU and copying it over.
        outputs = torch.zeros(y.shape, device=self.device)
        hidden, cell = self.encoder(x)
        # NOTE(review): the first decoder input is the last source step, so
        # x's feature size must equal what the decoder accepts as input; with
        # sources and targets of different widths this line needs a separate
        # start vector.
        decoder_input = x[-1, :, :]
        for i in range(target_len):
            output, hidden, cell = self.decoder(decoder_input, hidden, cell)
            outputs[i] = output
            teacher_forcing = random.random() < teacher_forcing_ratio
            decoder_input = y[i] if teacher_forcing else output
        return outputs
I set the input data and target data length to 900 as they need to have the same sequence_length before I convert them to tensor:
tokenized_data shape: (22, n)
target_data shape: (22, m, 102)
↓
tokenized_data shape: (22, 900)
target_data shape: (22, 900, 102)
# Convert the prepared arrays to tensors with the torch.Tensor constructor.
# NOTE(review): presumably input_data / target_data are the length-900 arrays
# described just above — confirm against the preprocessing code.
input_tensor = torch.Tensor(input_data)
target_tensor = torch.Tensor(target_data)
torch.Size([22, 900])
torch.Size([22, 900, 102])
The parameters and model are listed below:
# Move the data to the device and build the Seq2Seq model.
source = input_tensor.to(device)
target = target_tensor.to(device)
# NOTE(review): Encoder passes input_size to nn.Linear(input_size, ...), i.e.
# it is the per-step feature count, not the sequence length — presumably 1 for
# this data; TODO confirm.
input_size = 900 # I am not sure if this is correct
# NOTE(review): Decoder passes output_size to nn.Linear(output_size, ...),
# which takes a single int; a tuple here matches the "must be tuple of ints"
# TypeError reported below. Presumably 102 is intended — TODO confirm.
output_size = (900,102) # I am not sure if this is correct
print('Input : {} Output : {}'.format(input_size, output_size))
embed_size = 256
hidden_size = 512
num_layers = 3
num_iteration = 100
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
# NOTE(review): both constructors declare (size, embedding_size, hidden_size,
# n_layers, dropout) in that order; here hidden_size is passed in the
# embedding_size slot and embed_size in the hidden_size slot.
encoder = Encoder(input_size, hidden_size, embed_size, num_layers, ENC_DROPOUT)
decoder = Decoder(output_size, hidden_size, embed_size, num_layers, DEC_DROPOUT)
model = Seq2Seq(encoder, decoder, device).to(device)
I then tried to train the model with:
model = trainModel(model, source, target, num_iteration)
The above results in the error message shown below:
TypeError: empty(): argument 'size' must be tuple of ints, but found element of type tuple at pos 2
I revised output_size = (900,102) to output_size = 900, but then I got the error below:
IndexError: too many indices for tensor of dimension 2
I am having problems with the Encoder/Decoder model that can’t deal with the input and target tensors having different dimensions. Any help or advice on how to create an Encoder/Decoder model that will take input and target tensors in different dimensions will be greatly appreciated.
Thank you in advance for your input.

RuntimeError: mat1 dim 1 must match mat2 dim 0 for Sequential of DINO model

I’m getting the runtime error when trying to Sequentialise my linear layer with a DINO backbone from torch.hub.
import torch
import torch.distributed as dist
class LinearClassifier(torch.nn.Module):
    """A single linear layer applied to flattened per-sample features."""

    def __init__(self, dim, num_labels=1000):
        """Create a `dim` -> `num_labels` linear layer with N(0, 0.01) weights and zero bias."""
        super().__init__()
        self.num_labels = num_labels
        self.linear = torch.nn.Linear(dim, num_labels)
        weight, bias = self.linear.weight, self.linear.bias
        weight.data.normal_(mean=0.0, std=0.01)
        bias.data.zero_()

    def forward(self, x):
        """Flatten every sample to one row and return the linear layer's output."""
        n_samples = x.size(0)
        flat = x.view(n_samples, -1)
        return self.linear(flat)
# Single-process "distributed" setup so the classifier can be wrapped in
# DistributedDataParallel below.
dist.init_process_group('gloo', init_method='file:///tmp/somefile', rank=0, world_size=1)
# load backbone
model = torch.hub.load('facebookresearch/dino:main', 'dino_vits8')
#Setup linear layer
# NOTE(review): the classifier is built for 1536 input features per sample;
# LinearClassifier.forward flattens whatever the backbone returns, so the
# backbone output must flatten to exactly that many features — TODO confirm
# against the backbone's actual output size.
linear_classifier = LinearClassifier(1536, 1000)
linear_classifier = linear_classifier.cuda()
linear_classifier = torch.nn.parallel.DistributedDataParallel(linear_classifier)
state_dict = torch.hub.load_state_dict_from_url(url="https://dl.fbaipublicfiles.com/dino/dino_deitsmall8_pretrain/dino_deitsmall8_linearweights.pth")['state_dict']
linear_classifier.load_state_dict(state_dict, strict=True)
#Sequentialise
model = torch.nn.Sequential(model,
linear_classifier)
# NOTE(review): x is created on the CPU while linear_classifier was moved to
# CUDA above — verify both halves of the Sequential and the input share a device.
x = torch.ones((1, 3, 224, 224))
out = model(x)
# NOTE(review): `out` is the return value of model(x), not a str, so
# "out: " + out raises TypeError once the forward pass succeeds;
# print("out:", out) is the usual form.
print("out: " + out)
Here is the print of the last layers of my sequentialised model:
last layers printed
It looks like the output of model(x) (with model as defined by model = torch.hub...) has shape 1 x 384, but your linear_classifier expects an input of shape _ x 1536, which is why you get this error. So you can just adjust the number of input features by setting
linear_classifier = LinearClassifier(384, 1000)

No performance improvement using quantization model in pytorch

I have trained a model in pytorch with float data type. I want to improve my inference time by converting this model to quantized model. I have used torch.quantization.convert api to convert my model's weight to uint8 data type. However, when I use this model for inference, I do not get any performance improvement. Am I doing something wrong here ?
The Unet Model code:
def gen_initialization(m):
    """Initialise `m` in place when it is a Conv2d or BatchNorm2d; ignore other modules.

    Intended to be passed to ``Module.apply``. Conv2d weights are drawn from a
    zero-mean normal whose std is sqrt(2 / (out_channels * kH * kW)) and the
    bias is zeroed; BatchNorm2d weight is set to 1 and bias to 0. Parameters
    that are absent (``bias=False`` or ``affine=False``) are skipped rather
    than crashing the initialiser.
    """
    if isinstance(m, nn.Conv2d):
        out_ch, _, k_h, k_w = m.weight.shape
        nn.init.normal_(m.weight, std=math.sqrt(2.0 / (out_ch * k_h * k_w)))
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm2d):
        if m.weight is not None:
            nn.init.constant_(m.weight, 1)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
class TripleConv(nn.Module):
    """Three 3x3 Conv2d -> BatchNorm2d -> LeakyReLU(0.1) stages: in_ch -> mid -> mid -> out_ch."""

    def __init__(self, in_ch, out_ch):
        """Build the stages; the middle width is the floor of the mean of in_ch and out_ch."""
        super().__init__()
        mid_ch = (in_ch + out_ch) // 2
        stages = []
        for src_ch, dst_ch in ((in_ch, mid_ch), (mid_ch, mid_ch), (mid_ch, out_ch)):
            stages.extend([
                nn.Conv2d(src_ch, dst_ch, kernel_size=3, stride=1, padding=1, bias=True),
                nn.BatchNorm2d(num_features=dst_ch),
                nn.LeakyReLU(negative_slope=0.1),
            ])
        self.conv = nn.Sequential(*stages)
        # Apply the file's custom initialiser to every sub-module.
        self.conv.apply(gen_initialization)

    def forward(self, x):
        """Return the output of the three conv stages for `x`."""
        return self.conv(x)
class Down(nn.Module):
    """Encoder stage: TripleConv plus a zero-padded identity shortcut, then 2x2 average pooling."""

    def __init__(self, in_ch, out_ch):
        """Create the conv stack and the pooling layer; remember both channel counts."""
        super().__init__()
        self.triple_conv = TripleConv(in_ch, out_ch)
        self.avg_pool_conv = nn.AvgPool2d(2, 2)
        self.in_ch = in_ch
        self.out_ch = out_ch

    def forward(self, x):
        """Return the pooled features; the pre-pooling tensor is kept in self.cache."""
        skip = self.triple_conv(x)
        self.cache = skip
        # Widen x with (out_ch - in_ch) all-zero channels so it can be added
        # to the conv output.
        n, _, h, w = x.shape[0], x.shape[1], x.shape[2], x.shape[3]
        zeros = torch.zeros(n, self.out_ch - self.in_ch, h, w, device=x.device)
        widened = torch.cat((x, zeros), dim=1)
        # In-place add, so self.cache (the same tensor) sees the shortcut too.
        skip += widened
        return self.avg_pool_conv(skip)
class Center(nn.Module):
    """Bottleneck block: one 3x3 Conv2d -> BatchNorm2d -> in-place LeakyReLU(0.1)."""

    def __init__(self, in_ch, out_ch):
        """Build the block and run the file's custom initialiser over it."""
        super().__init__()
        layers = [
            nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(num_features=out_ch),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),
        ]
        self.conv = nn.Sequential(*layers)
        self.conv.apply(gen_initialization)

    def forward(self, x):
        """Return the block's output for `x`."""
        return self.conv(x)
class Up(nn.Module):
    """Decoder stage: 2x bilinear upsampling, concatenation with a cached tensor, then TripleConv."""

    def __init__(self, in_ch, out_ch):
        """`in_ch` is the channel count after concatenation, i.e. what TripleConv receives."""
        super().__init__()
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear',
                                    align_corners=True)
        self.triple_conv = TripleConv(in_ch, out_ch)

    def forward(self, x, cache):
        """Upsample `x`, concatenate `cache` along dim 1 and return the convolved result."""
        upsampled = self.upsample(x)
        merged = torch.cat((upsampled, cache), dim=1)
        return self.triple_conv(merged)
class UNet(nn.Module):
    """Four Down stages, a Center block, four Up stages and a final 3x3 conv back to in_ch channels."""

    def __init__(self, in_ch, first_ch=None):
        """Build the network; `first_ch` (default 32 when falsy) is the width of the first stage."""
        super().__init__()
        width = first_ch or 32
        self.down1 = Down(in_ch, width)
        self.down2 = Down(width, width * 2)
        self.down3 = Down(width * 2, width * 4)
        self.down4 = Down(width * 4, width * 8)
        self.center = Center(width * 8, width * 8)
        # Each Up stage receives the upsampled features concatenated with the
        # matching Down stage's cache, hence the doubled input widths.
        self.up4 = Up(width * 8 * 2, width * 4)
        self.up3 = Up(width * 4 * 2, width * 2)
        self.up2 = Up(width * 2 * 2, width)
        self.up1 = Up(width * 2, width)
        self.output = nn.Conv2d(width, in_ch, kernel_size=3, stride=1,
                                padding=1, bias=True)
        self.output.apply(gen_initialization)

    def forward(self, x):
        """Run the encoder, the bottleneck and the decoder, and return the final conv output."""
        for down in (self.down1, self.down2, self.down3, self.down4):
            x = down(x)
        x = self.center(x)
        decoder_path = (
            (self.up4, self.down4),
            (self.up3, self.down3),
            (self.up2, self.down2),
            (self.up1, self.down1),
        )
        for up, down in decoder_path:
            # Each Down stage stored its pre-pooling features in .cache.
            x = up(x, down.cache)
        return self.output(x)
The inference code:
from tqdm import tqdm
import os
import numpy as np
import torch
import gan_network
import torch.nn.parallel
from torch.utils.data import DataLoader
import torch.utils.data as data
import random
import glob
import scipy.io
import time
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"
class DataFolder(data.Dataset):
    """Dataset over a collection of ``.npy`` file paths, served in shuffled order."""

    def __init__(self, file):
        """Keep every non-empty entry of `file` (an iterable of paths) and shuffle them."""
        super().__init__()
        self.image_names = [entry for entry in file if entry != '']
        random.shuffle(self.image_names)

    def __len__(self):
        """Number of stored paths."""
        return len(self.image_names)

    def __getitem__(self, index):
        """Load the array at `index`, move its axis 2 to the front, and return (tensor, path)."""
        path = self.image_names[index]
        reordered = np.moveaxis(np.load(path), 2, 0)
        return torch.from_numpy(reordered), path
if __name__ == '__main__':
    # Inference script: load the trained UNet, run it over every .npy file in
    # the test folder, save each result as a .mat file and time the loop.
    batch_size = 1
    image_size = 2048
    channels = 6
    model_path = 'D:/WorkProjects/Network_Training_Aqusens/FullFovReconst/network/network_epoch9.pth'
    test_data = glob.glob('D:/save/temp/*.npy')
    dest_dir = 'D:/save/temp/results/'
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net = gan_network.UNet(6, 32)
    if torch.cuda.device_count() > 1:
        net = torch.nn.DataParallel(net)
    net.to(device)
    net.load_state_dict(torch.load(model_path))
    # NOTE(review): quantized_model is created here but never used below — the
    # timed loop calls `net`, so the timing measures the float model unless
    # the call is swapped by hand. Also confirm against the quantization docs
    # that a set of layer types is a valid second argument to convert().
    quantized_model = torch.quantization.convert(net, {torch.nn.Conv2d, torch.nn.BatchNorm2d}, inplace=False)
    dataset = DataFolder(file=test_data)
    print(f'{len(dataset)}')
    data_loader = DataLoader(dataset=dataset, num_workers=4,
                             batch_size=batch_size, shuffle=False,
                             drop_last=False, pin_memory=True)
    # Pre-allocated input buffer on the device; each batch is copied into it.
    # NOTE(review): `input` shadows the Python builtin of the same name.
    input = torch.Tensor(batch_size, channels, image_size, image_size).to(device)
    t0 = time.time()
    with torch.no_grad():
        for i, batch in enumerate(tqdm(data_loader)):
            # batch[0] holds the image tensors, batch[1] the matching paths.
            input.copy_(batch[0])
            output = net(input).cpu().clone().numpy()
            # NOTE(review): the result of np.array(output) is discarded, so
            # this statement has no effect.
            np.array(output)
            # Move axis 1 to the end before saving.
            output = np.rollaxis(output, 1, 4)
            for num in range(batch_size):
                arr = output[num, :, :, :]
                file_name = os.path.basename(batch[1][num])
                save_name = os.path.join(dest_dir, file_name)
                save_name = save_name.replace(".npy", "")
                scipy.io.savemat(save_name+'.mat', {'output': arr})
    t1 = time.time()
    print(f'Elapsed time = {t1-t0}')
For both net and quantized_model, I get an elapsed time of around 30 seconds for 12 images passed through them.
PyTorch documentation suggests three ways to perform quantization. You are doing post-training dynamic quantization (the simplest quantization method available) which only supports torch.nn.Linear and torch.nn.LSTM layers as listed here. To quantize CNN layers, you would want to check out the other two techniques (these are the ones that support CNN layers): post-training static quantization and quantization aware training. This tutorial shows both these techniques applied on CNNs.
I have tried the static quantization approach on Yolov5: it cuts the model size by 73% and decreases inference time by ~13-15%. It seems to me that conv layers gain less in latency than linear layers, because the same static approach on a toy fully-dense MNIST net decreased inference time by a factor of 4.

Transformer model not able to be saved

I'm trying to follow this tutorial https://colab.research.google.com/github/tensorflow/examples/blob/master/community/en/transformer_chatbot.ipynb, but when I tried to save the model in order to load it again without retraining, I got the error mentioned here: NotImplementedError: Layers with arguments in `__init__` must override `get_config`
I understood from the answer that I need to turn the encoder and decoder into classes and customise them (instead of leaving them as functions like in the Colab tutorial), so I went back to the TensorFlow documentation of this model here: https://www.tensorflow.org/tutorials/text/transformer#encoder_layer and tried to edit it. I made the encoder layer as:
class EncoderLayer(tf.keras.layers.Layer):
    """Keras layer: multi-head self-attention and a feed-forward network, each followed by dropout, a residual add and layer normalisation."""

    def __init__(self, d_model, num_heads, rate=0.1,**kwargs,):
        #super(EncoderLayer, self).__init__()
        super().__init__(**kwargs)
        self.mha = MultiHeadAttention(d_model, num_heads)
        # NOTE(review): `dff` is not a parameter of this __init__, so this line
        # only works if a global named dff exists.
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def get_config(self):
        """Return the base layer config extended with this layer's constructor settings."""
        # NOTE(review): __init__ above never assigns self.d_model,
        # self.num_heads or self.dropout, and the constructor argument is
        # named `rate`, not `dropout` — so these lookups and the 'dropout' key
        # do not match the constructor as written.
        config = super().get_config().copy()
        config.update({
            #'vocab_size': self.vocab_size,
            #'num_layers': self.num_layers,
            #'units': self.units,
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dropout': self.dropout,
        })
        return config

    def call(self, x, training, mask):
        """Apply self-attention and the feed-forward block to x and return the result."""
        attn_output, _ = self.mha(x, x, x, mask) # (batch_size, input_seq_len, d_model)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output) # (batch_size, input_seq_len, d_model)
        ffn_output = self.ffn(out1) # (batch_size, input_seq_len, d_model)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output) # (batch_size, input_seq_len, d_model)
        return out2
and same for the decoder layer class. Then the same encoder in the documentation of tf
class Encoder(tf.keras.layers.Layer):
    """Keras layer: token embedding plus positional encoding, followed by a stack of EncoderLayer blocks."""

    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
                 maximum_position_encoding, rate=0.1):
        # NOTE(review): dff and maximum_position_encoding have no defaults, so
        # every caller must supply them. This class also accepts no **kwargs
        # and defines no get_config.
        super(Encoder, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding,
                                                self.d_model)
        # NOTE(review): four positional arguments are passed here (d_model,
        # num_heads, dff, rate); the EncoderLayer.__init__ shown in this file
        # declares only d_model, num_heads and rate positionally — verify the
        # two signatures agree.
        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate)
                           for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask):
        """Embed x, add the positional encoding, apply dropout and run every encoder layer in order."""
        seq_len = tf.shape(x)[1]
        # adding embedding and position encoding.
        x = self.embedding(x) # (batch_size, input_seq_len, d_model)
        # scale the embeddings by sqrt(d_model) before adding the positions
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[:, :seq_len, :]
        x = self.dropout(x, training=training)
        for i in range(self.num_layers):
            x = self.enc_layers[i](x, training, mask)
        return x # (batch_size, input_seq_len, d_model)
the function of the model as:
def transformer(vocab_size,
                num_layers,
                units,
                d_model,
                num_heads,
                dropout,
                name="transformer"):
    """Assemble a functional Keras model from an Encoder, a Decoder and a final Dense layer.

    The model takes two inputs ("inputs" and "dec_inputs") and returns the
    output of a Dense layer with vocab_size units applied to the decoder output.
    """
    # NOTE(review): the `units` and `dropout` parameters are not referenced
    # anywhere in this function body.
    inputs = tf.keras.Input(shape=(None,), name="inputs")
    dec_inputs = tf.keras.Input(shape=(None,), name="dec_inputs")
    enc_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='enc_padding_mask')(inputs)
    # mask the future tokens for decoder inputs at the 1st attention block
    look_ahead_mask = tf.keras.layers.Lambda(
        create_look_ahead_mask,
        output_shape=(1, None, None),
        name='look_ahead_mask')(dec_inputs)
    # mask the encoder outputs for the 2nd attention block
    dec_padding_mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name='dec_padding_mask')(inputs)
    # NOTE(review): the Encoder class shown in this file requires dff and
    # maximum_position_encoding, which are not passed here (this matches the
    # reported "missing 2 required positional arguments" TypeError). Its call
    # signature is (x, training, mask), whereas here it is invoked with a
    # single `inputs=[...]` list — verify the intended calling convention.
    enc_outputs = Encoder(
        num_layers=num_layers, d_model=d_model, num_heads=num_heads,
        input_vocab_size=vocab_size,
    )(inputs=[inputs, enc_padding_mask])
    dec_outputs = Decoder(
        num_layers=num_layers, d_model=d_model, num_heads=num_heads,
        target_vocab_size=vocab_size,
    )(inputs=[dec_inputs, enc_outputs, look_ahead_mask, dec_padding_mask])
    outputs = tf.keras.layers.Dense(units=vocab_size, name="outputs")(dec_outputs)
    return tf.keras.Model(inputs=[inputs, dec_inputs], outputs=outputs, name=name)
and calling the model:
# The model itself with its parameters:
# Hyper-parameters
NUM_LAYERS = 3
D_MODEL = 256
#D_MODEL=tf.cast(D_MODEL, tf.float32)
NUM_HEADS = 8
UNITS = 512
DROPOUT = 0.1
# Build the model. VOCAB_SIZE is not defined in this snippet.
# NOTE(review): presumably it is defined earlier in the notebook — confirm.
model = transformer(
vocab_size=VOCAB_SIZE,
num_layers=NUM_LAYERS,
units=UNITS,
d_model=D_MODEL,
num_heads=NUM_HEADS,
dropout=DROPOUT)
However, I got that error:
TypeError: __init__() missing 2 required positional arguments: 'dff' and 'maximum_position_encoding'
I am really confused: I don't understand what dff and maximum_position_encoding mean in the documentation. When I removed them from the encoder and decoder classes, I got another error, because the positional_encoding function takes the maximum position as input and dff is also passed as an input inside the class. I am not sure what I should do, or whether I am following the right steps at all.
If you get this error while calling transformer then your problem is with creating the model, not saving it.
Other than that, I see several issues with your get_config:
You defined dropout instead of rate.
The attributes you address (self.d_model etc.) are not defined or assigned at __init__.
It doesn't exist for your Encoder class.

One-hot encoding in pytorch/torchtext

I have a Bucketiterator from torchtext that I feed to a model in pytorch. An example of how the iterator is constructed:
# Build the training and validation iterators in one call; examples are
# sorted within each batch by the key (len(src), len(trg)).
train_iter, val_iter = BucketIterator.splits((train,val),
                                             batch_size=batch_size,
                                             sort_within_batch = True,
                                             device = device,
                                             shuffle=True,
                                             sort_key=lambda x: (len(x.src), len(x.trg)))
The data is then fed to a model like this, where I use the nn.Embedding layer.
class encoder(nn.Module):
    """Embedding + LSTM encoder that also records the LSTM state after each step."""

    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        """Create the embedding table, the LSTM and the dropout layer.

        Args:
            input_dim: Vocabulary size of the embedding table.
            emb_dim: Embedding width (LSTM input size).
            hid_dim: LSTM hidden size.
            n_layers: Number of stacked LSTM layers.
            dropout: Dropout probability, used inside the LSTM and on the embeddings.
        """
        super().__init__()
        self.input_dim = input_dim
        self.emb_dim = emb_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout=dropout)
        # self.dropout is the Dropout module; the earlier float assignment to
        # the same attribute was immediately overwritten and has been dropped.
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        """Feed `src` through the LSTM one step at a time.

        Args:
            src: Index tensor laid out as [src sent len, batch size].

        Returns:
            (hidden, hidden_enc): `hidden` is the final (h, c) state tuple of
            the LSTM; `hidden_enc` is a list with one CPU copy of the (h, c)
            tuple per step from the second step onwards (the state after the
            first step is not recorded, as before).
        """
        # embedded = [src sent len, batch size, emb dim]
        embedded = self.dropout(self.embedding(src))
        hidden_enc = []
        # First step starts from the LSTM's default (zero) state.
        _, hidden = self.rnn(embedded[0].unsqueeze(0))
        # Iterate over the step axis directly. The previous
        # len(embedded[:, 1, 1]) raised IndexError whenever the batch size or
        # the embedding width was 1.
        for step in embedded[1:]:
            _, hidden = self.rnn(step.unsqueeze(0), hidden)
            hidden_enc.append(tuple(state.cpu() for state in hidden))
        return hidden, hidden_enc
But what if I wanted the embedding to be one-hot encoded? I work on formal languages and it would be nice to preserve orthogonality between tokens. It doesn't seem like pytorch or torchtext has any functionality for doing this.
def get_one_hot_torch_tensor(in_tensor):
    """Convert an integer tensor (e.g. 1-D or 2-D) to a one-hot encoded tensor.

    Args:
        in_tensor: Tensor of non-negative class indices.

    Returns:
        A float tensor of shape ``(n_channels, *in_tensor.shape)`` where
        ``n_channels = in_tensor.max() + 1`` and
        ``result[c, ...] == 1`` exactly where ``in_tensor == c``.
        The channel axis comes first, as in the original 2-D layout.
    """
    n_channels = int(torch.max(in_tensor)) + 1  # maximum number of channels
    out_one_hot = torch.zeros(
        (n_channels, *in_tensor.shape), device=in_tensor.device
    )
    # scatter_ along dim 0 writes a 1 at [in_tensor[...], ...] for every
    # position, which covers 1-D and 2-D inputs with the same code path.
    out_one_hot.scatter_(0, in_tensor.unsqueeze(0).long(), 1.0)
    return out_one_hot

Categories