[image: model diagram]
So we've translated this image into code:
import collections
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

class pre_h(nn.Module):
    def __init__(self, vocab_size, hidden_size, window_size):
        super().__init__()
        self.window_size = window_size
        self.in_layer = nn.Embedding(vocab_size, hidden_size)
        self.weight_list = []

    def forward(self, contexts):
        for i in range(2 * self.window_size):
            weight = self.in_layer(contexts[:, i])
            self.weight_list.append(weight)
        h = sum(self.weight_list)
        h = h / len(self.weight_list)
        return h
class UnigramSampler:
    def __init__(self, corpus, power, sample_size):
        super().__init__()
        self.sample_size = sample_size
        self.vocab_size = None
        self.word_p = None

        counts = collections.Counter()
        for word_id in corpus:
            counts[word_id] += 1

        vocab_size = len(counts)
        self.vocab_size = vocab_size

        self.word_p = np.zeros(vocab_size)
        for i in range(vocab_size):
            self.word_p[i] = counts[i]

        self.word_p = np.power(self.word_p, power)
        self.word_p /= np.sum(self.word_p)

    def get_negative_sample(self, target):
        batch_size = target.shape[0]
        negative_sample = np.random.choice(self.vocab_size, size=(batch_size, self.sample_size), replace=True, p=self.word_p)
        negative_sample = torch.tensor(negative_sample)
        return negative_sample
class NegativeSampling(nn.Module):
    def __init__(self, vocab_size, hidden_size, corpus, power=0.75, sample_size=5):
        super().__init__()
        self.sample_size = sample_size
        self.sampler = UnigramSampler(corpus, power, sample_size)
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.sigmoid = nn.Sigmoid()
        self.loss_layer = nn.CrossEntropyLoss()
        self.hidden_size = hidden_size

    def forward(self, h, target):
        loss_data = 0

        # positive example
        w = self.embedding(target)
        w = torch.squeeze(w).reshape(self.hidden_size, -1)
        loss = h @ w
        loss = self.sigmoid(loss)
        correct_label = torch.ones_like(loss)
        loss = self.loss_layer(loss, correct_label)
        loss_data += loss

        # negative examples
        negative_target = self.sampler.get_negative_sample(target)
        for i in range(self.sample_size):
            w = self.embedding(negative_target[:, i])
            w = torch.squeeze(w).reshape(self.hidden_size, -1)
            loss = h @ w
            loss = self.sigmoid(loss)
            negative_label = torch.zeros_like(loss)
            loss = self.loss_layer(loss, negative_label)
        return loss_data
class sample_cbow(nn.Module):
    def __init__(self, vocab_size, hidden_size, corpus, window_size, power=0.75, sample_size=5):
        super().__init__()
        self.model_1 = pre_h(vocab_size, hidden_size, window_size)
        self.model_2 = NegativeSampling(vocab_size, hidden_size, corpus, power, sample_size)

    def forward(self, contexts, target):
        out = self.model_1(contexts)
        out = self.model_2(out, target)
        return out
vocab_size = len(id_to_word)
model = sample_cbow(vocab_size, 100, corpus, 1)

learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # set up the Adam optimizer

i = 1
for contexts, target in dataloader:
    print('===================================')
    print(i, 'experiment')
    contexts.to(device)
    target.to(device)
    model.to(device)
    output = model(contexts, target)
    optimizer.zero_grad()
    output.backward()
    optimizer.step()
Here is the dataset I used. I'm leaving this just in case.
class Corpus_Dataset(Dataset):
    def __init__(self, corpus, window_size=1):
        contexts, target = create_contexts_target(corpus, window_size)
        self.contexts = contexts
        self.target = target

    def __len__(self):
        return len(self.contexts)

    def __getitem__(self, idx):
        contexts = torch.tensor(self.contexts[idx], dtype=torch.long)
        target = torch.tensor(self.target[idx], dtype=torch.long)
        return contexts, target

dataset = Corpus_Dataset(corpus, window_size=2)
dataloader = DataLoader(dataset, batch_size=100)
RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward
We know that after backward() is called, the computation graph is freed and can no longer be used.
But I only call backward() once per batch, and I still get this error even though I never reuse the graph.
If backward() failed from the very first batch, I would suspect a problem with the model itself, but it runs normally for only the first one or two batches and then does not proceed any further.
I tried clone() and detach() on various tensors, but the error was not resolved.
I've searched and thought about what the problem could be, but I can't come up with a solution anymore.
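For reference, here is a minimal sketch that reproduces the same error when a tensor from a previous iteration is reused; this is only a guess at the cause (weight_list above is never cleared between batches), not something from my actual training run:

import torch
import torch.nn as nn

emb = nn.Embedding(10, 4)
cache = []                       # like self.weight_list: never reset between batches
for step in range(2):
    cache.append(emb(torch.tensor([step])))
    loss = sum(cache).sum()      # second iteration still references the first graph
    loss.backward()              # raises "Trying to backward through the graph a second time"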
I'm sorry for my poor English.
I am trying to use the vanilla transformer from PyTorch with PyTorch Lightning. I tested the model on a number-reversal task: given [1, 3, 5, 4, 13, 19] it should return [1, 13, 4, 5, 3, 19], with 1 and 19 being the start and end tokens respectively. The full code is below. The code runs without errors, but there seems to be a problem with the backpropagation. The training loss does go down at first, but it never gets below 2.8 and the accuracy never goes beyond 11%.
It seems that part of the model is able to optimize. I am guessing that the weights in Embeddings and Generator can backpropagate, but the weights inside nn.Transformer cannot? I am really not sure.
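One way to test that guess is to check, after a single backward pass, which parameters actually receive gradients. A minimal sketch (assuming a model and a loss computed as in training_step below):

loss.backward()
for name, p in model.named_parameters():
    got_grad = p.grad is not None and p.grad.abs().sum().item() > 0
    print(name, "grad" if got_grad else "no grad")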
import math
import torch.nn.functional as F
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping
class Embeddings(pl.LightningModule):
    def __init__(self, d_model, vocab):
        super(Embeddings, self).__init__()
        self.lut = nn.Embedding(vocab, d_model)
        self.d_model = d_model

    def forward(self, x):
        a = self.lut(x) * math.sqrt(self.d_model)
        return a
class PositionalEncoding(pl.LightningModule):
    def __init__(self, d_model, dropout, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) *
                             -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)
class Generator(pl.LightningModule):
    def __init__(self, size):
        super(Generator, self).__init__()
        self.proj = nn.Linear(512, size)

    def forward(self, x):
        return F.log_softmax(self.proj(x), dim=-1)
class Model(pl.LightningModule):
    def __init__(self, src_embed, tgt_embed, transformer, generator):
        super(Model, self).__init__()
        self.src_embed = src_embed
        self.tgt_embed = tgt_embed
        self.transformer = transformer
        self.generator = generator
        self.valLoss = 0
        self.valAcc = 0
        self.automatic_optimization = False
        self.optimizer = None

        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, x, y, tgt_mask=None):
        x = self.src_embed(x)
        y = self.tgt_embed(y)
        return self.generator(self.transformer(x, y, tgt_mask=tgt_mask))

    def training_step(self, batch, batch_idx):
        if self.optimizer is None:
            self.optimizer = self.optimizers()
        batch = Batch(batch[0], batch[1])
        tgt_mask = batch.trg_mask.squeeze(0)
        tgt_mask = (tgt_mask != True)
        output = self(batch.src, batch.trg, tgt_mask)
        criterion = LossCompute(V)
        loss = criterion.forward(output.contiguous().view(-1, output.size(-1)), batch.trg_y.contiguous().view(-1)) / batch.ntokens
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        self.log('train_loss', loss)
        print(loss)

    def validation_step(self, batch, batch_idx):
        batch = Batch(batch[0], batch[1])
        tgt_mask = batch.trg_mask.squeeze(0)
        tgt_mask = (tgt_mask != True)
        output = self(batch.src, batch.trg, tgt_mask)
        criterion = LossCompute(V)
        loss = criterion.forward(output.view(-1, output.size(-1)), batch.trg_y.contiguous().view(-1)) / batch.ntokens
        self.log('val_loss', loss)
        self.valLoss += loss
        if batch_idx % 10 == 0:
            print(loss)
        if batch_idx == 99:
            print(self.valLoss / 100)
            self.valLoss = 0
        return {"x": output, "trg": batch.trg_y, "index": batch_idx}

    def validation_step_end(self, batch):
        output, trg, idx = batch["x"], batch["trg"], batch["index"]
        accuracy = getAccuracy(output, trg)
        self.log("accuracy", accuracy)
        self.valAcc += accuracy
        if idx == 99:
            print(self.valAcc / 100)
            self.valAcc = 0

    def train_dataloader(self):
        data = data_gen(V, 0, 3000)
        return DataLoader(data, batch_size=30, shuffle=False, num_workers=2, pin_memory=True)

    def val_dataloader(self):
        data = data_gen(V, 1, 1000)
        return DataLoader(data, batch_size=10, shuffle=False, num_workers=2, pin_memory=True)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3, betas=(0.9, 0.98), eps=1e-9)
class LossCompute(pl.LightningModule):
    def __init__(self, size):
        super(LossCompute, self).__init__()
        self.criterion = nn.KLDivLoss(reduction='sum')
        self.size = size
        self.true_dist = None

    def forward(self, x, target):
        # x has size (batch_size x length, vocab_size)
        assert x.size(1) == self.size
        true_dist = x.data.clone()
        true_dist.fill_(0)
        true_dist.scatter_(1, target.data.unsqueeze(1).long(), 1)
        self.true_dist = true_dist
        return self.criterion(x, true_dist)
# prepare data
class Batch:
    "Object for holding a batch of data with mask during training."

    def __init__(self, src, trg=None):
        self.src = src
        if trg is not None:
            self.trg = trg[:, :-1]
            self.trg_y = trg[:, 1:]
            self.trg_mask = \
                self.make_std_mask(self.trg)
            self.ntokens = self.trg_y.size(0) * self.trg_y.size(1)
        print("")

    @staticmethod
    def make_std_mask(tgt):
        "Create a mask to hide padding and future words."
        tgt_mask = subsequent_mask(tgt.size(-1)).type_as(tgt.data)
        return tgt_mask
def subsequent_mask(size):
    "Mask out subsequent positions."
    attn_shape = (1, size, size)
    subsequent_mask = np.triu(np.ones(attn_shape), k=1).astype('uint8')
    return torch.from_numpy(subsequent_mask) == 0
def data_gen(V, randomSeed, totalTrainingSample):
    np.random.seed(randomSeed)
    x = torch.from_numpy(np.random.randint(2, V - 2, size=(totalTrainingSample, 10)))
    y = torch.flip(torch.flip(x, [0, 1]), [0])
    x[:, 0] = 1
    y[:, 0] = 1
    x[:, -1] = V - 1
    y[:, -1] = V - 1
    return list(zip(x, y))
def getAccuracy(x, trg):
    totalValAcc = 0
    totalValAccToken = 0
    trg = trg.contiguous().view(-1)
    out = x.view(-1, x.size(-1))       # (batch_size * tgt_length, src_vocab)
    _, index = torch.max(out, dim=-1)  # index (batch_size * tgt_length)
    correct = list((trg == index)).count(True)
    totalValAcc += correct
    totalValAccToken += index.size(0)
    return totalValAcc / totalValAccToken
V = 20
transformer = nn.Transformer(num_encoder_layers=2, num_decoder_layers=2, batch_first=True)
PositionEnc = PositionalEncoding(512, 0.1)
src_emb = Embeddings(512, V)
tgt_emb = Embeddings(512, V)
gen = Generator(V)
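# Aside, illustrative only (not part of the original model): nn.Transformer accepts
# either a bool mask (True marks positions that may NOT be attended to) or an
# additive float mask; its built-in helper returns the float variant with -inf
# above the diagonal, which can be used to sanity-check a hand-built mask.
example_tgt_mask = transformer.generate_square_subsequent_mask(9)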
if __name__ == '__main__':
    model = Model(nn.Sequential(src_emb, PositionEnc), nn.Sequential(tgt_emb, PositionEnc), transformer, gen)
    earlyStopping = EarlyStopping(monitor='val_loss', patience=3)
    trainer = pl.Trainer(max_epochs=10, callbacks=[earlyStopping])
    trainer.fit(model)
I'm trying to preprocess some text data, but after creating a PyTorch DataLoader and looping through it to check that it works, I get a BrokenPipeError. However, when I try the same code in Google Colab it works, so I think it may be a problem with my setup.
(The Collate class is useless, I just haven't removed it yet.)
import numpy as np
import pandas as pd
data = pd.read_csv("imdb.csv")
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import spacy
spacy_eng = spacy.load("en")
class Vocabulary():
    def __init__(self, freq_threshold=4):
        self.word_to_index = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
        self.freq_threshold = freq_threshold
        self.max_length = 0

    def __len__(self):
        return len(self.word_to_index)

    @staticmethod
    def tokenizer_eng(text):
        return [tok.text.lower() for tok in spacy_eng.tokenizer(text)]

    def build_vocabulary(self, sentence_list):
        frequencies = {}
        idx = 4
        longest_length = 0
        for sentence in sentence_list:
            if len(sentence) > longest_length:
                self.max_length = len(sentence)
                longest_length = self.max_length
            for word in self.tokenizer_eng(sentence):
                if word not in frequencies:
                    frequencies[word] = 1
                else:
                    frequencies[word] += 1
                if frequencies[word] == self.freq_threshold:
                    self.word_to_index[word] = idx
                    idx += 1
        self.max_length += 25

    def numericalize(self, text):
        tokenized_text = self.tokenizer_eng(text)
        vector_text = []
        for token in tokenized_text:
            if token in self.word_to_index:
                vector_text.append(self.word_to_index[token])
            else:
                vector_text.append(self.word_to_index["<UNK>"])
        vector_text.append(self.word_to_index["<EOS>"])
        pad_length = self.max_length - len(vector_text)
        for i in range(0, pad_length):
            vector_text.append(self.word_to_index["<PAD>"])
        return vector_text
class IMDBDataset(Dataset):
    def __init__(self):
        data = pd.read_csv("imdb.csv").to_numpy()
        self.target = []
        for data_point in data[:, 2]:
            if data_point == "neg":
                self.target.append(0)
            else:
                self.target.append(1)
        self.text = data[:, 4]
        self.vocab = Vocabulary()
        self.vocab.build_vocabulary(self.text)

    def __len__(self):
        return self.text.shape[0]

    def __getitem__(self, idx):
        review = self.text[idx]
        vector_text = [self.vocab.word_to_index["<SOS>"]]
        vector_text += self.vocab.numericalize(review)
        target = self.target[idx]
        return torch.tensor(vector_text), torch.tensor(target)
class Collate:
    def __init__(self, pad_idx):
        self.pad_idx = pad_idx

    def __call__(self, batch):
        text = [item[0] for item in batch]
        text = nn.utils.rnn.pad_sequence(text, batch_first=False, padding_value=self.pad_idx)
        return text, batch[1]
def get_loader(batch_size=32, num_workers=4, shuffle=True, pin_memory=True):
    dataset = IMDBDataset()
    pad_idx = dataset.vocab.word_to_index["<PAD>"]
    loader = DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=shuffle,
        pin_memory=pin_memory,
        collate_fn=Collate(pad_idx=pad_idx)  # Redundant now
    )
    return loader, dataset

train_dl, train_ds = get_loader()

for idx, (data, target) in enumerate(train_dl):
    print(data.shape)
Not sure why it worked, but removing the get_loader() function and just creating the DataLoader by itself fixed this:
train_dl = DataLoader(dataset, batch_size=32, shuffle=True)
You can try using
if __name__ == '__main__' and '__file__' in globals():
I guess you are using Windows. PyTorch's DataLoader gives this error when you set num_workers > 0. To fix it, set num_workers = 0 or create the DataLoader under if __name__ == "__main__": (I can't explain why the last one works).
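A minimal sketch of the guarded entry point (it reuses the IMDBDataset defined above; the rest of the names are only illustrative):

from torch.utils.data import DataLoader

def main():
    # On Windows, worker processes re-import this script; the guard below keeps
    # them from re-running the loading loop and breaking the pipe.
    dataset = IMDBDataset()
    loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
    for data, target in loader:
        print(data.shape)

if __name__ == "__main__":
    main()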
I am using a data generator in Keras to train a model on a large dataset, but I get the error Error when checking input: expected input_8 to have 4 dimensions, but got array with shape () every time on the last batch of the first epoch. I checked my dataset file and it doesn't contain any empty arrays, so where does the empty array come from? I even tried printing the arrays as they were generated, and a few of them were indeed empty. Here is my code for the data generator:
class data_generator(Sequence):
    def __init__(self, data_file, type_data, batch_size, shuffle=True):
        self.data_file = data_file
        self.type_data = type_data
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def on_epoch_end(self):
        if self.type_data == "train":
            self.indices = np.arange(3450000)
        else:
            self.indices = np.arange(345000)
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __data__generation(self, indices):
        return X, Y

    def __len__(self):
        if self.type_data == "train":
            return int(np.ceil(10000 / float(self.batch_size)))
        else:
            return int(np.ceil(1000 / float(self.batch_size)))

    def __getitem__(self, index):
        # print(self.indices[(index)*self.batch_size], self.indices[(index+1)*self.batch_size])
        X = np.array(HDF5Matrix(self.data_file, self.type_data + "_X", start=self.indices[index*self.batch_size], end=self.indices[(index+1)*self.batch_size]))
        Y = np.array(HDF5Matrix(self.data_file, self.type_data + "_Y", start=self.indices[index*self.batch_size], end=self.indices[(index+1)*self.batch_size]))
        # print(X.shape, Y.shape)
        return X, Y
And here is my code for starting the fit generator:
train_generator = data_generator("drive/My Drive/Dataset/dataset.h5", "train", 20)
eval_generator = data_generator("drive/My Drive/Dataset/dataset.h5", "eval", 20)
model = create_model()
history = model.fit_generator(generator = train_generator,epochs = 100,validation_data=eval_generator,use_multiprocessing=False)
How do I solve this issue? Also, is there any alternative to a data generator for training on large datasets? The data generator is very buggy and gives lots of errors.
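As a side note, here is one way an empty batch can appear with the generator above; this is only an assumption on my part, namely that the shuffled indices are used directly as slice bounds in __getitem__, so end can land before start:

import numpy as np

indices = np.arange(100)
np.random.shuffle(indices)
start, end = indices[0], indices[20]   # after shuffling, end may be smaller than start
batch = np.arange(1000)[start:end]     # empty array whenever end <= start
print(batch.shape)                     # can print (0,)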
The code had a few mistakes. I changed it and now it works, but I still don't know exactly why that error occurred. Here is the new code:
class data_generator(Sequence):
    def __init__(self, data_file, type_data, batch_size, shuffle=True):
        self.data_file = data_file
        self.type_data = type_data
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def on_epoch_end(self):
        if self.type_data == "train":
            self.indices = np.arange(3450000)
        else:
            self.indices = np.arange(345000)
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __data__generation(self, indices):
        X = []
        Y = []
        for index in indices:
            X.append(np.array(HDF5Matrix(self.data_file, self.type_data + "_X", start=index, end=index + 1)[0]))
            Y.append(np.array(HDF5Matrix(self.data_file, self.type_data + "_Y", start=index, end=index + 1)[0]))
        X = np.array(X)
        Y = np.array(Y)
        return X, Y

    def __len__(self):
        if self.type_data == "train":
            return int(np.ceil(3450000 / float(self.batch_size)))
        else:
            return int(np.ceil(345000 / float(self.batch_size)))

    def __getitem__(self, index):
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        X, Y = self.__data__generation(indices)
        # print(X.shape, Y.shape, index)
        return X, Y
Keras needs a while True (infinite) loop to avoid StopIteration; with a plain generator, once the correct number of steps per epoch (sample_size // batch_size) has been consumed, the generator is exhausted and the yielded shape would be zero.
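A minimal sketch of that pattern (the names X, Y and batch_size are illustrative, not from the code above):

import numpy as np

def infinite_batches(X, Y, batch_size):
    # wrap the epoch loop in `while True` so Keras never hits StopIteration
    n = len(X)
    while True:
        order = np.random.permutation(n)
        for i in range(0, n, batch_size):
            idx = order[i:i + batch_size]
            yield X[idx], Y[idx]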
I am implementing a neural network for a prediction task, currently using only the numpy library, and I am adapting the code to the data that I have.
Below is the current state of the neural network; I get an error at the end of the code and I don't understand it well.
Can anyone help me, please?
import numpy as np
from sklearn.cross_validation import train_test_split

class LinearLayer:
    def __init__(self, n_input, n_output):
        self.n = n_input
        self.m = n_output
        self.W = (1/np.sqrt(n_input))*np.random.rand(n_input+1, n_output)

    def forward(self, X):
        self.input = np.zeros((X.shape[0], self.n+1))
        # if only one feature, the input should always be a batch, at least
        if len(X.shape) == 1:  # of one element
            self.input[:-1, :] = X.reshape(-1, self.n)
        else:
            self.input[:, :-1] = X
        self.input[:, -1] = 1
        self.output = self.input.dot(self.W)  # xW + b
        return self.output

    def backward(self, d_out):
        self.gradients = self.W.dot(d_out)[:-1]
        self.dW = np.einsum("ij,ki", self.input, d_out)
        return self.gradients

    def updateWeights(self, lr=0.1):
        self.W = self.W - lr*self.dW

class Sigmoid:
    def __init__(self, n_input):
        self.output = np.zeros(n_input)
        self.gradients = np.zeros(n_input)

    def forward(self, X):
        self.output = 1/(np.exp(-X)+1)
        return self.output

    def backward(self, d_out):
        ds = self.output.T*(1 - self.output).T
        self.gradients = ds*d_out
        return self.gradients
print("Training a multilayer perceptron\n")
import pandas as pd
data = pd.read_csv('Data_Balanceada.csv') #Data (74,11)
X = data.iloc[:,0:11]
y = data.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.25, random_state=1)
h1 = LinearLayer(11,1) #stack some layers
s1 = Sigmoid(7)
h2 = LinearLayer(7,1)
s2 = Sigmoid(1)
def loss(pred, target):
    return np.mean(np.power(pred - target, 2))
predict = lambda x: s2.forward(h2.forward(s1.forward(h1.forward(x))))
backpropagate = lambda d: h1.backward(s1.backward(h2.backward(s2.backward(d))))
lr = 0.005
n = 0 # patience
max_epochs = 1500
valid = loss(predict(X_test), y_test)
for i in range(max_epochs):
    l = 0
    p = predict(X_train)
    backpropagate(p.T - y_train.T)
    h1.updateWeights(lr)
    h2.updateWeights(lr)
    l = loss(p, y_train)

    new_valid = loss(predict(X_test), y_test)
    if new_valid < valid:
        valid = new_valid
        n = 0
    else:
        n += 1
        if n > 50: break
    if i % 50 == 0:
        print("Loss: {0}\t\tValidation: {1}".format(l/100, valid))
    lr = lr*0.97
# Validation
print("\nFinal validation loss: {0}. {1} epochs\n".format(loss(predict(X_test), y_test),i+1))
#print(np.argmax(predict(X_test), axis=1))
#print(np.argmax(y_test, axis=1))
Dataset link:
https://mega.nz/#!jM8AQAbB!61NOeJadGXtiKJQsn_tdJ955p5lRD6kQjBlCQTHtt6I
I have this error:
Data must be 1-dimensional
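For reference, pandas raises exactly this error when it is asked to build a Series from a 2-D array. A minimal sketch; my assumption is that this happens somewhere the (n, 1) network output meets the pandas Series y_train / y_test:

import numpy as np
import pandas as pd

pd.Series(np.zeros((4, 1)))   # ValueError: Data must be 1-dimensional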