Fine tuning bert on next sentence prediction task

Fine tuning bert on next sentence prediction task - python

I am trying to fine-tune Bert using the Huggingface library on next sentence prediction task. I looked at the tutorial and I am trying to use DataCollatorForNextSentencePrediction and TextDatasetForNextSentencePrediction . When I am using that I get the following error. I have provided my code bellow.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-18-7678758b2c9c> in <module>()
56 train(bert_model,bert_tokenizer,train_data_set_path)
57 #prepare_data_set(bert_tokenizer)
---> 58 main()
9 frames
<ipython-input-18-7678758b2c9c> in main()
54 bert_model = BertForNextSentencePrediction.from_pretrained("bert-base-cased")
55 train_data_set_path = "/content/drive/My Drive/next_sentence/line_data_set_file.txt"
---> 56 train(bert_model,bert_tokenizer,train_data_set_path)
57 #prepare_data_set(bert_tokenizer)
58 main()
<ipython-input-18-7678758b2c9c> in train(bert_model, bert_tokenizer, path, eval_path)
47
48 )
---> 49 trainer.train()
50 trainer.save_model(out_dir)
51 def main():
/usr/local/lib/python3.6/dist-packages/transformers/trainer.py in train(self, model_path, trial)
697
698 epoch_pbar = tqdm(epoch_iterator, desc="Iteration", disable=disable_tqdm)
--> 699 for step, inputs in enumerate(epoch_iterator):
700
701 # Skip past any already trained steps if resuming training
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in __next__(self)
361
362 def __next__(self):
--> 363 data = self._next_data()
364 self._num_yielded += 1
365 if self._dataset_kind == _DatasetKind.Iterable and \
/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
401 def _next_data(self):
402 index = self._next_index() # may raise StopIteration
--> 403 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
404 if self._pin_memory:
405 data = _utils.pin_memory.pin_memory(data)
/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
45 else:
46 data = self.dataset[possibly_batched_index]
---> 47 return self.collate_fn(data)
/usr/local/lib/python3.6/dist-packages/transformers/data/data_collator.py in __call__(self, examples)
356 for i, doc in enumerate(examples):
357 input_id, segment_id, attention_mask, label = self.create_examples_from_document(doc, i, examples)
--> 358 input_ids.extend(input_id)
359 segment_ids.extend(segment_id)
360 attention_masks.extend(attention_mask)
/usr/local/lib/python3.6/dist-packages/transformers/data/data_collator.py in create_examples_from_document(self, document, doc_index, examples)
444 random_document = examples[random_document_index]
445 random_start = random.randint(0, len(random_document) - 1)
--> 446 for j in range(random_start, len(random_document)):
447 tokens_b.extend(random_document[j])
448 if len(tokens_b) >= target_b_length:
/usr/lib/python3.6/random.py in randint(self, a, b)
219 """
220
--> 221 return self.randrange(a, b+1)
222
223 def _randbelow(self, n, int=int, maxsize=1<<BPF, type=type,
/usr/lib/python3.6/random.py in randrange(self, start, stop, step, _int)
197 return istart + self._randbelow(width)
198 if step == 1:
--> 199 raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width))
200
201 # Non-unit step argument supplied.
ValueError: empty range for randrange() (0,0, 0)
def train(bert_model,bert_tokenizer,path,eval_path=None):
out_dir = "/content/drive/My Drive/next_sentence/"
training_args = TrainingArguments(
output_dir=out_dir,
overwrite_output_dir=True,
num_train_epochs=1,
per_device_train_batch_size=30,
save_steps=10000,
save_total_limit=2,
)
data_collator = DataCollatorForNextSentencePrediction(
tokenizer=bert_tokenizer,mlm=False,block_size=512,nsp_probability =0.5
)
dataset = TextDatasetForNextSentencePrediction(
tokenizer = bert_tokenizer,
file_path=path,
block_size=512,
)
trainer = Trainer(
model=bert_model,
args=training_args,
train_dataset=dataset,
data_collator=data_collator,
)
trainer.train()
trainer.save_model(out_dir)
def main():
print("Running main")
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
bert_model = BertForNextSentencePrediction.from_pretrained("bert-base-cased")
train_data_set_path = "/content/drive/My Drive/next_sentence/line_data_set_file.txt"
train(bert_model,bert_tokenizer,train_data_set_path)
#prepare_data_set(bert_tokenizer)
main()

Related

Transform DVS128Gesture dataset

I am making a neural network in Python using the DVS128Gesture dataset. I want to transform the default 128x128 trinary frames to 32x32 binary frames, but when I try to use torchvision.transform in the dataset, I am getting this error:
img should be PIL Image. Got <class 'numpy.lib.npyio.NpzFile'>
My code:
import torch
import torchvision
from spikingjelly.datasets.dvs128_gesture import DVS128Gesture
train_data = DVS128Gesture(root_dir, train=True, data_type='event',
transform=torchvision.transforms.Compose([
torchvision.transforms.Resize(32),
torchvision.transforms.Normalize((0.0,), (0.8,)),
torchvision.transforms.ToTensor()
]))
test_data = DVS128Gesture(root_dir, train=False, data_type='event',
transform=torchvision.transforms.Compose([
torchvision.transforms.Resize(32),
torchvision.transforms.Normalize((0.0,), (0.8,)),
torchvision.transforms.ToTensor()
]))
train_loader = torch.utils.data.DataLoader(train_data, batch_size=bs, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=bs, shuffle=True)
examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)
example_data.shape
I have done the same with the MNIST dataset and everything worked as expected. I think the problem is that I use torchvision.transform in DVS128Gesture, but I am not sure what else I can use.
The same with MNIST:
train_data = torchvision.datasets.MNIST(root_dir, train=True, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.Resize(28),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize((0.0,), (0.8,))
]))
test_data = torchvision.datasets.MNIST(root_dir, train=False, download=True,
transform=torchvision.transforms.Compose([
torchvision.transforms.Resize(28),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize((0.0,), (0.8,))
]))
What am I doing wrong?
Stack trace of error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In [10], line 2
1 examples = enumerate(test_loader)
----> 2 batch_idx, (example_data, example_targets) = next(examples)
3 example_data.shape
File p:\Programs\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py:681, in _BaseDataLoaderIter.__next__(self)
678 if self._sampler_iter is None:
679 # TODO(https://github.com/pytorch/pytorch/issues/76750)
680 self._reset() # type: ignore[call-arg]
--> 681 data = self._next_data()
682 self._num_yielded += 1
683 if self._dataset_kind == _DatasetKind.Iterable and \
684 self._IterableDataset_len_called is not None and \
685 self._num_yielded > self._IterableDataset_len_called:
File p:\Programs\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py:721, in _SingleProcessDataLoaderIter._next_data(self)
719 def _next_data(self):
720 index = self._next_index() # may raise StopIteration
--> 721 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
722 if self._pin_memory:
723 data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)
File p:\Programs\Anaconda3\lib\site-packages\torch\utils\data\_utils\fetch.py:49, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
File p:\Programs\Anaconda3\lib\site-packages\torch\utils\data\_utils\fetch.py:49, in <listcomp>(.0)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
File p:\Programs\Anaconda3\lib\site-packages\torchvision\datasets\folder.py:232, in DatasetFolder.__getitem__(self, index)
230 sample = self.loader(path)
231 if self.transform is not None:
--> 232 sample = self.transform(sample)
233 if self.target_transform is not None:
234 target = self.target_transform(target)
File p:\Programs\Anaconda3\lib\site-packages\torchvision\transforms\transforms.py:94, in Compose.__call__(self, img)
92 def __call__(self, img):
93 for t in self.transforms:
---> 94 img = t(img)
95 return img
File p:\Programs\Anaconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
File p:\Programs\Anaconda3\lib\site-packages\torchvision\transforms\transforms.py:349, in Resize.forward(self, img)
341 def forward(self, img):
342 """
343 Args:
344 img (PIL Image or Tensor): Image to be scaled.
(...)
347 PIL Image or Tensor: Rescaled image.
348 """
--> 349 return F.resize(img, self.size, self.interpolation, self.max_size, self.antialias)
File p:\Programs\Anaconda3\lib\site-packages\torchvision\transforms\functional.py:430, in resize(img, size, interpolation, max_size, antialias)
428 warnings.warn("Anti-alias option is always applied for PIL Image input. Argument antialias is ignored.")
429 pil_interpolation = pil_modes_mapping[interpolation]
--> 430 return F_pil.resize(img, size=size, interpolation=pil_interpolation, max_size=max_size)
432 return F_t.resize(img, size=size, interpolation=interpolation.value, max_size=max_size, antialias=antialias)
File p:\Programs\Anaconda3\lib\site-packages\torchvision\transforms\functional_pil.py:249, in resize(img, size, interpolation, max_size)
240 #torch.jit.unused
241 def resize(
242 img: Image.Image,
(...)
245 max_size: Optional[int] = None,
246 ) -> Image.Image:
248 if not _is_pil_image(img):
--> 249 raise TypeError(f"img should be PIL Image. Got {type(img)}")
250 if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))):
251 raise TypeError(f"Got inappropriate size arg: {size}")
TypeError: img should be PIL Image. Got <class 'numpy.lib.npyio.NpzFile'>

The error probably comes from the Resize transform (can you provide more details on the stack trace of the error ?).
Resize is a image specific tranform, expecting a PIL image (or a torch Tensor, see the transform documentation), while your dataset DVS128Gesture outputs another object type.

How do I resolve the error "IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)"?

I tried my best to make a minimal reproducible example: there's an issue in my train() function where on the line output = model(data); I get the error IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1). I also get an error before that but I cannot decipher the meaning. I've included the full traceback in this message.
I've seen other users post about the same error message, but each one has a different solution; I used the debugger to look into data.location but I'm still unable to resolve the problem. I'm using PySyft v0.2.9.
import torch
import syft as sy
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from auto_esn.esn.esn import ESNBase
import torch.nn.functional as F
import torch.optim as optim
from auto_esn.esn.esn import GroupedDeepESN
from auto_esn.esn.reservoir.util import NRMSELoss
class CarHackingDataset(Dataset):
"""
Loading the Car-Hacking Dataset from
https://ocslab.hksecurity.net/Datasets/car-hacking-dataset
Args:
csv_file: A path to the dataset file which has the extension CSV.
root_dir: The directory of the parent folder of the dataset.
transform (callable, optional): Optional tansform to be applied on a sample.
"""
def __init__(self, csv_file: str, root_dir: str, transform=None):
self.car_hacking_frame = pd.read_csv(csv_file)[:10000]
self.root_dir = root_dir
self.transform = transform
def __getitem__(self,idx):
'''Grabs relevant features from the dataset.'''
if torch.is_tensor(idx):
idx = idx.tolist()
features = ['Timestamp', 'DLC', 'CAN_ID', 'Data']
X_train = self.car_hacking_frame.loc[:, features].values
X_train_scaled = StandardScaler().fit_transform(X_train)
X_train_scaled = torch.as_tensor(X_train_scaled)
# It looks like it's a bad idea to encode features.
# https://stackoverflow.com/questions/61217713/labelencoder-for-categorical-features
class_le = LabelEncoder()
target = class_le.fit_transform(self.car_hacking_frame['Flag'].values)
target = torch.as_tensor(target)
return X_train_scaled[idx], target[idx]
def __len__(self):
return len(self.car_hacking_frame)
train_dataset = CarHackingDataset(csv_file='/content/car_hacking_data/clean_fuzzy_dataset.csv',
root_dir='/content/car_hacking_data')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
hook = sy.TorchHook(torch)
car1 = sy.VirtualWorker(hook, id="car1")
car2 = sy.VirtualWorker(hook, id="car2")
args = {
'batch_size' : 32,
'epochs' : 1
}
federated_train_loader = sy.FederatedDataLoader(train_dataset.federate((car1, car2)),
batch_size=args['batch_size'], shuffle=True)
# Intializing the loss function which is probably a variation of mean squared error.
nrmse = NRMSELoss()
def train(model, device, federated_train_loader, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(federated_train_loader):
model = model.send(data.location)
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = nrmse(output, target)
loss.backward()
optimizer.step()
model.get()
if batch_idx % 10 == 0:
loss = loss.get()
print(f'''Train Epoch: {epoch} [{(batch_idx * args['batch_size'])}/{(len(federated_train_loader) * args['batch_size'])}'''
+ f'''({100. * batch_idx / len(federated_train_loader):.0f}%)]\tLoss: {loss.item():.6f}''')
model = GroupedDeepESN().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01)
for epoch in range(1, args['batch_size'] + 1):
train(model, device, federated_train_loader, optimizer, epoch)
Traceback of the error message:
---------------------------------------------------------------------------
PureFrameworkTensorFoundError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/syft/frameworks/torch/tensors/interpreters/native.py in handle_func_command(cls, command)
336 new_args, new_kwargs, new_type, args_type = hook_args.unwrap_args_from_function(
--> 337 cmd, args_, kwargs_, return_args_type=True
338 )
20 frames
/usr/local/lib/python3.7/dist-packages/syft/generic/frameworks/hook/hook_args.py in unwrap_args_from_function(attr, args_, kwargs_, return_args_type)
166 # Run it
--> 167 new_args = args_hook_function(args_)
168
/usr/local/lib/python3.7/dist-packages/syft/generic/frameworks/hook/hook_args.py in <lambda>(x)
355
--> 356 return lambda x: f(lambdas, x)
357
/usr/local/lib/python3.7/dist-packages/syft/generic/frameworks/hook/hook_args.py in three_fold(lambdas, args_, **kwargs)
534 lambdas[0](args_[0], **kwargs),
--> 535 lambdas[1](args_[1], **kwargs),
536 lambdas[2](args_[2], **kwargs),
/usr/local/lib/python3.7/dist-packages/syft/generic/frameworks/hook/hook_args.py in <lambda>(i)
330 # Last if not, rule is probably == 1 so use type to return the right transformation.
--> 331 else lambda i: forward_func[type(i)](i)
332 for a, r in zip(args_, rules) # And do this for all the args / rules provided
/usr/local/lib/python3.7/dist-packages/syft/frameworks/torch/hook/hook_args.py in <lambda>(i)
23 if hasattr(i, "child")
---> 24 else (_ for _ in ()).throw(PureFrameworkTensorFoundError),
25 torch.nn.Parameter: lambda i: i.child
/usr/local/lib/python3.7/dist-packages/syft/frameworks/torch/hook/hook_args.py in <genexpr>(.0)
23 if hasattr(i, "child")
---> 24 else (_ for _ in ()).throw(PureFrameworkTensorFoundError),
25 torch.nn.Parameter: lambda i: i.child
PureFrameworkTensorFoundError:
During handling of the above exception, another exception occurred:
IndexError Traceback (most recent call last)
<ipython-input-6-c9ac87b98598> in <module>
24
25 for epoch in range(1, args['batch_size'] + 1):
---> 26 train(model, device, federated_train_loader, optimizer, epoch)
<ipython-input-6-c9ac87b98598> in train(model, device, federated_train_loader, optimizer, epoch)
7 data, target = data.to(device), target.to(device)
8 optimizer.zero_grad()
----> 9 output = model(data)
10
11 loss = nrmse(output, target)
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
/usr/local/lib/python3.7/dist-packages/auto_esn/esn/esn.py in forward(self, input)
31 mapped_input = self.reservoir(input)
32
---> 33 return self.readout(mapped_input)
34
35 def reset_hidden(self):
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
/usr/local/lib/python3.7/dist-packages/auto_esn/esn/readout/svr_readout.py in forward(self, input)
10
11 def forward(self, input: Tensor) -> Tensor:
---> 12 return self.readout(input)
13
14 def fit(self, input: Tensor, target: Tensor):
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
530 result = self._slow_forward(*input, **kwargs)
531 else:
--> 532 result = self.forward(*input, **kwargs)
533 for hook in self._forward_hooks.values():
534 hook_result = hook(self, input, result)
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/linear.py in forward(self, input)
85
86 def forward(self, input):
---> 87 return F.linear(input, self.weight, self.bias)
88
89 def extra_repr(self):
/usr/local/lib/python3.7/dist-packages/syft/generic/frameworks/hook/hook.py in overloaded_func(*args, **kwargs)
333 handle_func_command = syft.framework.Tensor.handle_func_command
334
--> 335 response = handle_func_command(command)
336
337 return response
/usr/local/lib/python3.7/dist-packages/syft/frameworks/torch/tensors/interpreters/native.py in handle_func_command(cls, command)
378 # in the execute_command function
379 try:
--> 380 response = cls._get_response(cmd, args_, kwargs_)
381 except AttributeError:
382 # Change the library path to avoid errors on layers like AvgPooling
/usr/local/lib/python3.7/dist-packages/syft/frameworks/torch/tensors/interpreters/native.py in _get_response(cmd, args_, kwargs_)
412
413 if isinstance(args_, tuple):
--> 414 response = command_method(*args_, **kwargs_)
415 else:
416 response = command_method(args_, **kwargs_)
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
1368 if input.dim() == 2 and bias is not None:
1369 # fused op is marginally faster
-> 1370 ret = torch.addmm(bias, input, weight.t())
1371 else:
1372 output = input.matmul(weight.t())
/usr/local/lib/python3.7/dist-packages/syft/generic/frameworks/hook/hook.py in overloaded_func(*args, **kwargs)
333 handle_func_command = syft.framework.Tensor.handle_func_command
334
--> 335 response = handle_func_command(command)
336
337 return response
/usr/local/lib/python3.7/dist-packages/syft/frameworks/torch/tensors/interpreters/native.py in handle_func_command(cls, command)
378 # in the execute_command function
379 try:
--> 380 response = cls._get_response(cmd, args_, kwargs_)
381 except AttributeError:
382 # Change the library path to avoid errors on layers like AvgPooling
/usr/local/lib/python3.7/dist-packages/syft/frameworks/torch/tensors/interpreters/native.py in _get_response(cmd, args_, kwargs_)
412
413 if isinstance(args_, tuple):
--> 414 response = command_method(*args_, **kwargs_)
415 else:
416 response = command_method(args_, **kwargs_)
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
I thought it might also be important to include the shape of my data inside the train function's loop over the federated data: Data shape: torch.Size([32, 4]), Target shape: torch.Size([32]).

multiclass sequence classifiaction with fastai and huggingface

I am looking to implement DistilBERT via fastai and huggingface for a mutliclass sequence classification problem. I found a useful tutorial that gave a good example on how to do this with binary classification. The code is below:
# !pip install torch==1.9.0
# !pip install torchtext==0.10
# !pip install transformers==4.7
# !pip install fastai==2.4
from fastai.text.all import *
from sklearn.model_selection import train_test_split
import pandas as pd
import glob
from transformers import AutoTokenizer, AutoModelForSequenceClassification
hf_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
hf_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")
"""
train_df and val_df looks like this:
label text
4240 5 whoa interesting.
13 7 you could you could we just
4639 4 you set the goal,
28 1 because ive already agreed to that
66 8 oh hey freshman thats you gona need
"""
print(list(train_df.label.value_counts().index))
"""
[4, 1, 5, 6, 7, 0, 2, 3, 8]
"""
class HF_Dataset(torch.utils.data.Dataset):
def __init__(self, df, hf_tokenizer):
self.df = df
self.hf_tokenizer = hf_tokenizer
self.label_map = {
0:0,
1:0,
2:0,
3:0,
4:1,
5:1,
6:1,
7:1,
8:1
}
def __len__(self):
return len(self.df)
def decode(self, token_ids):
return ' '.join([hf_tokenizer.decode(x) for x in tokenizer_outputs['input_ids']])
def decode_to_original(self, token_ids):
return self.hf_tokenizer.decode(token_ids.squeeze())
def __getitem__(self, index):
label, text = self.df.iloc[index]
label = self.label_map[label]
label = torch.tensor(label)
tokenizer_output = self.hf_tokenizer(text, return_tensors="pt", padding='max_length', truncation=True, max_length=512)
tokenizer_output['input_ids'].squeeze_()
tokenizer_output['attention_mask'].squeeze_()
return tokenizer_output, label
train_dataset = HF_Dataset(train_df, hf_tokenizer)
valid_dataset = HF_Dataset(valid_df, hf_tokenizer)
train_dl = DataLoader(train_dataset, bs=16, shuffle=True)
valid_dl = DataLoader(valid_dataset, bs=16)
dls = DataLoaders(train_dl, valid_dl)
hf_model(**batched_data)
class HF_Model(nn.Module):
def __init__(self, hf_model):
super().__init__()
self.hf_model = hf_model
def forward(self, tokenizer_outputs):
model_output = self.hf_model(**tokenizer_outputs)
return model_output.logits
model = HF_Model(hf_model)
# Manually popping the model onto the gpu since the data is in a dictionary format
# (doesn't automatically place model + data on gpu otherwise)
learn = Learner(dls, model, loss_func=nn.CrossEntropyLoss(), metrics=[accuracy])
learn.fit_one_cycle(3, 1e-4)
This works fine. However, I mapped my multiclass labels to 2 labels to allow this to work. I actually have 9 classes. I tried adjusting the label mapping scheme in HF_Dataset() class to match my actual labels like below:
class HF_Dataset(torch.utils.data.Dataset):
def __init__(self, df, hf_tokenizer):
self.df = df
self.hf_tokenizer = hf_tokenizer
self.label_map = {
0:0,
1:1,
2:2,
3:3,
4:4,
5:5,
6:6,
7:7,
8:8
}
def __len__(self):
return len(self.df)
def decode(self, token_ids):
return ' '.join([hf_tokenizer.decode(x) for x in tokenizer_outputs['input_ids']])
def decode_to_original(self, token_ids):
return self.hf_tokenizer.decode(token_ids.squeeze())
def __getitem__(self, index):
label, text = self.df.iloc[index]
label = self.label_map[label]
label = torch.tensor(label)
tokenizer_output = self.hf_tokenizer(text, return_tensors="pt", padding='max_length', truncation=True, max_length=512)
tokenizer_output['input_ids'].squeeze_()
tokenizer_output['attention_mask'].squeeze_()
return tokenizer_output, label
Every line works until learn.fit_one_cycle.
Here is the full stack trace from this line:
0.00% [0/3 00:00<00:00]
epoch train_loss valid_loss accuracy time
0.00% [0/519 00:00<00:00]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-21-0ec2ff9e12e1> in <module>
----> 1 learn.fit_one_cycle(3, 1e-4)
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
111 scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
112 'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 113 self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
114
115 # Cell
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
219 self.opt.set_hypers(lr=self.lr if lr is None else lr)
220 self.n_epoch = n_epoch
--> 221 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
222
223 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
161
162 def _with_events(self, f, event_type, ex, final=noop):
--> 163 try: self(f'before_{event_type}'); f()
164 except ex: self(f'after_cancel_{event_type}')
165 self(f'after_{event_type}'); final()
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/learner.py in _do_fit(self)
210 for epoch in range(self.n_epoch):
211 self.epoch=epoch
--> 212 self._with_events(self._do_epoch, 'epoch', CancelEpochException)
213
214 def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
161
162 def _with_events(self, f, event_type, ex, final=noop):
--> 163 try: self(f'before_{event_type}'); f()
164 except ex: self(f'after_cancel_{event_type}')
165 self(f'after_{event_type}'); final()
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/learner.py in _do_epoch(self)
204
205 def _do_epoch(self):
--> 206 self._do_epoch_train()
207 self._do_epoch_validate()
208
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/learner.py in _do_epoch_train(self)
196 def _do_epoch_train(self):
197 self.dl = self.dls.train
--> 198 self._with_events(self.all_batches, 'train', CancelTrainException)
199
200 def _do_epoch_validate(self, ds_idx=1, dl=None):
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
161
162 def _with_events(self, f, event_type, ex, final=noop):
--> 163 try: self(f'before_{event_type}'); f()
164 except ex: self(f'after_cancel_{event_type}')
165 self(f'after_{event_type}'); final()
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/learner.py in all_batches(self)
167 def all_batches(self):
168 self.n_iter = len(self.dl)
--> 169 for o in enumerate(self.dl): self.one_batch(*o)
170
171 def _do_one_batch(self):
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/learner.py in one_batch(self, i, b)
192 b = self._set_device(b)
193 self._split(b)
--> 194 self._with_events(self._do_one_batch, 'batch', CancelBatchException)
195
196 def _do_epoch_train(self):
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
161
162 def _with_events(self, f, event_type, ex, final=noop):
--> 163 try: self(f'before_{event_type}'); f()
164 except ex: self(f'after_cancel_{event_type}')
165 self(f'after_{event_type}'); final()
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/fastai/learner.py in _do_one_batch(self)
173 self('after_pred')
174 if len(self.yb):
--> 175 self.loss_grad = self.loss_func(self.pred, *self.yb)
176 self.loss = self.loss_grad.clone()
177 self('after_loss')
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
1119 def forward(self, input: Tensor, target: Tensor) -> Tensor:
1120 return F.cross_entropy(input, target, weight=self.weight,
-> 1121 ignore_index=self.ignore_index, reduction=self.reduction)
1122
1123
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2822 if size_average is not None or reduce is not None:
2823 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2824 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2825
2826
IndexError: Target 6 is out of bounds.
This seems like it should be a simple fix. Do I need to adjust something in the model architecture to allow it to accept 9 labels? Or do I need to one hot encode my labels? If so, is there a solution prebuilt to do this in the pipeline?

You need to define num_labels=9 when loading the model:
hf_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=9)
The default value is 2, which suits the first use-case, but breaks when you tried to change.
Note that the lib explictly says that the classifier (which generates the .logits that are of your interest) is randomly initialized:
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.weight', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

AttributeError: 'list' object has no attribute 'rank' When converting Keras Model To CoreML

I am trying to convert my Keras model that contains GRU layers to generate Shakespeares text to a coreml model, although when I try to convert it, I get the error "AttributeError: 'list' object has no attribute 'rank'". I followed the instructions on this website. Here is my code:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import coremltools as ct
model = keras.models.load_model("checkpointshakespear.h5")
mlmodel = ct.convert(model)
This is my model layer:
model = keras.models.Sequential([
keras.layers.GRU(128, return_sequences=True, input_shape=[None, max_id],
dropout=0.2, recurrent_dropout=0.2),
keras.layers.GRU(128, return_sequences=True,
dropout=0.2, recurrent_dropout=0.2),
keras.layers.TimeDistributed(keras.layers.Dense(max_id,
activation="softmax"))
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")
This is the full error:
Running TensorFlow Graph Passes: 100%|██████████| 5/5 [00:00<00:00, 9.78 passes/s]
Converting Frontend ==> MIL Ops: 0%| | 0/84 [00:00<?, ? ops/s]
Converting Frontend ==> MIL Ops: 0%| | 0/95 [00:00<?, ? ops/s]
Converting Frontend ==> MIL Ops: 0%| | 0/84 [00:00<?, ? ops/s]
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-2-8240f4ae502a> in <module>
6 import coremltools as ct
7 model = keras.models.load_model("checkpointshakespear.h5")
----> 8 mlmodel = ct.convert(model)
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/_converters_entry.py in convert(model, source, inputs, outputs, classifier_config, minimum_deployment_target, **kwargs)
256 outputs=outputs,
257 classifier_config=classifier_config,
--> 258 **kwargs
259 )
260
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/converter.py in _convert(model, convert_from, convert_to, converter_registry, **kwargs)
118 backend_converter = backend_converter_type()
119
--> 120 prog = frontend_converter(model, **kwargs)
121 common_pass(prog)
122 out = backend_converter(prog, **kwargs)
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/converter.py in __call__(self, *args, **kwargs)
50
51 tf2_loader = TF2Loader(*args, **kwargs)
---> 52 return tf2_loader.load()
53
54
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/frontend/tensorflow/load.py in load(self)
78 )
79
---> 80 program = self._program_from_tf_ssa()
81 logging.debug("program:\n{}".format(program))
82 return program
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/frontend/tensorflow2/load.py in _program_from_tf_ssa(self)
179
180 converter = TF2Converter(self._tf_ssa, **self.kwargs)
--> 181 return converter.convert()
182
183 def _populate_sub_graph_input_shapes(self, graph, graph_fns):
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/frontend/tensorflow/converter.py in convert(self)
392 for g_name in self.graph_stack[1:]:
393 self.context.add_graph(g_name, self.tfssa.functions[g_name].graph)
--> 394 self.convert_main_graph(prog, graph)
395
396 # Apply TF frontend passes on Program. These passes are different
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/frontend/tensorflow/converter.py in convert_main_graph(self, prog, graph)
337 for name in func_inputs.keys():
338 self.context.add(name, ssa_func.inputs[name])
--> 339 outputs = convert_graph(self.context, graph, self.outputs)
340 ssa_func.set_outputs(outputs)
341 prog.add_function("main", ssa_func)
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/frontend/tensorflow/convert_utils.py in convert_graph(context, graph, outputs)
179 )
180 raise NotImplementedError(msg)
--> 181 _add_op(context, node)
182
183 if len(node.outputs) > 0:
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/frontend/tensorflow2/ops.py in StatelessWhile(context, node)
98 return body_output_vars
99
--> 100 x = mb.while_loop(_cond=cond, _body=body, loop_vars=loop_vars, name=node.name)
101
102 # wraps x as tuple for get_tuple that always follow the while node.
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/mil/ops/registry.py in add_op(cls, **kwargs)
60 #classmethod
61 def add_op(cls, **kwargs):
---> 62 return cls._add_op(op_cls, **kwargs)
63
64 setattr(Builder, op_type, add_op)
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/mil/builder.py in _add_op(cls, op_cls, **kwargs)
188 new_op = op_cls(**kwargs)
189 curr_block()._insert_op_before(new_op, before_op=before_op)
--> 190 new_op.build_nested_blocks()
191 new_op.type_value_inference()
192 if len(new_op.outputs) == 1:
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/mil/ops/defs/control_flow.py in build_nested_blocks(self)
302 v.consuming_blocks = list()
303
--> 304 block, exit_vars = self.build_block(block_inputs)
305
306 # Verify exit_vars has the same types as loop_vars
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/mil/ops/defs/control_flow.py in build_block(self, block_inputs)
271 # Body func
272 body_func = self._body.val
--> 273 exit_vars = body_func(*block.inputs)
274
275 # Cond func:
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/frontend/tensorflow2/ops.py in body(*loop_vars)
94 def body(*loop_vars):
95 context.stack_func_inputs(loop_vars)
---> 96 body_output_vars = convert_graph(context, body_graph)
97 context.unstack_func_inputs()
98 return body_output_vars
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/frontend/tensorflow/convert_utils.py in convert_graph(context, graph, outputs)
179 )
180 raise NotImplementedError(msg)
--> 181 _add_op(context, node)
182
183 if len(node.outputs) > 0:
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/frontend/tensorflow/ops.py in StridedSlice(context, node)
1351 squeeze_mask,
1352 ellipsis_mask,
-> 1353 new_axis_mask,
1354 )
1355
~/opt/anaconda3/lib/python3.7/site-packages/coremltools/converters/mil/frontend/tensorflow/ops.py in _pad_mask(x, begin, end, stride, begin_mask, end_mask, squeeze_mask, ellipsis_mask, new_axis_mask)
1257 x_rank = x.rank + new_dims
1258 else:
-> 1259 x_rank = x.rank
1260
1261 def pad_array(arr, max_rank, idx, default_value):
AttributeError: 'list' object has no attribute 'rank'

Looks like the error is because of the recurrent_dropout parameter. Removing this parameter solves the error.
Also note that I have added batch_size parameter to the first GRU layer. This is necessary because CoreML inputs should be either rank 3 (Seq,B,C) or rank 5 (Seq,B,C,H,W) for RNNs.
This is the working code snippet.
import tensorflow.keras as keras
import coremltools as ct
max_id = 1000
model = keras.models.Sequential([
keras.layers.GRU(128, return_sequences=True, input_shape=[None, max_id], batch_size=64,
dropout=0.2),
keras.layers.GRU(128, return_sequences=True,
dropout=0.2),
keras.layers.TimeDistributed(keras.layers.Dense(max_id,
activation="softmax"))
])
model.save('tf_keras_model.h5')
mlmodel = ct.convert('tf_keras_model.h5', source="tensorflow")

What should do to fix my scikit-learn program?

A snippet of code involving RandomForestClassifier using the python machine learning library scikit-learn.
I am trying to give weight to different classes using the class_weight opition in the scikit's RandomForestClassifier.Below is my code snippet and then the error that I am getting
print 'Training...'
forest = RandomForestClassifier(n_estimators=500,class_weight= {0:1,1:1,2:1,3:1,4:1,5:1,6:1,7:4})
forest = forest.fit( train_data[0::,1::], train_data[0::,0] )
print 'Predicting...'
output = forest.predict(test_data).astype(int)
predictions_file = open("myfirstforest.csv", "wb")
open_file_object = csv.writer(predictions_file)
open_file_object.writerow(["PassengerId","Survived"])
open_file_object.writerows(zip(ids, output))
predictions_file.close()
print 'Done.'
And I am getting the following error:
Training...
IndexError Traceback (most recent call last)
<ipython-input-20-122f2e5a0d3b> in <module>()
84 print 'Training...'
85 forest = RandomForestClassifier(n_estimators=500,class_weight={0:1,1:1,2:1,3:1,4:1,5:1,6:1,7:4})
---> 86 forest = forest.fit( train_data[0::,1::], train_data[0::,0] )
87
88 print 'Predicting...'
/home/rpota/anaconda/lib/python2.7/site-packages/sklearn/ensemble/forest.pyc in fit(self, X, y, sample_weight)
216 self.n_outputs_ = y.shape[1]
217
--> 218 y, expanded_class_weight = self._validate_y_class_weight(y)
219
220 if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
/home/rpota/anaconda/lib/python2.7/site-packages/sklearn/ensemble/forest.pyc in _validate_y_class_weight(self, y)
433 class_weight = self.class_weight
434 expanded_class_weight = compute_sample_weight(class_weight,
--> 435 y_original)
436
437 return y, expanded_class_weight
/home/rpota/anaconda/lib/python2.7/site-packages/sklearn/utils/class_weight.pyc in compute_sample_weight(class_weight, y, indices)
150 weight_k = compute_class_weight(class_weight_k,
151 classes_full,
--> 152 y_full)
153
154 weight_k = weight_k[np.searchsorted(classes_full, y_full)]
/home/rpota/anaconda/lib/python2.7/site-packages/sklearn/utils/class_weight.pyc in compute_class_weight(class_weight, classes, y)
58 for c in class_weight:
59 i = np.searchsorted(classes, c)
---> 60 if classes[i] != c:
61 raise ValueError("Class label %d not present." % c)
62 else:
IndexError: index 2 is out of bounds for axis 0 with size 2
Please help!.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Fine tuning bert on next sentence prediction task - python

Related

Transform DVS128Gesture dataset

How do I resolve the error "IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)"?

multiclass sequence classifiaction with fastai and huggingface

AttributeError: 'list' object has no attribute 'rank' When converting Keras Model To CoreML

What should do to fix my scikit-learn program?

Categories

Resources