I can't understand why, when using TabNetRegressor, it does not recognize the parameters created with Optuna at all. Using TabNetClassifier gives me no problem, but if I use TabNetRegressor it tells me the argument is unexpected:
clf = TabNetRegressor(**final_params)  # TabNetRegressor()
clf.fit(
    X_train=X_train.values, y_train=y_train.values,
    eval_set=[(X_test.values, y_test.values)],
    patience=TabNet_params['patience'], max_epochs=epochs,
    eval_metric=['rmse']
)
res.append(roc_auc_score(y_test.values, clf.predict(X_test.values)))
File line 504, in main_pipeline2
clf = TabNetRegressor(**final_params) # TabNetRegressor()
TypeError: __init__() got an unexpected keyword argument 'n_d'
This is how I create the hyperparameters, using TabNetClassifier inside the objective because the regressor gives me problems:
def Objective(trial):
    mask_type = trial.suggest_categorical("mask_type", ["entmax", "sparsemax"])
    n_da = trial.suggest_int("n_da", 56, 64, step=4)
    n_steps = trial.suggest_int("n_steps", 1, 3, step=1)
    gamma = trial.suggest_float("gamma", 1., 1.4, step=0.2)
    n_shared = trial.suggest_int("n_shared", 1, 3)
    lambda_sparse = trial.suggest_float("lambda_sparse", 1e-6, 1e-3, log=True)
    tabnet_params = dict(n_d=n_da, n_a=n_da, n_steps=n_steps, gamma=gamma,
                         lambda_sparse=lambda_sparse, optimizer_fn=torch.optim.Adam,
                         optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
                         mask_type=mask_type, n_shared=n_shared,
                         scheduler_params=dict(mode="min",
                                               patience=trial.suggest_int("patienceScheduler", low=3, high=10),
                                               # scheduler patience is kept lower than the early-stopping patience
                                               min_lr=1e-5,
                                               factor=0.5),
                         scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
                         verbose=0)  # early stopping
    regressor = TabNetClassifier(**tabnet_params)
    regressor.fit(X_train=X_train.values, y_train=y_train.values,
                  eval_set=[(X_test.values, y_test.values)],
                  patience=trial.suggest_int("patience", low=15, high=30),
                  max_epochs=trial.suggest_int('epochs', 1, 100),
                  eval_metric=['rmse'])
    avg = roc_auc_score(y_test.values, regressor.predict(X_test.values))
    return avg
study = optuna.create_study(direction="maximize", study_name='TabNet optimization')
# TabNet_params = {'mask_type': 'entmax', 'n_da': 56, 'n_steps': 1, 'gamma': 1.2, 'n_shared': 1, 'lambda_sparse': 0.00018593172980376437, 'patienceScheduler': 8, 'patience': 17, 'epochs': 13}
TabNet_params = {'mask_type': 'entmax', 'n_da': 56, 'n_steps': 3, 'gamma': 1.4, 'n_shared': 2,
                 'lambda_sparse': 7.628773104483722e-05, 'patienceScheduler': 10, 'patience': 29, 'epochs': 45}
final_params = dict(n_d=TabNet_params['n_da'], n_a=TabNet_params['n_da'], n_steps=TabNet_params['n_steps'],
                    gamma=TabNet_params['gamma'],
                    lambda_sparse=TabNet_params['lambda_sparse'], optimizer_fn=torch.optim.Adam,
                    optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
                    mask_type=TabNet_params['mask_type'], n_shared=TabNet_params['n_shared'],
                    scheduler_params=dict(mode="min",
                                          patience=TabNet_params['patienceScheduler'],
                                          min_lr=1e-5,
                                          factor=0.5),
                    scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
                    verbose=0)
epochs = TabNet_params['epochs']
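One thing worth checking here (my suggestion, not part of the original post): which TabNetRegressor is actually imported and which keyword arguments its constructor accepts, since an older pytorch-tabnet release or a shadowed import would explain why n_d is rejected by the regressor but accepted by the classifier.
# Diagnostic sketch (assumes pytorch-tabnet is installed): print where the class
# comes from and which constructor arguments the installed version accepts.
import inspect
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor

print(TabNetRegressor.__module__)
print(inspect.signature(TabNetRegressor.__init__))   # should list n_d, n_a, n_steps, ...
print(inspect.signature(TabNetClassifier.__init__))  # compare with the classifier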
I'm building a DCGAN, and I'm having a problem with the shape of the discriminator output: it does not match the shape of the labels when I try to calculate the BCE loss.
To generate the discriminator output, do I have to use convolutions all the way down, or can I add a Linear layer at some point to match the shape I want?
In other words, do I have to reduce the shape by adding more convolutional layers, or can I add a fully connected one? I thought it should have a fully connected layer, but in every tutorial I checked the discriminator had no fully connected layer.
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as torch_dataset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
seed = 1
print("Random Seed: ", seed)
random.seed(seed)
torch.manual_seed(seed)
images_folder_path = "./spectrograms/"
batch_size = 1
image_size = 256
n_channels = 1
z_vector = 100
n_features_generator = 32
n_features_discriminator = 32
num_epochs = 5
lr = 0.0002
beta1 = 0.5
dataset = torch_dataset.ImageFolder(
    root=images_folder_path, transform=transforms.Compose(
        [
            transforms.Grayscale(num_output_channels=1),
            transforms.Resize(image_size),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            transforms.Normalize(0.5, 0.5)
        ]
    )
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)
device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.main = nn.Sequential(
            nn.ConvTranspose2d(z_vector, n_features_generator * 8, 4, 1, bias=False),
            nn.BatchNorm2d(n_features_generator * 8),
            nn.ReLU(True),
            nn.ConvTranspose2d(n_features_generator * 8, n_features_generator * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(n_features_generator * 4),
            nn.ReLU(True),
            nn.ConvTranspose2d(n_features_generator * 4, n_features_generator * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(n_features_generator * 2),
            nn.ReLU(True),
            nn.ConvTranspose2d(n_features_generator * 2, n_features_generator, 4, 2, 1, bias=False),
            nn.BatchNorm2d(n_features_generator),
            nn.ReLU(True),
            nn.ConvTranspose2d(n_features_generator, n_channels, 4, 2, 1, bias=False),
            nn.Tanh()
        )

    def forward(self, inputs):
        return self.main(inputs)
# Convolutional Layer Output Shape = [(W−K+2P)/S]+1
# W is the input volume
# K is the Kernel size
# P is the padding
# S is the stride
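# Worked example with the numbers above (image_size = 256, batch_size = 1) for the
# Discriminator below: 256 -> 128 -> 64 -> 32 -> 16 after the four stride-2
# convolutions, and the final Conv2d (kernel 4, stride 1, padding 0) gives
# (16 - 4)/1 + 1 = 13, i.e. an output of shape [1, 1, 13, 13] rather than a
# single value per image.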
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.main = nn.Sequential(
            nn.Conv2d(n_channels, n_features_discriminator, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(n_features_discriminator, n_features_discriminator * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(n_features_discriminator * 2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(n_features_discriminator * 2, n_features_discriminator * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(n_features_discriminator * 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(n_features_discriminator * 4, n_features_discriminator * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(n_features_discriminator * 8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(n_features_discriminator * 8, 1, 4, 1, bias=False),
        )

    def forward(self, inputs):
        return self.main(inputs)
netG = Generator().to(device)
if device.type == 'cuda':
    netG = nn.DataParallel(netG)
netG.apply(weights_init)
print(netG)

netD = Discriminator().to(device)
if device.type == 'cuda':
    netD = nn.DataParallel(netD)
netD.apply(weights_init)
print(netD)
criterion = nn.BCEWithLogitsLoss()
fixed_noise = torch.randn(64, z_vector, 1, 1, device=device)
real_label = 1.
fake_label = 0.
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))
img_list = []
G_losses = []
D_losses = []
iters = 0
print("Starting Training Loop...")
for epoch in range(num_epochs):
    for i, data in enumerate(dataloader, 0):
        netD.zero_grad()
        real_cpu = data[0].to(device)
        b_size = real_cpu.size(0)
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        output = netD(real_cpu)
        print(output.shape)
        print(label.shape)
        output = output.view(-1)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.mean().item()
        noise = torch.randn(b_size, z_vector, 1, 1, device=device)
        fake = netG(noise)
        label.fill_(fake_label)
        output = netD(fake.detach()).view(-1)
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        errD = errD_real + errD_fake
        optimizerD.step()
        netG.zero_grad()
        label.fill_(real_label)
        output = netD(fake).view(-1)
        errG = criterion(output, label)
        errG.backward()
        D_G_z2 = output.mean().item()
        optimizerG.step()
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))
        G_losses.append(errG.item())
        D_losses.append(errD.item())
        if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
            with torch.no_grad():
                fake = netG(fixed_noise).detach().cpu()
            img_list.append(vutils.make_grid(fake, padding=2, normalize=True))
        iters += 1
The error I'm getting:
Traceback (most recent call last):
File "G:/Pastas Estruturadas/Conhecimento/CEFET/IA/SpectroGAN/dcgan.py", line 140, in <module>
errD_real = criterion(output, label)
File "C:\Users\Ramon\anaconda3\envs\vision\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\Ramon\anaconda3\envs\vision\lib\site-packages\torch\nn\modules\loss.py", line 631, in forward
reduction=self.reduction)
File "C:\Users\Ramon\anaconda3\envs\vision\lib\site-packages\torch\nn\functional.py", line 2538, in binary_cross_entropy_with_logits
raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
ValueError: Target size (torch.Size([1])) must be the same as input size (torch.Size([169]))
The shape of output: torch.Size([1, 1, 13, 13]), and shape of label: torch.Size([1]).
The DCGAN paper describes a concrete architecture in which Conv layers are used to downsample the feature maps. If you carefully design your Conv layers, you can do without a Linear layer, but that does not mean it won't work when you use a Linear layer to downsample (especially as the very last layer). The DCGAN authors simply found that downsampling with Conv layers worked better than with Linear layers.
If you want to keep this architecture, you can change the kernel size, padding, or stride so that the last layer produces exactly a single value. Refer to the PyTorch documentation on Conv layers to see what the output size is for a given input size.
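As a concrete illustration (a minimal sketch of that suggestion, not the original poster's code): for 256x256 inputs the feature map entering the last discriminator layer is 16x16, so giving the final convolution a 16x16 kernel collapses it to one logit per image.
import torch
import torch.nn as nn

n_features_discriminator = 32

# (16 - 16)/1 + 1 = 1, so the output becomes [batch, 1, 1, 1] and .view(-1) matches a [batch]-shaped label.
last_layer = nn.Conv2d(n_features_discriminator * 8, 1, kernel_size=16, stride=1, padding=0, bias=False)

x = torch.randn(1, n_features_discriminator * 8, 16, 16)  # what the last layer receives for 256x256 images
print(last_layer(x).shape)  # torch.Size([1, 1, 1, 1])
Alternatively, following the point about fully connected layers above, flattening the current 13x13 map and adding nn.Linear(169, 1) would also yield one value per image.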
While searching for the optimal hyperparameters for my AlexNet with the help of Talos, I get an out-of-memory error. It always happens at the same point (32/240), even if I change the parameters slightly (to rule out that the cause is an unfavorable parameter combination).
Error message:
ResourceExhaustedError: OOM when allocating tensor with shape[32,96,26,26] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[node max_pooling2d_1/MaxPool (defined at D:\anaconda\envs\tf_ks\lib\site-packages\keras\backend\tensorflow_backend.py:3009) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
[Op:__inference_keras_scratch_graph_246047]
Function call stack:
keras_scratch_graph
Here is my Code:
Session configuration:
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth=True
config.gpu_options.per_process_gpu_memory_fraction = 0.99
sess = tf.compat.v1.Session(config = config)
K.set_session(sess)
Configuration and fitting of the AlexNet:
def alexnet(x_train, y_train, x_val, y_val, params):
    K.clear_session()
    if params['activation'] == 'leakyrelu':
        activation_layer = LeakyReLU(alpha = params['leaky_alpha'])
    elif params['activation'] == 'relu':
        activation_layer = ReLU()

    model = Sequential([
        Conv2D(filters=96, kernel_size=(11,11), strides=(4,4), activation='relu', input_shape=(224,224,Global.num_image_channels)),
        BatchNormalization(),
        MaxPooling2D(pool_size=(3,3), strides=(2,2)),
        Conv2D(filters=256, kernel_size=(5,5), strides=(1,1), activation='relu', padding="same"),
        BatchNormalization(),
        MaxPooling2D(pool_size=(3,3), strides=(2,2)),
        Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
        BatchNormalization(),
        Conv2D(filters=384, kernel_size=(1,1), strides=(1,1), activation='relu', padding="same"),
        BatchNormalization(),
        Conv2D(filters=256, kernel_size=(1,1), strides=(1,1), activation='relu', padding="same"),
        BatchNormalization(),
        MaxPooling2D(pool_size=(3,3), strides=(2,2)),
        Flatten(),
        Dense(4096, activation=activation_layer),
        Dropout(0.5),  # todo
        Dense(4096, activation=activation_layer),
        Dropout(0.5),  # todo
        Dense(units = 2, activation=activation_layer)
        # Dense(10, activation='softmax')
    ])
    model.compile(
        optimizer = params['optimizer'](lr = lr_normalizer(params['lr'], params['optimizer'])),
        loss = Global.loss_funktion,
        metrics = get_reduction_metric(Global.reduction_metric)
    )
    train_generator, valid_generator = create_data_pipline(params['batch_size'], params['samples'])
    tg_steps_per_epoch = train_generator.n // train_generator.batch_size
    vg_validation_steps = valid_generator.n // valid_generator.batch_size
    print('Steps per Epoch: {}, Validation Steps: {}'.format(tg_steps_per_epoch, vg_validation_steps))

    startTime = datetime.now()
    out = model.fit(
        x = train_generator,
        epochs = params['epochs'],
        validation_data = valid_generator,
        steps_per_epoch = tg_steps_per_epoch,
        validation_steps = vg_validation_steps,
        #callbacks = [checkpointer]
        workers = 8
    )
    print("Time taken:", datetime.now() - startTime)
    return out, model
Hyperparameter list:
hyper_parameter = {
    'samples': [20000],
    'epochs': [1],
    'batch_size': [32, 64],
    'optimizer': [Adam],
    'lr': [1, 2],
    'first_neuron': [1024, 2048, 4096],
    'dropout': [0.25, 0.50],
    'activation': ['leakyrelu', 'relu'],
    'hidden_layers': [0, 1, 2, 3, 4],
    'leaky_alpha': [0.1]  # default for LeakyReLU, otherwise PReLU
}
Run Talos:
dummy_x = np.empty((1, 2, 3, 224, 224))
dummy_y = np.empty((1, 2))

with tf.device('/device:GPU:0'):
    t = ta.Scan(
        x = dummy_x,
        y = dummy_y,
        model = alexnet,
        params = hyper_parameter,
        experiment_name = '{}'.format(Global.dataset),
        #shuffle=False,
        reduction_metric = Global.reduction_metric,
        disable_progress_bar = False,
        print_params = True,
        clear_session = 'tf',
        save_weights = False
    )
t.data.to_csv(Global.target_dir + Global.results, index = True)
The memory usage is always quite high, but it does not rise over the epochs; it only varies a little.
Nvidia SMI Output:
Can someone please help me here?
==========================================================================
What I already tried:
1) Splitting up the Talos run:
This caused the same error.
hyper_parameter = {
    'samples': [20000],
    'epochs': [1],
    'batch_size': [32, 64],
    'optimizer': [Adam],
    'lr': [1, 2, 3, 5],
    'first_neuron': [9999],
    'dropout': [0.25, 0.50],
    'activation': ['leakyrelu', 'relu'],
    'hidden_layers': [9999],
    'leaky_alpha': [0.1]  # default for LeakyReLU, otherwise PReLU
}
dummy_x = np.empty((1, 2, 3, 224, 224))
dummy_y = np.empty((1, 2))
first = True
for h in [0, 1, 2, 3, 4]:
    hyper_parameter['hidden_layers'] = [h]
    for fn in [1024, 2048, 4096]:
        hyper_parameter['first_neuron'] = [fn]
        with tf.device('/device:GPU:1'):
            t = ta.Scan(
                x = dummy_x,
                y = dummy_y,
                model = alexnet,
                params = hyper_parameter,
                experiment_name = '{}'.format(Global.dataset),
                #shuffle=False,
                reduction_metric = Global.reduction_metric,
                disable_progress_bar = False,
                print_params = True,
                clear_session = 'tf',
                save_weights = False
            )
        if first:
            t.data.to_csv(Global.target_dir + Global.results, index = True, mode='a')
            first = False
        else:
            t.data.to_csv(Global.target_dir + Global.results, index = True, mode='a', header=False)
==========================================================================
2) Running the model in its own thread:
While searching for the cause, I found that some people report the same issue and blame TensorFlow for not actually executing K.clear_session().
The idea may be far-fetched, but I tried training the model in a separate thread.
from threading import Thread

def gen_model_thread(x_train, y_train, x_val, y_val, params):
    thread = Thread(target=alexnet, args=(x_train, y_train, x_val, y_val, params))
    thread.start()
    return_value = thread.join()
    return return_value
with tf.device('/device:GPU:0'):
    t = ta.Scan(
        x = dummy_x,
        y = dummy_y,
        model = gen_model_thread,
        params = hyper_parameter,
        experiment_name = '{}'.format(Global.dataset),
        #shuffle=False,
        reduction_metric = Global.reduction_metric,
        disable_progress_bar = False,
        print_params = True,
        clear_session = True,
        save_weights = False
    )
This caused a type error:
Traceback (most recent call last):
File "D:\anaconda\envs\tf_ks\lib\threading.py", line 926, in _bootstrap_inner
self.run()
File "D:\anaconda\envs\tf_ks\lib\threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "<ipython-input-3-2942ae0a0a56>", line 5, in gen_model
model = alexnet(params['activation'], params['leaky_alpha'])
File "<ipython-input-2-2a405202aa5a>", line 27, in alexnet
Dense(units = 2, activation=activation_layer)
File "D:\anaconda\envs\tf_ks\lib\site-packages\keras\engine\sequential.py", line 94, in __init__
self.add(layer)
File "D:\anaconda\envs\tf_ks\lib\site-packages\keras\engine\sequential.py", line 162, in add
name=layer.name + '_input')
File "D:\anaconda\envs\tf_ks\lib\site-packages\keras\engine\input_layer.py", line 178, in Input
input_tensor=tensor)
File "D:\anaconda\envs\tf_ks\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "D:\anaconda\envs\tf_ks\lib\site-packages\keras\engine\input_layer.py", line 87, in __init__
name=self.name)
File "D:\anaconda\envs\tf_ks\lib\site-packages\keras\backend\tensorflow_backend.py", line 73, in symbolic_fn_wrapper
if _SYMBOLIC_SCOPE.value:
AttributeError: '_thread._local' object has no attribute 'value'
TypeError: cannot unpack non-iterable NoneType object
I know my last resort is to do the search manually, but I think I would run into the same problem while training my model later anyway.
Many thanks for taking care of my problem, reading my question and correcting the spelling errors in my text^^.
I am looking forward to receiving constructive solutions from this amazing community here! (:
==========================================================================
GPU: NVIDIA RTX 2080Ti and Titan Xp Collectors Edition (I tried both)
TensorFlow: 2.1.0
Keras: 2.3.1
Talos: 1.0
Disabling eager execution solved the problem for me:
tf.compat.v1.disable_eager_execution()
https://github.com/autonomio/talos/issues/482
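A note on placement (my addition, based on the documented behavior of this API): disable_eager_execution() has to be called before any graphs, ops, or sessions are created, so it belongs right after importing TensorFlow, ahead of the ConfigProto/Session setup shown above.
import tensorflow as tf

# Must run before any graphs/ops/sessions exist (TF 2.x v1-compat API).
tf.compat.v1.disable_eager_execution()

config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)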
I'm trying to run the model through RandomizedSearchCV to get the best hyperparameters:
model = keras.models.Sequential([keras.layers.Dense(300, input_dim=7500, activation="relu"),
                                 keras.layers.Dense(300, activation = "relu", name = "Intermediate"),
                                 keras.layers.Dense(100, activation = "relu"),
                                 keras.layers.Dense(4, activation="softmax")])

keras_cls = keras.wrappers.scikit_learn.KerasClassifier(model)

param_distribs = {
    "n_hidden": [0, 1, 2, 3],
    "n_neurons": np.arange(1, 300),
    "learning_rate": 1e-3
}

rnd_search_cv = RandomizedSearchCV(keras_cls, param_distribs, n_iter=10, cv=3)
rnd_search_cv.fit(x_train, y_train, epochs=30,
                  validation_data=(x_test, y_test),
                  callbacks=[keras.callbacks.EarlyStopping(patience=10)])
At rnd_search_cv.fit I'm getting the error TypeError: can't pickle _thread.RLock objects.
What should be done to correct this error?
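For reference, a minimal sketch of the pattern this error usually points to (my assumption, not an answer from the original thread): KerasClassifier is given a build function that creates a fresh model from the hyperparameters, rather than an already-compiled model instance, which scikit-learn cannot clone or pickle; the search parameters then have to match the build function's arguments and be given as lists or distributions.
# Hedged sketch: build_model, its argument names, and the loss choice are my
# assumptions about the intended setup, not code from the original question.
import numpy as np
from tensorflow import keras
from sklearn.model_selection import RandomizedSearchCV

def build_model(n_hidden=1, n_neurons=300, learning_rate=1e-3):
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=(7500,)))
    for _ in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation="relu"))
    model.add(keras.layers.Dense(4, activation="softmax"))
    model.compile(loss="sparse_categorical_crossentropy",   # assumes integer class labels
                  optimizer=keras.optimizers.SGD(learning_rate=learning_rate),
                  metrics=["accuracy"])
    return model

keras_cls = keras.wrappers.scikit_learn.KerasClassifier(build_model)  # pass the function, not a built model

param_distribs = {
    "n_hidden": [0, 1, 2, 3],
    "n_neurons": np.arange(1, 300),
    "learning_rate": [1e-3],   # values must be lists or distributions, not a bare scalar
}
rnd_search_cv = RandomizedSearchCV(keras_cls, param_distribs, n_iter=10, cv=3)
# rnd_search_cv.fit(x_train, y_train, epochs=30, ...) as in the question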
I have an NLP dataset, and following the official PyTorch tutorial I converted the dataset into word_to_idx and tag_to_idx mappings, like:
word_to_idx = {'I': 0, 'have': 1, 'used': 2, 'transfers': 3, 'on': 4, 'three': 5, 'occasions': 6, 'now': 7, 'and': 8, 'each': 9, 'time': 10}
tag_to_idx = {'PRON': 0, 'VERB': 1, 'NOUN': 2, 'ADP': 3, 'NUM': 4, 'ADV': 5, 'CONJ': 6, 'DET': 7, 'ADJ': 8, 'PRT': 9, '.': 10, 'X': 11}
I want to complete the POS-Tagging task with BiLSTM. Here is my BiLSTM code:
class LSTMTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding(vocab_size, tagset_size)
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True)
        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(in_features=hidden_dim * 2, out_features=tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        # tag_scores = F.softmax(tag_space, dim=1)
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores
Then I run the training code in PyCharm, like:
EMBEDDING_DIM = 6
HIDDEN_DIM = 6
NUM_EPOCHS = 3

model = LSTMTagger(embedding_dim=EMBEDDING_DIM,
                   hidden_dim=HIDDEN_DIM,
                   vocab_size=len(word_to_idx),
                   tagset_size=len(tag_to_idx))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# See what the scores are before training
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_idx)
    tag_scores = model(inputs)
    print(tag_scores)
    print(tag_scores.size())
However, it raises an error at the line tag_scores = model(inputs) and at the line lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1)).
The error is:
Traceback (most recent call last):
line 140, in <module>
tag_scores = model(inputs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
line 115, in forward
lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/torch/nn/modules/rnn.py", line 559, in forward
return self.forward_tensor(input, hx)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/torch/nn/modules/rnn.py", line 539, in forward_tensor
output, hidden = self.forward_impl(input, hx, batch_sizes, max_batch_size, sorted_indices)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/torch/nn/modules/rnn.py", line 519, in forward_impl
self.check_forward_args(input, hx, batch_sizes)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/torch/nn/modules/rnn.py", line 490, in check_forward_args
self.check_input(input, batch_sizes)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/torch/nn/modules/rnn.py", line 153, in check_input
self.input_size, input.size(-1)))
RuntimeError: input.size(-1) must be equal to input_size. Expected 6, got 12
I don't know how to debug with it. Could somebody help me fix this issue? Thanks in advance!
The error is here:
self.word_embeddings = nn.Embedding(vocab_size, tagset_size)
Instead of the embedding dimension, you are using the number of tags (12) as the embedding size, while the LSTM layer expects inputs of size 6 (embedding_dim).
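In code, the fix described above is to build the embedding with embedding_dim instead of tagset_size:
# nn.Embedding(num_embeddings, embedding_dim): the second argument must match
# the input_size the LSTM was constructed with (embedding_dim, i.e. 6 here).
self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)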
I am trying to define a class called XGBExtended that extends the class xgboost.XGBClassifier, the scikit-learn API for xgboost. I am running into some issues with the get_params method. Below is an IPython session illustrating the issue. Basically, get_params seems to return only the attributes I define within XGBExtended.__init__, and attributes defined during the parent init method (xgboost.XGBClassifier.__init__) are ignored. I am using IPython and running Python 2.7. Full system specs at bottom.
In [182]: import xgboost as xgb
     ...:
     ...: class XGBExtended(xgb.XGBClassifier):
     ...:     def __init__(self, foo):
     ...:         super(XGBExtended, self).__init__()
     ...:         self.foo = foo
     ...:
     ...: clf = XGBExtended(foo = 1)
     ...:
     ...: clf.get_params()
     ...:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-182-431c4c3f334b> in <module>()
8 clf = XGBExtended(foo = 1)
9
---> 10 clf.get_params()
/Users/andrewhannigan/lib/xgboost/python-package/xgboost/sklearn.pyc in get_params(self, deep)
188 if isinstance(self.kwargs, dict): # if kwargs is a dict, update params accordingly
189 params.update(self.kwargs)
--> 190 if params['missing'] is np.nan:
191 params['missing'] = None # sklearn doesn't handle nan. see #4725
192 if not params.get('eval_metric', True):
KeyError: 'missing'
So I've hit an error because 'missing' is not a key in the params dict within the XGBClassifier.get_params method. I enter the debugger to poke around:
In [183]: %debug
> /Users/andrewhannigan/lib/xgboost/python-package/xgboost/sklearn.py(190)get_params()
188 if isinstance(self.kwargs, dict): # if kwargs is a dict, update params accordingly
189 params.update(self.kwargs)
--> 190 if params['missing'] is np.nan:
191 params['missing'] = None # sklearn doesn't handle nan. see #4725
192 if not params.get('eval_metric', True):
ipdb> params
{'foo': 1}
ipdb> self.__dict__
{'n_jobs': 1, 'seed': None, 'silent': True, 'missing': nan, 'nthread': None, 'min_child_weight': 1, 'random_state': 0, 'kwargs': {}, 'objective': 'binary:logistic', 'foo': 1, 'max_depth': 3, 'reg_alpha': 0, 'colsample_bylevel': 1, 'scale_pos_weight': 1, '_Booster': None, 'learning_rate': 0.1, 'max_delta_step': 0, 'base_score': 0.5, 'n_estimators': 100, 'booster': 'gbtree', 'colsample_bytree': 1, 'subsample': 1, 'reg_lambda': 1, 'gamma': 0}
ipdb>
As you can see, params contains only the foo variable. However, the object itself contains all of the params defined by xgboost.XGBClassifier.__init__. But for some reason the BaseEstimator.get_params method, which is called from xgboost.XGBClassifier.get_params, only picks up the parameters defined explicitly in the XGBExtended.__init__ method. Unfortunately, even if I explicitly call get_params with deep=True, it still does not work correctly:
ipdb> super(XGBModel, self).get_params(deep=True)
{'foo': 1}
ipdb>
Can anyone tell why this is happening?
System specs:
In [186]: print IPython.sys_info()
{'commit_hash': u'1149d1700',
'commit_source': 'installation',
'default_encoding': 'UTF-8',
'ipython_path': '/Users/andrewhannigan/virtualenvironment/nimble_ai/lib/python2.7/site-packages/IPython',
'ipython_version': '5.4.1',
'os_name': 'posix',
'platform': 'Darwin-14.5.0-x86_64-i386-64bit',
'sys_executable': '/usr/local/Cellar/python/2.7.10/Frameworks/Python.framework/Versions/2.7/Resources/Python.app/Contents/MacOS/Python',
'sys_platform': 'darwin',
'sys_version': '2.7.10 (default, Jul 3 2015, 12:05:53) \n[GCC 4.2.1 Compatible Apple LLVM 6.1.0 (clang-602.0.53)]'}
The problem here is the incorrect declaration of the child class.
When you declare __init__ with only foo, you override the parent's signature, and scikit-learn's get_params introspects the signature of the class's __init__ to decide which parameters to report; the parent's parameters are therefore no longer picked up, even though the base class constructor gives them default values.
You should use the following:
class XGBExtended(xgb.XGBClassifier):
    def __init__(self, foo, max_depth=3, learning_rate=0.1,
                 n_estimators=100, silent=True,
                 objective="binary:logistic",
                 nthread=-1, gamma=0, min_child_weight=1,
                 max_delta_step=0, subsample=1, colsample_bytree=1, colsample_bylevel=1,
                 reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
                 base_score=0.5, seed=0, missing=None, **kwargs):
        # Pass the required parameters to the superclass
        super(XGBExtended, self).__init__(max_depth, learning_rate,
                                          n_estimators, silent, objective,
                                          nthread, gamma, min_child_weight,
                                          max_delta_step, subsample,
                                          colsample_bytree, colsample_bylevel,
                                          reg_alpha, reg_lambda,
                                          scale_pos_weight, base_score, seed, missing, **kwargs)
        # Use other custom parameters
        self.foo = foo
After that you will not get any error.
clf = XGBExtended(foo = 1)
print(clf.get_params(deep=True))
>>> {'reg_alpha': 0, 'colsample_bytree': 1, 'silent': True,
'colsample_bylevel': 1, 'scale_pos_weight': 1, 'learning_rate': 0.1,
'missing': None, 'max_delta_step': 0, 'nthread': -1, 'base_score': 0.5,
'n_estimators': 100, 'subsample': 1, 'reg_lambda': 1, 'seed': 0,
'min_child_weight': 1, 'objective': 'binary:logistic',
'foo': 1, 'max_depth': 3, 'gamma': 0}