I want to turn the following network definition into a parametric one. The number of continuous and discrete columns varies between datasets. I first pass the whole input, which in this case is 110-dimensional, through a linear layer with a ReLU activation. The output size for each categorical field of my data depends on a previous one-hot encoding transformation, so I need to define an nn.Linear(110, number of encodings) for each of them.
class Generator(nn.Module):
    """Generator with hard-coded sizes.

    A 110-dimensional input goes through one shared linear layer with ReLU,
    then through one head for the 6 numerical outputs and one head per
    categorical field (9, 2 and 7 one-hot categories). The head outputs are
    concatenated along dimension 1.
    """

    def __init__(self):
        # nn.Module.__init__ accepts no positional arguments; passing the
        # input size to it raises a TypeError.
        super(Generator, self).__init__()
        self.lin1 = nn.Linear(110, 110)
        self.lin_numerical = nn.Linear(110, 6)
        self.lin_cat_job = nn.Linear(110, 9)
        self.lin_cat_sex = nn.Linear(110, 2)
        self.lin_cat_incomeclass = nn.Linear(110, 7)

    def forward(self, x):
        """Return the concatenated numerical and categorical outputs for ``x``."""
        x = torch.relu(self.lin1(x))
        x_numerical = f.leaky_relu(self.lin_numerical(x))
        x_cat1 = f.gumbel_softmax(self.lin_cat_job(x), tau=0.2)
        x_cat2 = f.gumbel_softmax(self.lin_cat_sex(x), tau=0.2)
        x_cat3 = f.gumbel_softmax(self.lin_cat_incomeclass(x), tau=0.2)
        x_final = torch.cat((x_numerical, x_cat1, x_cat2, x_cat3), 1)
        return x_final
I have managed to change the __init__ part using two inputs: discrete_columns, an OrderedDict whose keys are the names of the categorical fields of my data and whose values are their numbers of one-hot encodings, and continuous_columns, which is just a list with the names of the continuous columns. But I have no idea how to edit the forward part:
class Generator(nn.Module):
    """Parametric generator whose output heads are sized from the column spec.

    Args:
        input_dim: Size of the input (and of the shared hidden layer).
        continuous_columns: Names of the continuous columns; only the count
            is used.
        discrete_columns: Mapping from categorical field name to its number
            of one-hot encodings.
    """

    def __init__(self, input_dim, continuous_columns, discrete_columns):
        super(Generator, self).__init__()
        self._input_dim = input_dim
        self._discrete_columns = discrete_columns
        self._num_continuous_columns = len(continuous_columns)
        self.lin1 = nn.Linear(self._input_dim, self._input_dim)
        self.lin_numerical = nn.Linear(self._input_dim, self._num_continuous_columns)
        # One head per categorical field, registered as ``lin_cat_<name>``.
        for key, value in self._discrete_columns.items():
            setattr(self, "lin_cat_{}".format(key), nn.Linear(self._input_dim, value))

    def forward(self, x):
        x = torch.relu(self.lin1(x))
        x_numerical = f.leaky_relu(self.lin_numerical(x))
        ####
        # This is the problematic part
        #####
        # TODO: the per-field categorical heads are not applied yet, and
        # x_numerical is computed but not part of the returned value.
        return x
You don't need setattr here, and honestly you shouldn't use it: you would then need getattr to retrieve each layer, which brings more trouble than it solves when there are other ways to do the job.
Now this is what I'd do for this task
# Fragment of __init__: hold one Linear head per categorical field in an
# nn.ModuleDict keyed by the field name, replacing the setattr call below.
self.lin_cat = nn.ModuleDict()
for key, value in self._discrete_columns.items():
    self.lin_cat[key] = nn.Linear(self._input_dim, value)
    # setattr(self, "lin_cat_{}".format(key), nn.Linear(self._input_dim, value))
def forward(self, x):
    """Return the numerical output concatenated with every categorical head.

    The input goes through the shared ``lin1`` layer with ReLU. The
    numerical head uses leaky ReLU, and each head in ``self.lin_cat`` uses
    Gumbel-softmax with ``tau=0.2``. Outputs are joined along dimension 1.
    """
    x = torch.relu(self.lin1(x))
    x_numerical = f.leaky_relu(self.lin_numerical(x))
    x_cat = [f.gumbel_softmax(head(x), tau=0.2) for head in self.lin_cat.values()]
    x_final = torch.cat((x_numerical, *x_cat), 1)
    # Return the concatenated result, not the hidden activation ``x``.
    return x_final
Related
I have defined my model as follows:
class MyModel(nn.Module):
    """Two frozen BERT models whose flattened outputs feed a two-layer head."""

    def __init__(self, hidden_dim, activation_func):
        super().__init__()
        self.bert1 = BertForTokenClassification.from_pretrained("...")
        self.bert2 = BertForTokenClassification.from_pretrained("...")
        # Both encoders are frozen right after loading.
        for encoder in (self.bert1, self.bert2):
            self.freezModel(encoder)
        self.l1 = nn.Linear(2048 * 2 * 2, hidden_dim)
        self.activation_func = activation_func
        self.l2 = nn.Linear(hidden_dim, 1)

    def freezModel(self, model):
        """Set ``requires_grad = False`` on every parameter of ``model``; no-op for ``None``."""
        if model is not None:
            for param in model.parameters():
                param.requires_grad = False

    def forward(self, x):
        """Split ``x`` along dimension 1, encode each part and apply the linear head."""
        first_input, second_input = x[:, 0, :], x[:, 1, :]
        first_flat = torch.flatten(self.bert1(first_input)[0], 1)
        second_flat = torch.flatten(self.bert2(second_input)[0], 1)
        merged = torch.cat((first_flat, second_flat), 1)
        hidden = self.activation_func(self.l1(merged))
        return self.l2(hidden)
My first question: did I define my model correctly?
My second question: how should I define the optimizer, and when should I use parameter groups?
This is the code of my instantiation of the class and the optimizer.
# Build the model with a 100-unit hidden layer and a ReLU activation, then
# hand all of its parameters to AdamW.
model = MyModel(hidden_dim=100, activation_func=nn.ReLU())
optim = torch.optim.AdamW(params=model.parameters(), lr=5e-5)
The input to my model consists of two arrays; each array goes to its own BERT, and their outputs are concatenated and sent to the linear layer. The BERTs are only used to generate the embeddings, and I have frozen them so that they are not changed during training.
This code runs fine to create a simple feed-forward neural Network. The layer (torch.nn.Linear) is assigned to the class variable by using self.
class MultipleRegression3L(torch.nn.Module):
    """Feed-forward regressor whose layers are stored as plain attributes."""

    def __init__(self, num_features):
        super().__init__()
        self.layer_1 = torch.nn.Linear(num_features, 16)
        ## more layers
        self.relu = torch.nn.ReLU()

    def forward(self, inputs):
        """Apply ReLU after each of the three hidden layers, then the output layer."""
        hidden_1 = self.relu(self.layer_1(inputs))
        hidden_2 = self.relu(self.layer_2(hidden_1))
        hidden_3 = self.relu(self.layer_3(hidden_2))
        return self.layer_out(hidden_3)

    def predict(self, test_inputs):
        """Alias for :meth:`forward`."""
        prediction = self.forward(test_inputs)
        return prediction
However, when I tried to store the layer using the list:
class MultipleRegression(torch.nn.Module):
    """Feed-forward regressor with a configurable stack of hidden layers.

    Args:
        num_features: Number of input features.
        params: Dict of optional settings: ``number_of_layers`` (default 3),
            ``number_of_neurons_in_each_layer`` (default ``[16, 32, 16]``)
            and ``activation_function`` (default ``"relu"``, the only
            supported value).

    Raises:
        ValueError: If the requested activation function is not supported.
    """

    def __init__(self, num_features, params):
        super(MultipleRegression, self).__init__()
        number_of_layers = params.get('number_of_layers', 3)
        number_of_neurons_in_each_layer = params.get(
            'number_of_neurons_in_each_layer', [16, 32, 16])
        activation_function = params.get('activation_function', "relu")

        # A ModuleList (not a plain list) registers the layers as submodules,
        # so their parameters show up in parameters(), state_dict() and .to().
        self.layers = torch.nn.ModuleList()
        v1 = num_features
        for i in range(number_of_layers):
            v2 = number_of_neurons_in_each_layer[i]
            self.layers.append(torch.nn.Linear(v1, v2))
            v1 = v2
        # v1 is the width of the last hidden layer, or num_features when
        # there are no hidden layers at all.
        self.layer_out = torch.nn.Linear(v1, 1)

        if activation_function == "relu":
            self.act_func = torch.nn.ReLU()
        else:
            raise ValueError("Activation function %s is not supported" % (activation_function))

    def forward(self, inputs):
        """Apply each hidden layer followed by the activation, then the output layer."""
        x = inputs
        for layer in self.layers:
            x = self.act_func(layer(x))
        return self.layer_out(x)
The two models do not behave the same way. What can be wrong here?
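PyTorch only registers layers that are assigned as module attributes or stored in a container such as nn.ModuleList. Layers kept in a plain Python list are not registered, so their parameters are missing from model.parameters() and are never updated by the optimizer. Using self.layers = torch.nn.ModuleList() fixed the problem.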
I am a beginner with PyTorch, and I want to build a fully connected model.
The model is very simple, like:
def forward(self, x):
    """Two ReLU-activated hidden layers followed by a linear output layer."""
    hidden_1 = self.relu(self.fc1(x))
    hidden_2 = self.relu(self.fc2(hidden_1))
    output = self.fc3(hidden_2)
    return output
But when I want to add some layers or adjust the hidden layers, I find I have to write a lot of redundant code like:
def forward(self, x):
    # Each additional hidden layer needs another near-identical line here.
    x = self.relu(self.fc1(x))
    x = self.relu(self.fc2(x))
    x = self.relu(self.fc3(x))
    x = self.relu(self.fc4(x))
    x = self.relu(self.fc5(x))
    # Placeholder for the remaining layers, elided in the original.
    ...
    return self.fcn(x)
Besides, if I want to change the number of features of one layer, I also have to change the adjacent layer.
So I want to know a more graceful way (maybe more Pythonic, and easier for adjusting hyperparameters).
I tried to write code like:
def __init__(self):
    super().__init__()
    self.hidden_num = [2881, 5500, 2048, 20]  # i just want to change here! to try some new structure
    # Wrap the layers in nn.ModuleList so they are registered as submodules;
    # a plain Python list hides their parameters from parameters(),
    # state_dict() and .to().
    self.fc = nn.ModuleList([
        nn.Linear(n_in, n_out).to(DEVICE)
        for n_in, n_out in zip(self.hidden_num, self.hidden_num[1:])
    ])
    self.relu = nn.ReLU()
def forward(self, x):
    """Run ``x`` through every layer in ``self.fc``, with ReLU after all but the last."""
    last_index = len(self.fc) - 1
    for index, layer in enumerate(self.fc):
        x = layer(x)
        if index < last_index:
            x = self.relu(x)
    return x
But I found that this way doesn't work; the model can't be built.
So could anyone tell me how to define a fully connected model like the one above?
(So that I can adjust the model layers only by adjusting the list named hidden_num.)
If you want to keep the same approach then you can use nn.ModuleList to properly register all linear layers inside the module's __init__:
class Model(nn.Module):
    """Fully connected network whose layer widths are given by ``hidden_num``.

    Args:
        hidden_num: Sequence of layer widths; consecutive pairs define the
            ``nn.Linear`` layers.
    """

    def __init__(self, hidden_num=(2881, 5500, 2048, 20)):
        super().__init__()
        self.fc = nn.ModuleList([
            nn.Linear(n_in, n_out)
            for n_in, n_out in zip(hidden_num, hidden_num[1:])])

    def forward(self, x):
        """Apply every linear layer, with ReLU after all but the last one."""
        last_index = len(self.fc) - 1
        for i, m in enumerate(self.fc):
            x = m(x)
            if i != last_index:
                x = torch.relu(x)
        return x
However, you may want to handle the logic inside the __init__ function once. One alternative is to use nn.Sequential.
class Model(nn.Module):
    """Fully connected network built once in ``__init__`` as an ``nn.Sequential``.

    Args:
        hidden_num: Sequence of layer widths; consecutive pairs define the
            ``nn.Linear`` layers, with ``nn.ReLU`` after all but the last.
    """

    def __init__(self, hidden_num=(2881, 5500, 2048, 20)):
        super().__init__()
        fc = []
        # There are len(hidden_num) - 1 linear layers, so the last one has
        # index len(hidden_num) - 2. self.fc does not exist yet at this point.
        last_index = len(hidden_num) - 2
        for i in range(len(hidden_num) - 1):
            fc.append(nn.Linear(hidden_num[i], hidden_num[i + 1]))
            if i != last_index:
                fc.append(nn.ReLU())
        # nn.Sequential takes the modules as separate positional arguments.
        self.fc = nn.Sequential(*fc)

    def forward(self, x):
        x = self.fc(x)
        return x
Ideally, you would inherit from nn.Sequential directly to avoid re-writing the forward function which is unnecessary in this case:
class Model(nn.Sequential):
    """Fully connected network that is itself an ``nn.Sequential``.

    Args:
        hidden_num: Sequence of layer widths; consecutive pairs define the
            ``nn.Linear`` layers, with ``nn.ReLU`` after all but the last.
    """

    def __init__(self, hidden_num=(2881, 5500, 2048, 20)):
        fc = []
        # Compare against the number of linear layers; no attribute of self
        # may be touched before super().__init__() has run.
        last_index = len(hidden_num) - 2
        for i in range(len(hidden_num) - 1):
            fc.append(nn.Linear(hidden_num[i], hidden_num[i + 1]))
            if i != last_index:
                fc.append(nn.ReLU())
        # nn.Sequential takes the modules as separate positional arguments.
        super().__init__(*fc)
I have a custom Pytorch dataset that returns a dictionary containing a class object "queries".
class QueryDataset(torch.utils.data.Dataset):
    """Dataset that returns one query object with its values and targets per index."""

    def __init__(self, queries, values, targets):
        # super(QueryDataset) without an instance is an unbound super object
        # and never runs the base-class initialiser; use the bound form.
        super().__init__()
        self.queries = queries
        self.values = values
        self.targets = targets

    def __len__(self):
        return self.values.shape[0]

    def __getitem__(self, idx):
        """Return a DeviceDict with the ``query``, ``values`` and ``targets`` at ``idx``."""
        sample = DeviceDict({'query': self.queries[idx],
                             "values": self.values[idx],
                             "targets": self.targets[idx]})
        return sample
The problem is that when I put the queries in a data loader I get default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'query.Query'>. Is there a way to have a class object in my data loader? It blows up at next(iterator) in the code below.
# NOTE(review): QueryDataset.__init__ as defined in this file takes
# (queries, values, targets); only one argument is passed here - confirm.
train_queries = QueryDataset(train_queries)
train_loader = torch.utils.data.DataLoader(train_queries,
                                           batch_size=10,
                                           shuffle=True,
                                           drop_last=False)
for epoch in range(epochs):
    iterator = iter(train_loader)
    # Use a throwaway loop variable so the epoch counter is not overwritten.
    for _ in range(len(train_loader)):
        batch = next(iterator)
        out = model(batch)
        loss = criterion(out["pred"], batch["targets"])
        self.optimizer.zero_grad()
        loss.sum().backward()
        self.optimizer.step()
You need to define your own collate_fn in order to do this.
A sloppy approach just to show you how stuff works here, would be something like this:
import torch
class DeviceDict:
    """Minimal wrapper that stores a payload and can print it."""

    def __init__(self, data):
        self.data = data

    def print_data(self):
        """Print the wrapped payload to standard output."""
        payload = self.data
        print(payload)
class QueryDataset(torch.utils.data.Dataset):
    """Demo dataset that returns a plain dict with one query, value and target per index."""

    def __init__(self, queries, values, targets):
        # super(QueryDataset) without an instance is an unbound super object
        # and never runs the base-class initialiser; use the bound form.
        super().__init__()
        self.queries = queries
        self.values = values
        self.targets = targets

    def __len__(self):
        # Report the real number of samples; a hard-coded length lets the
        # loader request indices that do not exist.
        return len(self.values)

    def __getitem__(self, idx):
        sample = {'query': self.queries[idx],
                  "values": self.values[idx],
                  "targets": self.targets[idx]}
        return sample
def custom_collate(dict):
    """Wrap whatever the DataLoader passes in inside a DeviceDict."""
    wrapped = DeviceDict(dict)
    return wrapped
# Build the demo dataset and fetch a single batch through the custom collate function.
dt = QueryDataset("q", "v", "t")
# Pass the dataset created above (``dt``); ``dtt`` was an undefined name.
dl = torch.utils.data.DataLoader(dt, batch_size=1, collate_fn=custom_collate)
t = next(iter(dl))
t.print_data()
Basically, collate_fn allows you to achieve custom batching or to add support for custom data types, as explained in the link I previously provided.
As you see it just shows the concept, you need to change it based on your own needs.
For those curious, this is the DeviceDict and custom collate function that I used to get things to work.
class DeviceDict(dict):
    """A dict whose tensor values can be moved to a device in one call."""

    def __init__(self, *args):
        super().__init__(*args)

    def to(self, device):
        """Return a new DeviceDict with tensor values moved to ``device``.

        Non-tensor values are carried over unchanged.
        """
        return DeviceDict({
            name: item.to(device) if torch.is_tensor(item) else item
            for name, item in self.items()
        })
def collate_helper(elems, key):
    """Collate the per-sample values of one field.

    Values under the ``"query"`` key are returned as the untouched list;
    every other key goes through PyTorch's default collate function.
    """
    if key != "query":
        return torch.utils.data.dataloader.default_collate(elems)
    return elems
def custom_collate(batch):
    """Collate a list of sample dicts into one DeviceDict, field by field.

    The keys are taken from the first sample in ``batch``.
    """
    first_sample = batch[0]
    collated = {}
    for key in first_sample:
        per_sample_values = [d[key] for d in batch]
        collated[key] = collate_helper(per_sample_values, key)
    return DeviceDict(collated)
I am trying to implement normalizing flows embedded in a Keras model. In all examples I can find, such as the documentation of MAF, the bijectors which constitute the normalizing flows are embedded into a TransformedDistribution and exposed directly for training etc.
I am trying to embed this TransformedDistribution in a keras Model to match the architecture of other models I have which are inheriting from keras Model.
Unfortunately all my attempts (see code) so far fail at transferring the trainable variables inside the transformed distribution to the keras Model.
I have tried to make the bijector inherit from tf.keras.layers.Layer, which did not change anything.
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors
class Flow(tfb.Bijector, tf.Module):
    """
    tf.Module to register trainable_variables
    """

    def __init__(self, d, init_sigma=0.1, **kwargs):
        super().__init__(
            dtype=tf.float32,
            forward_min_event_ndims=0,
            inverse_min_event_ndims=0,
            **kwargs
        )
        # The flow maps R^d to R^d.
        self.d = d
        # Standard deviation used to initialise the variable below.
        self.init_sigma = init_sigma
        initializer = tf.random_normal_initializer(stddev=self.init_sigma)
        initial_u = initializer(shape=[1, self.d], dtype=tf.float32)
        # Single trainable variable of shape [1, d].
        self.u = tf.Variable(
            initial_u,
            dtype=tf.float32,
            name='u',
            trainable=True,
        )

    def _forward(self, x):
        # Identity in the forward direction.
        return x

    def _inverse(self, y):
        # Identity in the inverse direction.
        return y
class Flows(tf.keras.Model):
    """Keras model wrapping a TransformedDistribution built from a chain of Flow bijectors."""

    def __init__(self, d=2, shape=(100, 2), n_flows=10, ):
        super().__init__()
        # Parameters
        self.d = d
        self.shape = shape
        self.n_flows = n_flows
        # Base distribution: multivariate normal (diag) with zero loc
        zero_loc = tf.zeros(shape=shape, dtype=tf.float32)
        base_distribution = tfd.MultivariateNormalDiag(loc=zero_loc)
        # Flows as a chain of bijectors, passed to Chain in reversed order
        flows = [Flow(self.d, name=f"flow_{n + 1}") for n in range(n_flows)]
        bijector = tfb.Chain(flows[::-1])
        self.flow = tfd.TransformedDistribution(
            distribution=base_distribution,
            bijector=bijector
        )

    def call(self, *inputs):
        """Run the inputs forward through the chained bijector."""
        return self.flow.bijector.forward(*inputs)

    def log_prob(self, *inputs):
        """Delegate to the transformed distribution's ``log_prob``."""
        return self.flow.log_prob(*inputs)

    def sample(self, num):
        """Delegate to the transformed distribution's ``sample``."""
        return self.flow.sample(num)
q = Flows()
# Call to instantiate variables
q(tf.zeros(q.shape))
# Prints no trainable params.
# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would emit an extra "None" line).
q.summary()
# Prints expected trainable params
print(q.flow.trainable_variables)
Any idea if this is even possible? Thanks!
I bumped into this issue as well. It seems to be caused by the incompatibility issues between TFP and TF 2.0 (a couple relevant issues https://github.com/tensorflow/probability/issues/355 and https://github.com/tensorflow/probability/issues/946).
As a workaround, you need to add the (trainable) variables of your transformed distribution / bijector as an attribute to your Keras Model:
class Flows(tf.keras.Model):
    """Keras model wrapping a flow-based TransformedDistribution, with its variables exposed."""

    def __init__(self, d=2, shape=(100, 2), n_flows=10, ):
        super().__init__()
        # Parameters
        self.d = d
        self.shape = shape
        self.n_flows = n_flows
        # Base distribution: multivariate normal (diag) with zero loc
        base_distribution = tfd.MultivariateNormalDiag(
            loc=tf.zeros(shape=shape, dtype=tf.float32)
        )
        # Flows as a chain of bijectors, passed to Chain in reversed order
        flows = [Flow(self.d, name=f"flow_{index}") for index in range(1, n_flows + 1)]
        flows.reverse()
        self.flow = tfd.TransformedDistribution(
            distribution=base_distribution,
            bijector=tfb.Chain(flows)
        )
        # issue: https://github.com/tensorflow/probability/issues/355, https://github.com/tensorflow/probability/issues/946
        # need to add bijector's trainable variables as an attribute (name does not matter)
        # otherwise this layer has zero trainable variables
        self._variables = self.flow.variables  # https://github.com/tensorflow/probability/issues/355

    def call(self, *inputs):
        """Run the inputs forward through the chained bijector."""
        return self.flow.bijector.forward(*inputs)

    def log_prob(self, *inputs):
        """Delegate to the transformed distribution's ``log_prob``."""
        return self.flow.log_prob(*inputs)

    def sample(self, num):
        """Delegate to the transformed distribution's ``sample``."""
        return self.flow.sample(num)
After adding this your model should have trainable variables and weights to optimize.