Defining model blocks in tf.keras

I'm experimenting with my model's architecture and I would like to have several predefined blocks of layers that I can mix at will. I thought that creating a separate class for each of these block structures would make things easier, and I figured that subclassing the Model class in tf.keras was the way to go. So I have done the following (a toy example, yet long; sorry!).
class PoolingBlock(Model):
    def __init__(self, filters, stride, name):
        super(PoolingBlock, self).__init__(name=name)
        self.bn = BatchNormalization()
        self.conv1 = Conv1D(filters=filters, kernel_size=1, padding='same')
        self.mp1 = MaxPooling1D(stride, padding='same')

    def call(self, input_tensor, training=False, mask=None):
        x = self.bn(input_tensor)
        x = tf.nn.relu(x)
        x = self.conv1(x)
        x = self.mp1(x)
        return x
class ModelA(Model):
    def __init__(self, n_dense, filters, stride, name):
        super(ModelA, self).__init__(name=name)
        self.d1 = Dense(n_dense, name="DenseLayer1")
        self.pb1 = PoolingBlock(filters, stride, name="PoolingBlock_1")
        self.d2 = Dense(n_dense, name="DenseLayer2")

    def call(self, inputs, training=False, mask=None):
        x = inputs
        x = self.d1(x)
        x = self.pb1(x)
        x = self.d2(x)
        return x
model = ModelA(100, 10, 2, 'ModelA')
model.build(input_shape=x.shape)
Then I continue with model.compile(...) and model.fit(...) as usual. But when training, I receive this warning:
WARNING:tensorflow:Entity <bound method PoolingBlock.call of
<model.PoolingBlock object at 0x7fe09ca04208>> could not be
transformed and will be executed as-is. Please report this to the
AutoGraph team. When filing the bug, set the verbosity to 10 (on
Linux, export AUTOGRAPH_VERBOSITY=10) and attach the full output.
Cause: converting <bound method PoolingBlock.call of
<model.PoolingBlock object at 0x7fe09ca04208>>: AttributeError:
module 'gast' has no attribute 'Num'
I don't understand what that means. I am wondering whether my training is going as planned, whether this way of subclassing is correct and solid, and whether I can suppress this warning somehow.

Kindly try downgrading the version of gast:
pip install gast==0.2.2
and then re-train the network.
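If you want to confirm which gast version the environment actually picks up before re-training, a quick check along these lines should work (importlib.metadata requires Python 3.8+; on older versions pkg_resources can be used instead):

import importlib.metadata as metadata
import tensorflow as tf

print(metadata.version("gast"))   # should report 0.2.2 after the downgrade
print(tf.__version__)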

Related

How to return extra loss from module forward function in PyTorch?

I made a module that needs an extra loss term, e.g.
class MyModule:
    def forward(self, x):
        out = f(x)
        extra_loss = loss_f(self.parameters(), x)
        return out, extra_loss
I can't figure out how to make this module embeddable, for example, into a Sequential model: any regular module like Linear put after this one will fail because extra_loss causes the input to Linear to be a tuple, which Linear does not support.
So what I am looking for is a way to extract that extra loss after running the model forward:
my_module = MyModule()
model = Sequential(
    my_module,
    Linear(my_module_outputs, 1)
)
output = model(x)
my_module_loss = ????
loss = mse(label, output) + my_module_loss
Does module composability support this scenario?
IMHO, hooks are an overreaction here. Provided extra_loss is additive, we can use a class-level (effectively global) variable like this:
class MyModule:
    extra_loss = 0

    def forward(self, x):
        out = f(x)
        MyModule.extra_loss += loss_f(self.parameters(), x)
        return out

output = model(x)
loss = mse(label, output) + MyModule.extra_loss
MyModule.extra_loss = 0
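To make the accumulator pattern concrete, here is a self-contained toy version of the same idea (the class name PenalizedLinear and the L2 penalty are illustrative, not taken from the question):

import torch
import torch.nn as nn

class PenalizedLinear(nn.Module):
    """Toy module that accumulates an extra (L2) loss term into a class attribute."""
    extra_loss = 0

    def __init__(self, n_in, n_out):
        super().__init__()
        self.linear = nn.Linear(n_in, n_out)

    def forward(self, x):
        out = self.linear(x)
        # accumulate the extra loss as part of the graph so backward() can reach it
        PenalizedLinear.extra_loss = PenalizedLinear.extra_loss + self.linear.weight.pow(2).sum()
        return out

model = nn.Sequential(PenalizedLinear(5, 10), nn.Linear(10, 1))
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
mse = nn.MSELoss()

x, label = torch.randn(8, 5), torch.randn(8, 1)
for _ in range(3):
    optimizer.zero_grad()
    output = model(x)
    loss = mse(output, label) + PenalizedLinear.extra_loss
    loss.backward()
    optimizer.step()
    PenalizedLinear.extra_loss = 0   # reset the accumulator after each step
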
You can register a hook in this case. A hook can be registered on a Tensor or an nn.Module. A hook is a function that is executed when either forward or backward is called. In this case, we want to attach a forward hook without detaching anything from the graph, so that the backward pass can still happen.
import torch
import torch.nn as nn

act_out = {}

def get_hook(name):
    def hook(m, input, output):
        act_out[name] = output
    return hook

class MyModule(torch.nn.Module):
    def __init__(self, input, out, device=None):
        super().__init__()
        self.model = nn.Linear(input, out)

    def forward(self, x):
        return self.model(x), torch.sum(x)  # our extra loss

class MyModule1(torch.nn.Module):
    def __init__(self, input, out, device=None):
        super().__init__()
        self.model = nn.Linear(input, out)

    def forward(self, pair):
        x, loss = pair
        return self.model(x)

model = nn.Sequential(
    MyModule(5, 10),
    MyModule1(10, 1)
)

for name, module in model.named_children():
    print(name, module)
    if name == '0':
        module.register_forward_hook(get_hook(name))

x = torch.tensor([1, 2, 3, 4, 5]).float()
out = model(x)
print(act_out)
loss = myanotherloss(out) + act_out['0'][1]  # this is the extra loss
# further processing
Note: I am using name == '0' because this is the only module where I want to attach the hook.
Note: Another notable point is that nn.Sequential doesn't allow multiple inputs. In this case, the pair is simply passed along as a tuple, and the next module unpacks the input and the extra loss from that tuple.

Input parameters from a nested class to Pytorch Optimization Function

I have the following graph neural network model, and I am not able to pass the model's learnable parameters to the optimizer.
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module

class Graphconvlayer(nn.Module):
    def __init__(self, adj, input_feature_neurons, output_neurons):
        super(Graphconvlayer, self).__init__()
        self.adj = adj
        self.input_feature_neurons = input_feature_neurons
        self.output_neurons = output_neurons
        self.weights = Parameter(torch.normal(mean=0.0, std=torch.ones(input_feature_neurons, output_neurons)))
        self.bias = Parameter(torch.normal(mean=0.0, std=torch.ones(output_neurons)))

    def forward(self, inputfeaturedata):
        output1 = torch.mm(self.adj, inputfeaturedata)
        print(output1.shape)
        print(self.weights.shape)
        print(self.bias.shape)
        output2 = torch.matmul(output1, self.weights) + self.bias
        return output2

class GCN(nn.Module):
    def __init__(self, adj, input_feature_neurons, output_neurons, lr, dropoutvalue, hidden, data):
        super(GCN, self).__init__()
        self.adj = adj
        self.input_feature_neurons = input_feature_neurons
        self.output_neurons = output_neurons
        self.lr = lr
        self.dropoutvalue = dropoutvalue
        self.hidden = hidden
        self.data = data
        self.gcn1 = Graphconvlayer(adj, input_feature_neurons, hidden)
        self.gcn2 = Graphconvlayer(adj, hidden, output_neurons)

    def forward(self, x):
        x = F.relu(self.gcn1(x))
        x = F.dropout(x, self.dropoutvalue)
        x = self.gcn2(x)
        print("opop")
        return F.log_softmax(x, dim=1)
for n, p in a.named_parameters():
    print(n, p.shape)
>>>
gcn1.weights torch.Size([1433, 2708])
gcn1.bias torch.Size([2708])
gcn2.weights torch.Size([2708, 7])
gcn2.bias torch.Size([7])
>>>
optimizer = optim.Adam(a.named_parameters(), lr=0.001)
>>>
NameError: name 'optim' is not defined
When I pass it as dict(a.named_parameters()), I am able to print the values, but I cannot pass it to the optimization function. Can anyone guide me through this?
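For what it's worth, the usual pattern (a sketch, not from the original post) is to import torch.optim and pass the parameter tensors themselves rather than the (name, tensor) pairs that named_parameters() yields:

import torch.optim as optim

# a is the GCN instance from the question; parameters() yields the tensors the optimizer needs
optimizer = optim.Adam(a.parameters(), lr=0.001)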

How to implement binary mask matrix in Keras?

I'm currently working on a project, and part of it is reimplementing in Keras a model that was written for a paper in PyTorch. The overall model classifies proteins based on three elements of their properties: sequence, interaction with other proteins, and domains in their sequence (motifs). The part I'm currently recreating is the protein-protein interaction part. First, the input vectors simply go through some fully connected layers, which is easy enough to implement in Keras. However, the outputs from this model are fed into a 'weight classifier model' which applies a binary mask matrix to its inputs, using a layer created specifically for this model with PyTorch's nn.functional API.
Here is the code I am struggling to implement in Keras:
class Weight_classifier(nn.Module):
    def __init__(self, func):
        super(Weight_classifier, self).__init__()
        # self.weight_layer = nn.Linear(OUT_nodes[func]*3, OUT_nodes[func])
        self.weight_layer = MaskedLinear(OUT_nodes[func]*3, OUT_nodes[func], 'data/{}_maskmatrix.csv'.format(func)).cuda()
        self.outlayer = nn.Linear(OUT_nodes[func], OUT_nodes[func])

    def forward(self, weight_features):
        weight_out = self.weight_layer(weight_features)
        # weight_out = F.sigmoid(weight_out)
        weight_out = F.relu(weight_out)
        weight_out = F.sigmoid(self.outlayer(weight_out))
        return weight_out

class MaskedLinear(nn.Linear):
    def __init__(self, in_features, out_features, relation_file, bias=True):
        super(MaskedLinear, self).__init__(in_features, out_features, bias)
        mask = self.readRelationFromFile(relation_file)
        self.register_buffer('mask', mask)
        self.iter = 0

    def forward(self, input):
        masked_weight = self.weight * self.mask
        return F.linear(input, masked_weight, self.bias)

    def readRelationFromFile(self, relation_file):
        mask = []
        with open(relation_file, 'r') as f:
            for line in f:
                l = [int(x) for x in line.strip().split(',')]
                for item in l:
                    assert item == 1 or item == 0  # each relation entry must be 0 or 1
                mask.append(l)
        return Variable(torch.Tensor(mask))
And this is the paper I am working from; it contains several diagrams and explanations of the models, in case I have not explained the issue sufficiently.
Many thanks.
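For what it's worth, one possible starting point (a sketch under assumptions, not a definitive port; the class name MaskedDense is made up here) is to subclass tf.keras.layers.Dense and multiply the kernel by a fixed mask in call. The mask is transposed because nn.Linear stores its weight as (out_features, in_features) while the Keras kernel is (in_features, units); the CSV is assumed to follow the same format as readRelationFromFile above.

import numpy as np
import tensorflow as tf

class MaskedDense(tf.keras.layers.Dense):
    """Dense layer whose kernel is multiplied elementwise by a fixed binary mask."""

    def __init__(self, units, mask_file, **kwargs):
        super().__init__(units, **kwargs)
        # one CSV row per output unit, transposed to match the Keras kernel layout
        self._mask_value = np.loadtxt(mask_file, delimiter=',').T.astype('float32')

    def build(self, input_shape):
        super().build(input_shape)
        self.mask = tf.constant(self._mask_value)

    def call(self, inputs):
        masked_kernel = self.kernel * self.mask
        outputs = tf.matmul(inputs, masked_kernel)
        if self.use_bias:
            outputs = tf.nn.bias_add(outputs, self.bias)
        return self.activation(outputs)

The rest of Weight_classifier would then be ordinary Dense layers with relu/sigmoid activations.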

How to use multiple models in Keras model subclass API

I am trying to train a fairly complex model that uses multiple frozen pre-trained models and has a custom training loop with a fairly complicated multi-task loss function. Because of these complexities, my plan was to define multiple separate Keras models within the subclassed model. I have been having problems with my setup and I've been able to simplify it to a simple example that demonstrates the problem.
The code below trains a simple model called MainModel, which uses the Keras model subclassing API, but it is basically just a Sequential([Conv1d(), Conv1d()]) model. When I define another model in the same class, self.aux_model, the original model no longer trains properly. In the example, self.aux_model doesn't play any role in the training, it is only defined, never used. Specifically, after each training iteration, the weight values are the same as they were at the beginning of the iteration. So, the model weights are never updated, even though the gradients have non-zero values.
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.callbacks import Callback
num_epochs = 5
steps_per_epoch = 100
audio_len = 16000
class WeightChecker:
    """Automated health checks for training Keras models."""

    def __init__(self, model):
        self.initial_model = model
        self.var_names = [var.name for var in model.trainable_variables]
        self.prev_weights = model.get_weights()

    def check_epoch(self, model):
        """Checks to run at the end of an epoch"""
        self.check_untrained_params(model)

    def check_untrained_params(self, model):
        """Compare self.model.trainable_variables to self.prev_weights"""
        passed = True
        curr_weights = model.get_weights()
        for curr_var, prev_var, var_name in zip(curr_weights, self.prev_weights, self.var_names):
            eq = np.equal(curr_var, prev_var).all()
            if eq:
                passed = False
                print(f"\nWarning: Variable {var_name} was not updated with training. "
                      f"Confirm that this layer is correctly "
                      f"connected to the computation graph.")
        self.prev_weights = [w.copy() for w in curr_weights]
        return passed
class WeightCheckerCallback(Callback):
    """Check model initialization and run training checks."""

    def __init__(self):
        super().__init__()
        self.weight_check = None

    def setup_weight_checker(self, model: tf.keras.Model = None):
        """Initialize the callback with an input_batch and targets."""
        self.weight_check = WeightChecker(model)

    def on_train_begin(self, logs=None):
        if self.weight_check is None:
            raise ValueError("setup_weight_checker() must be called to use WeightCheckerCallback.")

    def on_epoch_end(self, epoch, logs=None):
        self.weight_check.check_epoch(self.model)
class MainModel(tf.keras.Model):
    """Main Model."""

    def __init__(self):
        super().__init__()
        self.feature_dim = 128
        self.aux_model = self._set_aux_model()
        self.map_model = tf.keras.Sequential([
            tf.keras.layers.Conv1D(64, 3, padding='same'),
            tf.keras.layers.Conv1D(1, 3, padding='same')
        ])

    def call(self, inputs, training=True):
        output = self.map_model(inputs)
        return output

    def train_step(self, data):
        mixed_audio = data[0]
        clean_audio = data[1]
        with tf.GradientTape() as tape:
            decoded_audio = self.map_model(mixed_audio)
            total_loss = tf.reduce_mean(tf.abs(decoded_audio - clean_audio))
        grads = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        losses = {
            'loss': total_loss,
        }
        return losses

    @staticmethod
    def _set_aux_model():
        """Set an auxiliary model."""
        model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
        model.build(input_shape=(None, 1))
        model.trainable = False
        return model
class TrainingTask:
    """A Keras model training task."""

    def __init__(self):
        self.model, self.stateful_model = self._set_model()
        self.callbacks = [WeightCheckerCallback()]

    @staticmethod
    def _set_model():
        model = MainModel()
        # Build the model with fake data.
        model.compile(optimizer='adam')
        fake_data = np.random.randn(1, audio_len, 1)
        fake_data = fake_data.astype(np.float32)
        model(fake_data, training=True)
        return model, None

    def fit(self):
        """Custom model fit method."""
        try:
            weight_checker_callback_index = [isinstance(cb, WeightCheckerCallback)
                                             for cb in self.callbacks].index(True)
        except ValueError:
            weight_checker_callback_index = None
        if weight_checker_callback_index is not None:
            self.callbacks[weight_checker_callback_index].setup_weight_checker(
                model=self.model
            )
        for callback in self.callbacks:
            callback.set_model(self.model)
        print("\nBegin training")
        for callback in self.callbacks:
            callback.on_train_begin()
        for epoch in range(num_epochs):
            for callback in self.callbacks:
                callback.on_epoch_begin(epoch)
            for batch in range(steps_per_epoch):
                x, y = next(data_gen_batch())
                for callback in self.callbacks:
                    callback.on_batch_begin(batch)
                metrics = self.model.train_step([x, y])
                batch_loss = np.mean(metrics.pop('loss'))
                print(batch, epoch, batch_loss)
                for callback in self.callbacks:
                    callback.on_batch_end(batch, metrics)
            print(f'Epoch: {epoch}')
            numeric_metrics = dict()
            numeric_metrics['loss'] = batch_loss
            for callback in self.callbacks:
                callback.on_epoch_end(epoch, numeric_metrics)
def data_gen():
    """Generate random data for training."""
    data = (np.random.random((audio_len, 1)), np.random.random((audio_len, 1)))
    while True:
        yield data

def data_gen_batch(batch_size=8):
    """Generate random data in batches for training."""
    data = next(data_gen())
    data_batch = (np.stack([data[0]] * batch_size, axis=0),
                  np.stack([data[1]] * batch_size, axis=0))
    while True:
        yield data_batch

if __name__ == '__main__':
    task = TrainingTask()
    task.fit()
The classes WeightCheckerCallback and WeightChecker make up a callback that I defined to illustrate the problem, which would otherwise fail silently. In addition to some output from each training step, the code produces the following warnings about layers of map_model, which should be updating (aux_model only has a Dense layer):
Warning: Variable main_model/sequential_1/conv1d/kernel:0 was not updated with training. Confirm that this layer is correctly connected to the computation graph.
Warning: Variable main_model/sequential_1/conv1d/bias:0 was not updated with training. Confirm that this layer is correctly connected to the computation graph.
However, if the aux_model is commented out, the warnings will not appear and the model weights will be updated as expected.
# self.aux_model = self._set_aux_model()
Obviously, there are several ways in tensorflow to get this simple Sequential model to train properly, so I'm not just looking for a workaround to get this particular example working. Rather, I'm hoping that someone can explain what is going on with this example in terms of the Tensorflow sessions and graphs involved, as well as what the best practices are for avoiding conflicts between multiple different Keras models when nesting them with the subclass API. My ultimate goal is to train a more complex system of models using a similar framework.
This turned out to be a problem with the way I was checking the layers in the WeightChecker class. model.get_weights() returns all the weights, not just the trainable ones. Therefore, when we use zip() in the for loop, we are zipping together lists of different lengths, which misaligns the weight values with the variable names and causes the wrong layer to be reported as not updating. The bug can be fixed by using the following instead of model.get_weights():
self.prev_weights = [var.numpy() for var in model.trainable_variables]
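For completeness, here is a sketch of check_untrained_params with that fix applied, so that the names and the values both come from trainable_variables (np is NumPy, as in the question):

def check_untrained_params(self, model):
    """Compare only the trainable variables against their previous values."""
    passed = True
    curr_weights = [var.numpy() for var in model.trainable_variables]
    for curr_var, prev_var, var_name in zip(curr_weights, self.prev_weights, self.var_names):
        if np.array_equal(curr_var, prev_var):
            passed = False
            print(f"\nWarning: Variable {var_name} was not updated with training.")
    self.prev_weights = [w.copy() for w in curr_weights]
    return passed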

How to take the output of one model as the input of another one with Tensorflow r-1.0?

I have defined two classes of models, x and y.
class x():
    def __init__(self, x_inp1, x_inp2):
        # do sth...

    def step(self, session, encoder_inputs):
        input_feed = {}
        for l in range(encoder_size):
            input_feed[self.encoder_inputs[l].name] = encoder_inputs[l]
        ...
        output_feed = [x_output]
        return session.run(x_output)

class y():
    def __init__(self, y_inp1, y_inp2):
        # do sth...

    def step(self, encoder_inputs):
        input_feed = {}
        for l in range(encoder_size):
            input_feed[self.encoder_inputs[l].name] = encoder_inputs[l]
        ...
They have quite similar functions. And then I define another class to group them up.
class gp():
    def __init__(self, x_inp1, x_inp2, y_inp1, y_inp2):
        with tf.variable_scope('x'):
            self.x_model = x(x_inp1, x_inp2)
        with tf.variable_scope('y'):
            self.y_model = y(y_inp1, y_inp2)

    def step(self, session, encoder_inputs):
        x_output = self.x_model.step(session, encoder_inputs)
        y_output = self.y_model.step(session, x_output)
        ...
Please notice that the y_model takes the output of x_model as input. And I run the gp() in the main function:
with tf.Session() as sess:
    gp_m = gp(x_inp1, x_inp2, y_inp1, y_inp2)
    gp_m.step(sess, x_inp1, x_inp2, y_inp1, y_inp2)
After running x_output = self.x_model.step(encoder_inputs) and starting y_output = self.y_model.step(x_output), I get this error:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'x/encoder0' with dtype int32
[[Node: x/encoder0 = Placeholder[dtype=DT_INT32, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Please notice that this error points to the x_model even though its step function has already finished. How can I use the output of x_model as the input of y_model without any error? Thanks in advance!
You should defer the calls to session.run so that they happen outside the step functions. The problem here is that trying to run y triggers x again, because they are connected in the graph.
Instead, it might be better to fully separate the graph-build and graph-run stages of your program, so you know which placeholders to provide when.
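As a rough illustration of that separation (a toy TF 1.x sketch with stand-in operations, not the original models):

import tensorflow as tf  # TF 1.x, matching the question

# Build phase: no session.run calls while wiring the graph together.
encoder0 = tf.placeholder(tf.int32, shape=[None], name='encoder0')
with tf.variable_scope('x'):
    x_output = tf.cast(encoder0, tf.float32) * 2.0   # stand-in for the x model's output
with tf.variable_scope('y'):
    y_output = x_output + 1.0                         # y consumes x's output tensor directly

# Run phase: one session.run evaluates the whole chain, feeding only the real placeholders.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(y_output, feed_dict={encoder0: [1, 2, 3]}))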
