I am trying to implement StyleGAN with TensorFlow version 2 and I have no idea how to do an equalized learning rate. I tried to scale gradients this way:
def equalize_in_list(datalist):
    """Rescale every array in a (possibly nested) list, in place.

    Each non-list entry ``g`` is replaced by ``g * sqrt(2) / prod(g.shape)``;
    entries that are themselves lists are processed recursively.

    Args:
        datalist: list whose items either expose ``.shape`` and support
            multiplication/division by a scalar, or are further lists of
            such items.

    Returns:
        The same ``datalist`` object, after in-place modification.
    """
    for i, item in enumerate(datalist):
        # The previous `item is list` test compared against the type object
        # itself and was never true, so nested lists were never descended.
        if isinstance(item, list):
            equalize_in_list(item)
        else:
            datalist[i] = item * np.sqrt(2) / np.prod(item.shape)
    return datalist
# Rescale both gradient lists with equalize_in_list (the lists are modified
# in place and also returned).
gen_grad = equalize_in_list(gen_grad)
disc_grad = equalize_in_list(disc_grad)
But it doesn't work correctly.
You can just create a custom layer.
class DenseEQ(Dense):
    """
    Standard dense layer that applies learning rate equalization
    at runtime as per Karras et al. 2017.

    The kernel is initialised from N(0, 1) and multiplied by the He constant
    sqrt(2 / fan_in) on every forward pass rather than once at
    initialisation. Inherits the Dense layer and overrides the call method.
    """

    def __init__(self, **kwargs):
        # The runtime scaling relies on a unit-variance kernel, so the
        # initializer is fixed and may not be supplied by the caller.
        if 'kernel_initializer' in kwargs:
            raise Exception("Cannot override kernel_initializer")
        super().__init__(kernel_initializer=normal(0, 1), **kwargs)

    def build(self, input_shape):
        """Create the weights and compute the runtime scaling constant."""
        super().build(input_shape)
        # The number of inputs feeding each unit. Dense contracts over the
        # last axis only, so this is the size of that axis (for 2-D inputs
        # this equals the product of all non-batch dimensions).
        n = int(input_shape[-1])
        # He initialisation constant (float division on every Python version)
        self.c = np.sqrt(2.0 / n)

    def call(self, inputs):
        """Apply the dense transform with the kernel scaled by self.c."""
        output = K.dot(inputs, self.kernel * self.c)  # scale kernel
        if self.use_bias:
            output = K.bias_add(output, self.bias, data_format='channels_last')
        if self.activation is not None:
            output = self.activation(output)
        return output
And then create a model as you normally would... (But you'll have to specify its arguments explicitly, e.g. units=x. Positional arguments will not work.)
# Build a small functional model from two equalized dense layers.
model_in = Input(shape=(12,))
x = DenseEQ(name="whatever_1", units=16)(model_in)
x = LeakyReLU(0.2)(x)
# Feed the previous activation (not model_in) so the two layers are stacked.
x = DenseEQ(name="whatever_2", units=1)(x)
model_out = LeakyReLU(0.2)(x)
model = Model(model_in, model_out)
You can do the same thing for a convolution.
class Conv2DEQ(Conv2D):
    """
    Standard Conv2D layer that applies learning rate equalization
    at runtime as per Karras et al. 2017.

    The kernel is initialised from N(0, 1) and multiplied by the He constant
    sqrt(2 / fan_in) on every forward pass.
    Inherits Conv2D layer and overrides the call method, following
    https://github.com/keras-team/keras/blob/master/keras/layers/convolutional.py
    """

    def __init__(self, **kwargs):
        # The runtime scaling relies on a unit-variance kernel, so the
        # initializer is fixed and may not be supplied by the caller.
        if 'kernel_initializer' in kwargs:
            raise Exception("Cannot override kernel_initializer")
        super().__init__(kernel_initializer=normal(0, 1), **kwargs)

    def build(self, input_shape):
        """Create the weights and compute the runtime scaling constant."""
        super().build(input_shape)
        # The number of inputs feeding each output value. For a convolution
        # this is kernel_height * kernel_width * input_channels, i.e. every
        # kernel axis except the trailing output-filter axis. (Using the
        # spatial size of the input feature map here would make the scale
        # depend on image resolution and would fail for unknown dimensions.)
        n = np.prod([int(val) for val in self.kernel.shape[:-1]])
        # He initialisation constant
        self.c = np.sqrt(2.0 / n)

    def call(self, inputs):
        """Apply the 2-D convolution with the kernel scaled by self.c."""
        # Conv2D always has rank 2, so no rank dispatch is needed here.
        outputs = K.conv2d(
            inputs,
            self.kernel * self.c,  # scale kernel
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate)
        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)
        if self.activation is not None:
            return self.activation(outputs)
        return outputs
Related
I'm following Quantization aware training comprehensive guide and struggling with QAT for custom layers, working with tf=2.6.0, py=3.9.7.
Below is a toy example of my problem:
I wrote a simple custom layer that implements Conv2D
class MyConv(tf.keras.layers.Layer):
    '''Custom layer that wraps a 1x1 Conv2D with `filt` output filters.'''

    def __init__(self, filt=1, name=None, **kwargs):
        # Initialise the base Layer exactly once. A second base __init__
        # call without `name` re-ran the base setup and replaced the
        # requested layer name with an auto-generated one.
        super(MyConv, self).__init__(name=name, **kwargs)
        self.filt = filt

    def get_config(self):
        '''Return the base config extended with the `filt` argument.'''
        config = super().get_config().copy()
        config.update({"filt": self.filt})
        return config

    def build(self, shape):
        '''Create the inner convolution (kernel size 1, "same" padding).'''
        self.conv = tf.keras.layers.Conv2D(self.filt, 1, padding="same")

    def call(self, input):
        '''Apply the inner convolution to `input`.'''
        return self.conv(input)
I've created a small model with that layer, then recursively pass over its layers and annotate them using tfmot.quantization.keras.quantize_annotate_layer (each custom layer could have more custom sub-layers that need to be quantized). Then I apply tfmot.quantization.keras.quantize_apply to the annotated model. The resulting model consists of all the quantized layers, except for my custom layer, which has not been quantized.
I'll note that when I'm replacing the custom layer MyConv with the code below, as in the comprehensive guide, the quantization works.
class MyConv(tf.keras.layers.Conv2D):
    """Plain subclass of Conv2D that adds no behaviour of its own."""
    pass
Please help me solve this issue. Might be some issue with my QuantizeConfig?
Below is my full code:
import tensorflow as tf
import tensorflow_model_optimization as tfmot
class MyConv(tf.keras.layers.Layer):
    '''Custom layer wrapping a quantize-annotated 1x1 Conv2D.'''

    def __init__(self, filt=1, name=None, **kwargs):
        # Initialise the base Layer exactly once. A second base __init__
        # call without `name` re-ran the base setup and replaced the
        # requested layer name with an auto-generated one.
        super(MyConv, self).__init__(name=name, **kwargs)
        self.filt = filt

    def get_config(self):
        '''Return the base config extended with the `filt` argument.'''
        config = super().get_config().copy()
        config.update({"filt": self.filt})
        return config

    def build(self, shape):
        '''Create the inner convolution, wrapped by quantize_annotate_layer.'''
        self.conv = tfmot.quantization.keras.quantize_annotate_layer(
            tf.keras.layers.Conv2D(self.filt, 1, padding="same"))

    def call(self, input):
        '''Apply the inner (annotated) convolution to `input`.'''
        return self.conv(input)
def get_toy_model():
    """Build the small functional model used in this example.

    The graph is Conv2D -> ReLU -> MyConv, followed by a Conv2D/ReLU pair
    and a final Conv2D; it is returned as a Model named 'toy_Conv2D'.
    """
    model_input = tf.keras.Input((10, 10, 1), name='input')
    features = tf.keras.layers.Conv2D(1, 3, padding="same")(model_input)
    features = tf.keras.layers.ReLU()(features)
    features = MyConv()(features)
    # Every pass starts again from `features`, so only the pair built in the
    # last pass is connected to the output layer below.
    for _ in range(2):
        branch = tf.keras.layers.Conv2D(1, 3, padding="same")(features)
        branch = tf.keras.layers.ReLU()(branch)
    model_output = tf.keras.layers.Conv2D(1, 3, padding="same")(branch)
    return tf.keras.Model(model_input, model_output, name='toy_Conv2D')
# Short module-level aliases for two tfmot quantizer classes.
LastValueQuantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer
MovingAverageQuantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer
class DefaultCostumeQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    """QuantizeConfig that lists no weights or activations to quantize and
    supplies a single 8-bit moving-average quantizer for the layer output."""

    def get_weights_and_quantizers(self, layer):
        """Return no (weight, quantizer) pairs."""
        return list()

    def get_activations_and_quantizers(self, layer):
        """Return no (activation, quantizer) pairs."""
        return list()

    def set_quantize_weights(self, layer, quantize_weights):
        """Do nothing: no weights are listed by this config."""
        return None

    def set_quantize_activations(self, layer, quantize_activations):
        """Do nothing: no activations are listed by this config."""
        return None

    def get_output_quantizers(self, layer):
        """Return the quantizer applied to the layer output."""
        quantizer_cls = tfmot.quantization.keras.quantizers.MovingAverageQuantizer
        output_quantizer = quantizer_cls(
            num_bits=8,
            per_axis=False,
            symmetric=False,
            narrow_range=False,
        )
        return [output_quantizer]

    def get_config(self):
        """Return an empty config; the class has no constructor arguments."""
        return dict()
def recursive_depth_layers(layer):
    """Walk the attributes of `layer` looking for Keras sub-layers.

    Every attribute value that is a Keras Layer is visited recursively.
    The first one that is a Dense, Conv2D, ReLU, LeakyReLU or Activation is
    wrapped with quantize_annotate_layer (using DefaultCostumeQuantizeConfig),
    renamed, and returned. If nothing matches, None is returned.

    NOTE(review): the function returns at the first match and the wrapper is
    never stored back on `layer`, so a caller that ignores the return value
    sees no change to `layer` - confirm whether that is intended.
    """
    annotatable_types = (
        tf.keras.layers.Dense,
        tf.keras.layers.Conv2D,
        tf.keras.layers.ReLU,
        tf.keras.layers.LeakyReLU,
        tf.keras.layers.Activation,
    )
    # Snapshot the values so the attribute dict is not iterated live.
    for attr_value in list(vars(layer).values()):
        if not isinstance(attr_value, tf.keras.layers.Layer):
            continue
        recursive_depth_layers(attr_value)
        if not isinstance(attr_value, annotatable_types):
            continue
        wrapped = tfmot.quantization.keras.quantize_annotate_layer(
            attr_value, DefaultCostumeQuantizeConfig())
        wrapped._name += "_" + attr_value.name
        return wrapped
def apply_quantization(layer):
    """Clone function that decides how a single layer is annotated.

    Built-in Dense/Conv2D/ReLU/LeakyReLU/Activation layers and layers whose
    class is defined in ``__main__`` are wrapped with quantize_annotate_layer
    using DefaultCostumeQuantizeConfig; any other layer is returned as is.
    """
    builtin_types = (
        tf.keras.layers.Dense,
        tf.keras.layers.Conv2D,
        tf.keras.layers.ReLU,
        tf.keras.layers.LeakyReLU,
        tf.keras.layers.Activation,
    )

    def _annotate(target):
        # Wrap the layer and append its own name to the wrapper's name.
        wrapper = tfmot.quantization.keras.quantize_annotate_layer(
            target, DefaultCostumeQuantizeConfig())
        wrapper._name += '_' + target.name
        return wrapper

    # regular layer
    if isinstance(layer, builtin_types):
        return _annotate(layer)

    # anything that is neither built-in nor defined in this script
    if layer.__module__ != "__main__":
        return layer

    # custom layer: visit its sub-layers first, then wrap the layer itself
    recursive_depth_layers(layer)
    return _annotate(layer)
# Build the float model and print its structure.
model = get_toy_model()
model.summary()

# Clone the model, letting apply_quantization decide per layer whether to
# wrap it with a quantize annotation.
annotated_model = tf.keras.models.clone_model(
    model,
    clone_function=apply_quantization,
)
annotated_model.summary()

# Make the custom classes resolvable by name while the annotated model is
# converted into a quantization-aware one.
quantize_scope = tfmot.quantization.keras.quantize_scope
with quantize_scope({'DefaultCostumeQuantizeConfig': DefaultCostumeQuantizeConfig, 'MyConv': MyConv}):
    quant_aware_model = tfmot.quantization.keras.quantize_apply(annotated_model)

quant_aware_model._name += "_quant"
quant_aware_model.summary()
quant_aware_model.compile()
I was running a simple MLP network with customized learning algorithms. It worked fine on the training set, but I got this error when I entered additional code to check the test accuracy. How can I fix it?
Test Accuracy code
epochs = 1
for epoch in range(epochs):
    # Switch both models to evaluation mode.
    model_bp.eval()
    model_fa.eval()
    test_loss_bp = 0
    correct_bp = 0
    test_loss_fa = 0
    correct_fa = 0
    # NOTE: both networks multiply their input with a 2-D weight matrix, so
    # `inputs` must already have shape [batch_size, in_features] here.
    with torch.no_grad():
        for idx_batch, (inputs, targets) in enumerate(test_loader):
            output_bp = model_bp(inputs)
            output_fa = model_fa(inputs)
            # sum up batch loss (each model accumulates into its own total;
            # the FA loss used to be added to the BP total by mistake)
            test_loss_bp += loss_crossentropy(output_bp, targets).item()
            test_loss_fa += loss_crossentropy(output_fa, targets).item()
            # get the index of the max log-probability
            predict_bp = torch.max(output_bp.data, 1)[1]
            correct_bp += predict_bp.eq(targets.view_as(predict_bp)).sum().item()
            predict_fa = torch.max(output_fa.data, 1)[1]
            correct_fa += predict_fa.eq(targets.view_as(predict_fa)).sum().item()
    # The accumulated values are sums of per-batch mean losses; divide by the
    # number of batches so the printed figure really is an average.
    num_batches = len(test_loader)
    test_loss_bp /= num_batches
    test_loss_fa /= num_batches
    print('Test set: BP Average loss: {:.4f}, Accuracy: {}/{} ({:.4f}%)\n'.format(
        test_loss_bp, correct_bp, len(test_loader.dataset),
        100. * correct_bp / len(test_loader.dataset)))
    print('Test set: FA Average loss: {:.4f}, Accuracy: {}/{} ({:.4f}%)\n'.format(
        test_loss_fa, correct_fa, len(test_loader.dataset),
        100. * correct_fa / len(test_loader.dataset)))
Error
I'm curious about the meaning of 'RuntimeError: tensors must be 2-D'. I would appreciate it if you could tell me why it happened and where I made the mistake.
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-9-9b8b6f683e59> in <module>
16 #targets = targets.to(device)
17
---> 18 output_bp = model_bp(inputs)
19 output_fa = model_fa(inputs)
20 # sum up batch loss
~\anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
c:\Users\bclab\Desktop\feedback-alignment-pytorch-master\lib\linear.py in forward(self, inputs)
102 """
103 # first layer
--> 104 linear1 = F.relu(self.linear[0](inputs))
105
106 linear2 = self.linear[1](linear1)
~\anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
c:\Users\bclab\Desktop\feedback-alignment-pytorch-master\lib\linear.py in forward(self, input)
69 def forward(self, input):
70 # See the autograd section for explanation of what happens here.
---> 71 return LinearFunction.apply(input, self.weight, self.bias)
72
73
c:\Users\bclab\Desktop\feedback-alignment-pytorch-master\lib\linear.py in forward(ctx, input, weight, bias)
11 def forward(ctx, input, weight, bias=None):
12 ctx.save_for_backward(input, weight, bias)
---> 13 output = input.mm(weight.t())
14 if bias is not None:
15 output += bias.unsqueeze(0).expand_as(output)
RuntimeError: tensors must be 2-D
This is my model. fa_linear and linear are my custom network modules:
# load feedforward dfa model
# (784 input features, two layers with 1000 and 10 units, moved to `device`)
model_fa = fa_linear.LinearFANetwork(in_features=784, num_layers=2, num_hidden_list=[1000, 10]).to(device)
# load reference linear model (same layer sizes as the FA model)
model_bp = linear.LinearNetwork(in_features=784, num_layers=2, num_hidden_list=[1000, 10]).to(device)
# optimizers: one SGD instance per model, with identical hyper-parameters
optimizer_fa = torch.optim.SGD(model_fa.parameters(),
lr=1e-4, momentum=0.9, weight_decay=0.001, nesterov=True)
optimizer_bp = torch.optim.SGD(model_bp.parameters(),
lr=1e-4, momentum=0.9, weight_decay=0.001, nesterov=True)
# loss shared by both models
loss_crossentropy = torch.nn.CrossEntropyLoss()
# make log file
# NOTE(review): the file handle is not closed anywhere in this snippet.
results_path = 'bp_vs_fa_'
logger_train = open(results_path + 'train_log2.txt', 'w')
linear
from torch.autograd import Function
from torch import nn
import torch
import torch.nn.functional as F
# Inherit from Function
# Inherit from Function
class LinearFunction(Function):
    """Autograd function computing ``input @ weight.T (+ bias)``.

    ``input`` must be 2-D ``[batch, in_features]`` and ``weight`` 2-D
    ``[out_features, in_features]`` because ``Tensor.mm`` is used.
    """

    # Note that both forward and backward are @staticmethods
    @staticmethod
    # bias is an optional argument
    def forward(ctx, input, weight, bias=None):
        """Return ``input.mm(weight.t())`` plus the broadcast bias, if any."""
        ctx.save_for_backward(input, weight, bias)
        output = input.mm(weight.t())
        if bias is not None:
            output += bias.unsqueeze(0).expand_as(output)
        return output

    # This function has only a single output, so it gets only one gradient
    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients w.r.t. ``input``, ``weight`` and ``bias``."""
        # Unpack the saved tensors and initialise all gradients w.r.t. inputs
        # to None; entries that stay None mean "no gradient needed".
        input, weight, bias = ctx.saved_tensors
        grad_input = grad_weight = grad_bias = None
        # These needs_input_grad checks are optional and there only to
        # improve efficiency.
        if ctx.needs_input_grad[0]:
            grad_input = grad_output.mm(weight)
        if ctx.needs_input_grad[1]:
            grad_weight = grad_output.t().mm(input)
        if bias is not None and ctx.needs_input_grad[2]:
            # Summing over the batch axis already yields the bias shape
            # [out_features]; an extra squeeze would drop that axis when
            # out_features == 1.
            grad_bias = grad_output.sum(0)
        return grad_input, grad_weight, grad_bias
class Linear(nn.Module):
    """Linear layer whose forward pass goes through ``LinearFunction``.

    Args:
        input_features: size of each input sample.
        output_features: size of each output sample.
        bias: when False, no bias parameter is created and ``self.bias`` is
            None.
    """

    def __init__(self, input_features, output_features, bias=True):
        super(Linear, self).__init__()
        self.input_features = input_features
        self.output_features = output_features
        # nn.Parameter attributes are registered automatically as module
        # parameters, so they appear in .parameters() and follow .cuda()/.to().
        self.weight = nn.Parameter(torch.empty(output_features, input_features))
        if bias:
            self.bias = nn.Parameter(torch.empty(output_features))
        else:
            # You should always register all possible parameters, but the
            # optional ones can be None if you want.
            self.register_parameter('bias', None)
        # weight initialization (in-place initializers; the non-underscore
        # names are deprecated aliases)
        torch.nn.init.kaiming_uniform_(self.weight)
        # Only initialise the bias when it exists; with bias=False the
        # attribute is None and cannot be filled.
        if self.bias is not None:
            torch.nn.init.constant_(self.bias, 1)

    def forward(self, input):
        """Apply the custom autograd linear function to ``input``."""
        return LinearFunction.apply(input, self.weight, self.bias)
class LinearNetwork(nn.Module):
    """Reference feed-forward network built from custom ``Linear`` layers."""

    def __init__(self, in_features, num_layers, num_hidden_list):
        """
        :param in_features: dimension of input features (784 for MNIST)
        :param num_layers: number of layers for feed-forward net
        :param num_hidden_list: list of integers indicating hidden nodes of each layer
        """
        super(LinearNetwork, self).__init__()
        self.in_features = in_features
        self.num_layers = num_layers
        self.num_hidden_list = num_hidden_list
        # first hidden layer
        layers = [Linear(self.in_features, self.num_hidden_list[0])]
        # append additional hidden layers to list
        for idx in range(self.num_layers - 1):
            layers.append(Linear(self.num_hidden_list[idx], self.num_hidden_list[idx + 1]))
        # create ModuleList so the layers are registered as sub-modules
        self.linear = nn.ModuleList(layers)

    def forward(self, inputs):
        """
        forward pass, which is same for conventional feed-forward net
        :param inputs: inputs with shape [batch_size, in_features]
        :return: logit outputs from the network
        """
        # Run every configured layer, with ReLU after all but the last one.
        # For num_layers == 2 this is exactly relu(layer0) -> layer1; the
        # previous hard-coded version ignored any further layers.
        hidden = inputs
        last_idx = len(self.linear) - 1
        for idx, layer in enumerate(self.linear):
            hidden = layer(hidden)
            if idx < last_idx:
                hidden = F.relu(hidden)
        return hidden
fa_linear
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch import autograd
from torch.autograd import Variable
class LinearFANetwork(nn.Module):
    """
    Linear feed-forward networks with feedback alignment learning
    Does NOT perform non-linear activation after each layer
    """

    def __init__(self, in_features, num_layers, num_hidden_list):
        """
        :param in_features: dimension of input features (784 for MNIST)
        :param num_layers: number of layers for feed-forward net
        :param num_hidden_list: list of integers indicating hidden nodes of each layer
        """
        super(LinearFANetwork, self).__init__()
        self.in_features = in_features
        self.num_layers = num_layers
        self.num_hidden_list = num_hidden_list
        # first hidden layer
        layers = [LinearFAModule(self.in_features, self.num_hidden_list[0])]
        # append additional hidden layers to list
        for idx in range(self.num_layers - 1):
            layers.append(LinearFAModule(self.num_hidden_list[idx], self.num_hidden_list[idx + 1]))
        # create ModuleList so the layers are registered as sub-modules
        self.linear = nn.ModuleList(layers)

    def forward(self, inputs):
        """
        forward pass, which is same for conventional feed-forward net
        :param inputs: inputs with shape [batch_size, in_features]
        :return: logit outputs from the network
        """
        # Chain every configured layer. For num_layers == 2 this equals the
        # previous hard-coded layer0 -> layer1; further layers are no longer
        # silently skipped.
        hidden = inputs
        for layer in self.linear:
            hidden = layer(hidden)
        return hidden
class LinearFAFunction(autograd.Function):
    """Linear autograd function with feedback alignment in the backward pass.

    The forward pass is ``input @ weight.T (+ bias)``; the gradient w.r.t.
    ``input`` is computed with ``weight_fa`` instead of ``weight``.
    """

    @staticmethod
    # same as reference linear function, but with additional fa tensor for backward
    def forward(context, input, weight, weight_fa, bias=None):
        """Return ``input.mm(weight.t())`` plus the broadcast bias, if any."""
        context.save_for_backward(input, weight, weight_fa, bias)
        output = input.mm(weight.t())
        if bias is not None:
            output += bias.unsqueeze(0).expand_as(output)
        return output

    @staticmethod
    def backward(context, grad_output):
        """Return gradients for ``input``, ``weight``, ``weight_fa``, ``bias``.

        The entry for ``weight_fa`` is always None.
        """
        input, weight, weight_fa, bias = context.saved_tensors
        grad_input = grad_weight = grad_weight_fa = grad_bias = None
        if context.needs_input_grad[0]:
            # all of the logic of FA resides in this one line
            # calculate the gradient of input with fixed fa tensor, rather than the "correct" model weight
            grad_input = grad_output.mm(weight_fa)
        if context.needs_input_grad[1]:
            # grad for weight with FA'ed grad_output from downstream layer
            # it is same with original linear function
            grad_weight = grad_output.t().mm(input)
        if bias is not None and context.needs_input_grad[3]:
            # Summing over the batch axis already yields the bias shape;
            # an extra squeeze would drop that axis when out_features == 1.
            grad_bias = grad_output.sum(0)
        return grad_input, grad_weight, grad_weight_fa, grad_bias
class LinearFAModule(nn.Module):
    """Linear layer trained with feedback alignment.

    Args:
        input_features: size of each input sample.
        output_features: size of each output sample.
        bias: when False, no bias parameter is created and ``self.bias`` is
            None.
    """

    def __init__(self, input_features, output_features, bias=True):
        super(LinearFAModule, self).__init__()
        self.input_features = input_features
        self.output_features = output_features
        # weight and bias for forward pass
        # weight is stored as [output_features, input_features] and
        # transposed in the forward pass
        self.weight = nn.Parameter(torch.empty(output_features, input_features))
        if bias:
            self.bias = nn.Parameter(torch.empty(output_features))
        else:
            self.register_parameter('bias', None)
        # fixed random weight for FA backward pass
        # does not need gradient: requires_grad must be passed to
        # nn.Parameter itself, which otherwise defaults to True regardless
        # of the wrapped tensor's flag
        self.weight_fa = nn.Parameter(
            torch.empty(output_features, input_features), requires_grad=False)
        # weight initialization (in-place initializers; the non-underscore
        # names are deprecated aliases)
        torch.nn.init.kaiming_uniform_(self.weight)
        torch.nn.init.kaiming_uniform_(self.weight_fa)
        # Only initialise the bias when it exists.
        if self.bias is not None:
            torch.nn.init.constant_(self.bias, 1)

    def forward(self, input):
        """Apply the feedback-alignment linear function to ``input``."""
        return LinearFAFunction.apply(input, self.weight, self.weight_fa, self.bias)
You just need to flatten your input before passing it to your model. Something like this:
# ...
# from [batch_size, 1, 28, 28] <- 4-D
# to [batch_size, 1x28x28] <- 2-D, as expected
# start_dim=1 keeps the batch axis; torch.flatten(inputs) on its own would
# collapse everything, batch included, into a single 1-D tensor.
flat_inputs = torch.flatten(inputs, start_dim=1)
output_bp = model_bp(flat_inputs)
output_fa = model_fa(flat_inputs)
# ...
I am trying to run the following code (as given in Tensorflow documentation) to create windows of my data and then flatten the dataset of datasets.
# Number of consecutive elements per window.
window_size = 5
# Dataset of window datasets, each shifted by one element from the previous.
windows = range_ds.window(window_size, shift=1)

# Show the first five windows (each one is itself a dataset).
for sub_ds in windows.take(5):
    print(sub_ds)

# Flatten the dataset of datasets back into a single dataset.
flat_windows = windows.flat_map(lambda window: window)
The problem is that flat_windows.cardinality().numpy() returns cardinality to be -2 which is creating problem for me during training. I tried looking for ways to set_cardinality of a dataset but couldn't find anything. I also tried other ways of flattening a dataset of datasets, but again no success.
Edit-1: The problem with the training is that the shape is unknown (at Linear and Dense layers) when I am training a subclass model (given below). The model trains well when I train the model eagerly (through tf.config.run_functions_eagerly(True)) but that is slow. Therefore I want the input data to be known for the model training.
Neural Network
class NeuralNetworkModel(tf.keras.Model):
    """Model that trains an Encoder so that encodings of paired inputs match.

    Both steps expect ``inputs`` to be indexable with ``inputs[0]`` and
    ``inputs[1]`` and report the running value of the module-level
    ``loss_tracker`` metric (defined outside this class).
    """

    def __init__(self):
        super(NeuralNetworkModel, self).__init__()
        self.encoder = Encoder()

    def _encoding_loss(self, inputs):
        """Return the Frobenius norm of encoder(inputs[1]) - encoder(inputs[0])."""
        X = inputs[0]
        Y = inputs[1]
        enc_X = self.encoder(X)
        enc_Y = self.encoder(Y)
        return tf.norm(enc_Y - enc_X, axis=[0, 1], ord='fro')

    def train_step(self, inputs):
        """Run one optimisation step and return the tracked loss."""
        # The forward pass must happen inside the tape so it is recorded.
        with tf.GradientTape() as tape:
            loss = self._encoding_loss(inputs)
        # Compute gradients
        trainable_vars = self.encoder.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Compute our own metrics
        loss_tracker.update_state(loss)
        # Return a dict mapping metric names to current value.
        return {"loss": loss_tracker.result()}

    # `metrics` has to be a property: it is listed here so that
    # `reset_states()` can be called automatically at the start of each
    # epoch or at the start of `evaluate()`. Without the decorator it would
    # be a plain method instead of a list of metrics.
    @property
    def metrics(self):
        """Return the list of metric objects tracked by this model."""
        return [loss_tracker]

    def test_step(self, inputs):
        """Evaluate the loss on one batch and return the tracked loss."""
        loss = self._encoding_loss(inputs)
        # Compute our own metrics
        loss_tracker.update_state(loss)
        # Return a dict mapping metric names to current value.
        return {"loss": loss_tracker.result()}
class Encoder(tf.keras.Model):
    """Stack of four DenseLayer blocks followed by a LinearLayer (float64)."""

    def __init__(self):
        super(Encoder, self).__init__(dtype='float64', name='Encoder')
        self.input_layer = DenseLayer(128)
        self.hidden_layer1 = DenseLayer(128)
        self.hidden_layer2 = DenseLayer(64)
        self.hidden_layer3 = DenseLayer(64)
        self.output_layer = LinearLayer(64)

    def call(self, input_data, training=None):
        """Pass `input_data` through all layers in order.

        `training` is accepted for Keras compatibility but is not used; it
        now defaults to None so `call` can also be invoked without it.
        """
        fx = input_data
        for layer in (
            self.input_layer,
            self.hidden_layer1,
            self.hidden_layer2,
            self.hidden_layer3,
        ):
            fx = layer(fx)
        return self.output_layer(fx)
class LinearLayer(tf.keras.layers.Layer):
    """Affine layer computing ``inputs @ w + b`` with a float64 layer dtype."""

    def __init__(self, units):
        super(LinearLayer, self).__init__(dtype='float64')
        self.units = units

    def build(self, input_shape):
        """Create the kernel `w` and the bias `b` from the last input axis."""
        kernel_shape = (input_shape[-1], self.units)
        bias_shape = (self.units,)
        # Kernel first, then bias, so weights are created in the same order.
        self.w = self.add_weight(
            shape=kernel_shape,
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=bias_shape,
            initializer=tf.zeros_initializer(),
            trainable=True,
        )

    def call(self, inputs):
        """Return the affine transform of `inputs`."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b
class DenseLayer(tf.keras.layers.Layer):
    """Affine layer followed by ELU, with a float64 layer dtype."""

    def __init__(self, units):
        super(DenseLayer, self).__init__(dtype='float64')
        self.units = units

    def build(self, input_shape):
        """Create the kernel `w` and the bias `b` from the last input axis."""
        kernel_shape = (input_shape[-1], self.units)
        bias_shape = (self.units,)
        # Kernel first, then bias, so weights are created in the same order.
        self.w = self.add_weight(
            shape=kernel_shape,
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=bias_shape,
            initializer=tf.zeros_initializer(),
            trainable=True,
        )

    def call(self, inputs):
        """Return ``elu(inputs @ w + b)``."""
        pre_activation = tf.matmul(inputs, self.w) + self.b
        return tf.nn.elu(pre_activation)
I was wondering about this as well. Turns out that -2 is tf.data.UNKNOWN_CARDINALITY (https://www.tensorflow.org/api_docs/python/tf/data#UNKNOWN_CARDINALITY), which represents that TF doesn't know how many elements the flat_map returns per item.
I just asked Windowing a TensorFlow dataset without losing cardinality information? to see if anyone knows a way to window datasets without losing cardinality.
The episodic memory module from DMN has a unique state update mechanism that is based on the attention:
Equation 8 describes how the gate g (the attention map) is used to compute the next state as a weighted combination of the GRU output and the previous state. My current implementation uses:
class EpisodicMemory(L.Wrapper):
    """Episodic memory from DMN.

    Wraps a GRU cell and, for each position along axis 1 of the context,
    blends the cell output with the previous state using the attention
    value at that position.
    """

    def __init__(self, units, **kwargs):
        # A layer name is optional: only derive the cell name from it when
        # one was given (indexing kwargs['name'] raised KeyError otherwise).
        layer_name = kwargs.get('name')
        cell_name = layer_name + '_gru' if layer_name else None
        grucell = L.GRUCell(units, name=cell_name)  # Internal cell
        # Initialise the wrapper before storing attributes on the instance.
        super().__init__(grucell, **kwargs)
        self.grucell = grucell

    def build(self, input_shape):
        """Build the layer."""
        # input_shape holds three shapes; only the last (context) one is
        # used, with its axis 1 dropped before building the cell.
        _, _, ctx_shape = input_shape
        self.grucell.build((ctx_shape[0],) + ctx_shape[2:])
        super().build(input_shape)

    def call(self, inputs):
        """Compute new state episode."""
        init_state, atts, cs = inputs
        retain = 1 - atts

        def while_valid_index(state, index):
            # Continue while there are positions left along axis 1 of cs.
            return index < tf.shape(cs)[1]

        def update_state(state, index):
            # GRU pass over the facts, according to the attention mask.
            candidate = self.grucell.call(cs[:, index, :], [state])[0]
            return atts[:, index, :] * candidate + retain[:, index, :] * state

        def step(state, index):
            return update_state(state, index), index + 1

        # Loop over context
        final_state, _ = tf.while_loop(while_valid_index, step,
                                       loop_vars=[init_state, 0])
        return final_state

    def compute_output_shape(self, input_shape):
        """Collapse time dimension."""
        return input_shape[0]
and used as:
# ...
# Concatenate the feature tensors and reduce them with two dense layers to
# obtain the attention tensor.
sim_vec = concat([s_s_c, s_m_c, ctx_state, embedded_sents, repeated_q])
sim_vec = att_dense1(sim_vec) # (?, context_size, dim)
sim_vec = att_dense2(sim_vec) # (?, context_size, 1)
# Inputs are passed as [initial state, attention, context].
episodic_mem = EpisodicMemory(dim, name='episodic_mem')
state = episodic_mem([state, sim_vec, embedded_sents])
# ...
Is there a way to implement this in Keras without binding to Tensorflow functions? Even though we can pass constants to a custom RNN cell, say the attention map, we don't know the current index of the loop inside call function.
Following this paper on domain adaptation, I am trying to implement the following layer for gradient reversal (written for Keras with the Theano backend, as found in this Keras issue) in Tensorflow, as my model does not run well with Theano.
class GradientReversalLayer(Layer):
    """ Reverse a gradient
    <feedforward> return input x
    <backward> return -lambda * delta

    hp_lambda: factor applied to the reversed gradient.
    """

    def __init__(self, hp_lambda, **kwargs):
        super(GradientReversalLayer, self).__init__(**kwargs)
        self.hp_lambda = hp_lambda
        self.gr_op = ReverseGradient(self.hp_lambda)

    def build(self, input_shape):
        # The layer has no weights of its own.
        self.trainable_weights = []

    def call(self, x, mask=None):
        """Apply the gradient reversal op to `x`."""
        return self.gr_op(x)

    def get_output_shape_for(self, input_shape):
        """Return the unchanged input shape (older Keras API name)."""
        return input_shape

    def compute_output_shape(self, input_shape):
        """Return the unchanged input shape (newer Keras API name)."""
        return input_shape

    def get_config(self):
        """Return the base config plus the constructor argument `hp_lambda`.

        The key matches the __init__ parameter so the config can be fed back
        to the constructor; the instance name from the base config is kept
        rather than being replaced by the class name.
        """
        config = {"hp_lambda": self.hp_lambda}
        base_config = super(GradientReversalLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
The layer performs this operation:
import theano
from keras.engine import Layer
class ReverseGradient(theano.Op):
    """ theano operation to reverse the gradients
    Introduced in http://arxiv.org/pdf/1409.7495.pdf

    Forward: passes the input through unchanged.
    Backward: multiplies the incoming gradient by -hp_lambda.
    """

    # Output 0 is declared as a view of input 0.
    view_map = {0: [0]}

    __props__ = ('hp_lambda', )

    def __init__(self, hp_lambda):
        super(ReverseGradient, self).__init__()
        self.hp_lambda = hp_lambda

    def make_node(self, x):
        """Build the Apply node with one input and one output of the same type."""
        assert hasattr(self, '_props'), "Your version of theano is too old to support __props__."
        tensor_in = theano.tensor.as_tensor_variable(x)
        tensor_out = tensor_in.type()
        return theano.Apply(self, [tensor_in], [tensor_out])

    def perform(self, node, inputs, output_storage):
        """Copy the single input reference into the single output slot."""
        (value,) = inputs
        (out_cell,) = output_storage
        out_cell[0] = value

    def grad(self, input, output_gradients):
        """Return the first output gradient scaled by -hp_lambda."""
        upstream = output_gradients[0]
        return [-self.hp_lambda * upstream]

    def infer_shape(self, node, i0_shapes):
        """The output shapes equal the input shapes."""
        return i0_shapes
Why can I not use it like this?
If I run my model with the tf backend and with this function written in Theano I get the following error:
theano.tensor.var.AsTensorError: ('Cannot convert Tensor("concatenate_1/concat:0", shape=(?, ?, 128), dtype=float32) to TensorType', <class 'tensorflow.python.framework.ops.Tensor'>)
After calling it like this:
# Concatenate the two hidden outputs, then apply the gradient reversal
# layer with hp_lambda = 0.31.
lstm_concat = concatenate([hidden_out_1, hidden_out_2])
lstm_concat = FlipGradientKeras.GradientReversalLayer(0.31)(lstm_concat)
How do I convert this operation to a TF operation?
The documentation about adding a new operation only suggests to implement it in C++.
The ops code shows the general framework, but I'd like to be sure that I'm implementing everything that the Theano op does.
I would assume it would be something on the lines of:
def ReverseGradient(input_tensor, hp_lambda, name=None):
    """Incomplete sketch of a TensorFlow gradient-reversal op.

    Only the name scope and the tensor conversion are written so far; the
    gradient reversal itself is not implemented and nothing is returned.

    Args:
        input_tensor: value to convert to a tensor.
        hp_lambda: gradient scaling factor (currently only passed to the
            name scope).
        name: optional scope name; previously `name` was referenced without
            being defined.
    """
    with ops.name_scope(name, "ReverseGradient", [input_tensor, hp_lambda]) as name:
        input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor")
But I'm really not sure about the rest.
Thanks in advance!
I solved the problem by expanding on the work done here.
Here's the working code:
import tensorflow as tf
from keras.engine import Layer
import keras.backend as K
def reverse_gradient(X, hp_lambda):
    '''Flips the sign of the incoming gradient during training.

    Returns an identity of `X` whose gradient is the incoming gradient
    multiplied by -hp_lambda.
    '''
    # Each call registers a gradient under a fresh name, so keep a call
    # counter on the function object itself.
    try:
        reverse_gradient.num_calls += 1
    except AttributeError:
        reverse_gradient.num_calls = 1

    grad_name = "GradientReversal%d" % reverse_gradient.num_calls

    # The decorator is what registers the function under `grad_name`;
    # without it the override map below refers to an unknown gradient.
    @tf.RegisterGradient(grad_name)
    def _flip_gradients(op, grad):
        return [tf.negative(grad) * hp_lambda]

    g = K.get_session().graph
    # Use the registered gradient for the Identity op created in this scope.
    with g.gradient_override_map({'Identity': grad_name}):
        y = tf.identity(X)

    return y
class GradientReversal(Layer):
    '''Flip the sign of gradient during training.

    hp_lambda: factor applied to the reversed gradient.
    '''

    def __init__(self, hp_lambda, **kwargs):
        super(GradientReversal, self).__init__(**kwargs)
        self.supports_masking = False
        self.hp_lambda = hp_lambda

    def build(self, input_shape):
        # The layer has no weights of its own.
        self.trainable_weights = []

    def call(self, x, mask=None):
        '''Return `x` unchanged in the forward pass, with a reversed gradient.'''
        return reverse_gradient(x, self.hp_lambda)

    def get_output_shape_for(self, input_shape):
        '''Return the unchanged input shape (older Keras API name).'''
        return input_shape

    def compute_output_shape(self, input_shape):
        '''Return the unchanged input shape (newer Keras API name).'''
        return input_shape

    def get_config(self):
        '''Return the base config plus the constructor argument `hp_lambda`.

        An empty extra config dropped `hp_lambda`, so the layer could not be
        rebuilt from its own config.
        '''
        config = {'hp_lambda': self.hp_lambda}
        base_config = super(GradientReversal, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))