I want to define a new CustomLayer in TensorFlow so that I can write a customized forward and backward operation. I defined a CustomLayer based on TensorFlow's Dense layer, but it doesn't work and I'm stuck. Can anyone help me with this?
Here is the current version of my code:
# Imports assumed by this snippet (the same TF 2.0-era modules that Dense's own source uses):
import tensorflow as tf
from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes, tensor_shape
from tensorflow.python.keras import activations, constraints, initializers, regularizers
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.engine.base_layer import Layer
from tensorflow.python.keras.engine.input_spec import InputSpec
from tensorflow.python.ops import gen_math_ops, math_ops, nn, sparse_ops, standard_ops


class CustomLayer(Layer):

    def __init__(self,
                 units,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 dynamic=True,
                 **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(CustomLayer, self).__init__(
            activity_regularizer=regularizers.get(activity_regularizer), **kwargs)
        self.units = int(units)
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.supports_masking = True
        self.input_spec = InputSpec(min_ndim=2)

    def build(self, input_shape):
        dtype = dtypes.as_dtype(self.dtype or K.floatx())
        if not (dtype.is_floating or dtype.is_complex):
            raise TypeError('Unable to build Dense layer with non-floating point '
                            'dtype %s' % (dtype,))
        input_shape = tensor_shape.TensorShape(input_shape)
        if tensor_shape.dimension_value(input_shape[-1]) is None:
            raise ValueError('The last dimension of the inputs to Dense '
                             'should be defined. Found None.')
        last_dim = tensor_shape.dimension_value(input_shape[-1])
        self.input_spec = InputSpec(min_ndim=2, axes={-1: last_dim})
        self.kernel = self.add_weight(
            'kernel',
            shape=[last_dim, self.units],
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
            dtype=self.dtype,
            trainable=True)
        if self.use_bias:
            self.bias = self.add_weight(
                'bias',
                shape=[self.units, ],
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                dtype=self.dtype,
                trainable=True)
        else:
            self.bias = None
        self.built = True

    def call(self, inputs):
        global global_self
        global_self = self
        return custom_op(inputs)


global_self = None


@tf.custom_gradient
def custom_op(inputs):
    self = global_self
    rank = len(inputs.shape)
    if rank > 2:
        # Broadcasting is required for the inputs.
        outputs = standard_ops.tensordot(inputs, self.kernel, [[rank - 1], [0]])
        # Reshape the output back to the original ndim of the input.
        if not context.executing_eagerly():
            shape = inputs.shape.as_list()
            output_shape = shape[:-1] + [self.units]
            outputs.set_shape(output_shape)
    else:
        inputs = math_ops.cast(inputs, self._compute_dtype)
        if K.is_sparse(inputs):
            outputs = sparse_ops.sparse_tensor_dense_matmul(inputs, self.kernel)
        else:
            outputs = gen_math_ops.mat_mul(inputs, self.kernel)
    if self.use_bias:
        outputs = nn.bias_add(outputs, self.bias)
    if self.activation is not None:
        return self.activation(outputs)  # pylint: disable=not-callable

    def custom_grad(dy):
        print(dy, [dy])
        grad = dy  # compute gradient
        return grad

    return outputs, custom_grad
When I try executing it, I get this traceback:
Traceback (most recent call last):
File "/home/labt41/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py", line 802, in call
outputs = call_fn(cast_inputs, *args, **kwargs)
File "/home/labt41/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py", line 237, in wrapper
raise e.ag_error_metadata.to_exception(e)
tensorflow.python.framework.errors_impl.OperatorNotAllowedInGraphError: in converted code:
relative to /home/labt41:
PycharmProjects/TF_Custom/CustomLayer_stackoverflow.py:125 call *
return custom_op(inputs)
PycharmProjects/TF_Custom/CustomLayer_stackoverflow.py:136 decorated *
return _graph_mode_decorator(f, *args, **kwargs)
anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/custom_gradient.py:229 _graph_mode_decorator
result, grad_fn = f(*args)
anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:547 __iter__
self._disallow_iteration()
anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:543 _disallow_iteration
self._disallow_in_graph_mode("iterating over `tf.Tensor`")
anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:523 _disallow_in_graph_mode
" this function with #tf.function.".format(task))
OperatorNotAllowedInGraphError: iterating over tf.Tensor is not allowed in Graph execution. Use Eager execution or decorate this function with #tf.function.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/labt41/PycharmProjects/TF_Custom/CustomLayer_stackoverflow.py", line 179, in <module>
model = create_model()
File "/home/labt41/PycharmProjects/TF_Custom/CustomLayer_stackoverflow.py", line 50, in create_model
layer = CustomLayer(units=128)(visible)
File "/home/labt41/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py", line 814, in call
str(e) + '\n"""')
TypeError: You are attempting to use Python control flow in a layer that was not declared to be dynamic. Pass dynamic=True to the class constructor.
Encountered error:
in converted code:
relative to /home/labt41:
PycharmProjects/TF_Custom/CustomLayer_stackoverflow.py:125 call *
return custom_op(inputs)
PycharmProjects/TF_Custom/CustomLayer_stackoverflow.py:136 decorated *
return _graph_mode_decorator(f, *args, **kwargs)
anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/custom_gradient.py:229 _graph_mode_decorator
result, grad_fn = f(*args)
anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:547 __iter__
self._disallow_iteration()
anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:543 _disallow_iteration
self._disallow_in_graph_mode("iterating over `tf.Tensor`")
anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:523 _disallow_in_graph_mode
" this function with #tf.function.".format(task))
OperatorNotAllowedInGraphError: iterating over tf.Tensor is not allowed in Graph execution. Use Eager execution or decorate this function with #tf.function.
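Side note: the usual way to avoid both the global-variable hack and the error above is to define the custom-gradient function as a closure inside call, so that the layer instance is captured directly and the (outputs, grad_fn) pair is returned on every code path. Below is a minimal sketch of that pattern; SketchCustomDense and the gradient math are illustrative only, not the asker's code:

import tensorflow as tf

class SketchCustomDense(tf.keras.layers.Layer):
    def __init__(self, units, **kwargs):
        super(SketchCustomDense, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        self.kernel = self.add_weight(
            'kernel', shape=[int(input_shape[-1]), self.units],
            initializer='glorot_uniform', trainable=True)

    def call(self, inputs):
        @tf.custom_gradient
        def custom_op(x):
            outputs = tf.matmul(x, self.kernel)

            def custom_grad(dy, variables=None):
                # Standard matmul gradients; replace with your own backward pass.
                dx = tf.matmul(dy, self.kernel, transpose_b=True)
                dkernel = tf.matmul(x, dy, transpose_a=True)
                return dx, [dkernel]

            return outputs, custom_grad

        return custom_op(inputs)

Because custom_op always returns the (outputs, custom_grad) tuple and never iterates over a tensor in Python, dynamic=True is no longer needed.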
Related
I implemented a model with a GRU layer; the model and its training work fine with just:
import tensorflow as tf


class MyModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super().__init__(self)
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(rnn_units,
                                       return_sequences=True,
                                       return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        x = inputs
        x = self.embedding(x, training=training)
        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)
        if return_state:
            return x, states
        else:
            return x
I then changed the definition of the GRU layer to make it (1) CuDNN-compatible and (2) add dropout.
In the model definition I kept
self.gru = tf.keras.layers.GRU(rnn_units,
                               return_sequences=True,
                               return_state=True)
In the call function I set
if states is None:
    states = self.gru.get_initial_state(x)
x, states = self.gru(x, initial_state=states, training=training,
                     reset_after=True, recurrent_activation='sigmoid',  # to make it more GPU friendly
                     recurrent_dropout=0.2, dropout=0.2  # to add some dropout to it
                     )
The Keras and TensorFlow guidelines seem to be observed, yet I get this error:
Traceback (most recent call last):
File "rnn_train_004.py", line 125, in <module>
example_batch_predictions = model(input_example_batch)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 1037, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "rnn_train_004.py", line 107, in call
recurrent_dropout=0.2, dropout=0.2 # to add some dropout to it
File "/usr/local/lib/python3.6/dist-packages/keras/layers/recurrent.py", line 716, in __call__
return super(RNN, self).__call__(inputs, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 1037, in __call__
outputs = call_fn(inputs, *args, **kwargs)
TypeError: call() got an unexpected keyword argument 'reset_after'
The fix is to pass these arguments to the constructor, not to the call() method:
import tensorflow as tf


class MyModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super().__init__(self)
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(rnn_units,
                                       return_sequences=True,
                                       return_state=True,
                                       reset_after=True,
                                       recurrent_activation='sigmoid',  # to make it more GPU friendly
                                       recurrent_dropout=0.2,
                                       dropout=0.2  # to add some dropout to it
                                       )
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        x = inputs
        x = self.embedding(x, training=training)
        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)
        if return_state:
            return x, states
        else:
            return x
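With the arguments moved into the constructor, GRU's call receives only the keywords it actually accepts. A quick smoke test; vocab_size and input_example_batch come from your own data pipeline, and the sizes here are just placeholders:

model = MyModel(vocab_size=vocab_size, embedding_dim=256, rnn_units=1024)
example_batch_predictions = model(input_example_batch)
print(example_batch_predictions.shape)  # (batch, sequence_length, vocab_size)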
I have a class called MaskedDense. Below is its structure.
# Imports assumed by this snippet:
import keras
from keras import backend as K
from keras.layers import Layer, InputSpec


class MaskedDense(Layer):
    def __init__(self, units, activation=None, use_bias=True, **kwargs):
        self.units = units
        self.activation = keras.activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = keras.initializers.glorot_uniform()
        self.bias_initializer = keras.initializers.Zeros()
        self.mask_initializer = keras.initializers.Ones()
        super(MaskedDense, self).__init__(**kwargs)

    def get_config(self):
        config = super(MaskedDense, self).get_config().copy()
        config.update({
            "units": self.units,
            "activation": self.activation,
            "use_bias": self.use_bias,
            "kernel_initializer": self.kernel_initializer,
            "bias_initializer": self.bias_initializer,
            "mask_initializer": self.mask_initializer
        })
        return config

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        input_dim = input_shape[-1]
        self.kernel = self.add_weight(shape=(input_dim, self.units),
                                      initializer=self.kernel_initializer,
                                      name='kernel')
        # The mask is not trainable
        self.mask = self.add_weight(shape=(input_dim, self.units),
                                    initializer=self.mask_initializer,
                                    trainable=False,
                                    name='mask')
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units,),
                                        initializer=self.bias_initializer,
                                        name='bias')
        else:
            self.bias = None
        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
        self.built = True
        super(MaskedDense, self).build(input_shape)  # Be sure to call this at the end

    def call(self, inputs):
        output = K.dot(inputs, self.kernel * self.mask)
        if self.use_bias:
            output = K.bias_add(output, self.bias, data_format='channels_last')
        if self.activation is not None:
            output = self.activation(output)
        return output

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) >= 2
        assert input_shape[-1]
        output_shape = list(input_shape)
        output_shape[-1] = self.units
        return tuple(output_shape)

    def set_mask(self, value, feature_idx, class_idx=None):
        """
        Set the mask of [feature_idx, class_idx] to a value.
        feature_idx: index of the feature
        class_idx: index of the class (or a list of indices). None means setting the value to all the classes
        value: the value to set
        """
        weights = K.get_value(self.mask)
        assert feature_idx >= 0 and feature_idx < weights.shape[0], f"Feature index out of bound [0, ..., {weights.shape[0]-1}] -- {feature_idx} given"
        if class_idx is not None:
            if isinstance(class_idx, list):
                for idx in class_idx:
                    assert idx >= 0 and idx < weights.shape[1], f"Class index out of bound [0, ..., {weights.shape[1]-1}] -- {idx} given"
                    weights[feature_idx, idx] = value
            elif isinstance(class_idx, int):
                idx = class_idx
                assert idx >= 0 and idx < weights.shape[1], f"Class index out of bound [0, ..., {weights.shape[1]-1}] -- {idx} given"
                weights[feature_idx, idx] = value
        else:
            weights[feature_idx, :] = value
        K.set_value(self.mask, weights)

    def disable_mask(self, feature_idx, class_idx=None):
        self.set_mask(value=0, feature_idx=feature_idx, class_idx=class_idx)

    def enable_mask(self, feature_idx, class_idx=None):
        self.set_mask(value=1, feature_idx=feature_idx, class_idx=class_idx)

    def get_masked_weights(self):
        return K.get_value(self.mask) * K.get_value(self.kernel)
Then I am trying to create the model. Below is the code.
text_input = Input(shape=(None,), dtype="int32")
embedded_text = Embedding(vocab_size, 300, weights=[embedding_matrix], input_length=150, trainable=True)(text_input)
filters = [(10, 2), (10, 3), (10, 4)]
filter_layers = [Conv1D(f[0], f[1], activation='relu', trainable=True)(embedded_text) for f in filters]
max_pool_layers = [GlobalMaxPool1D()(result) for result in filter_layers]
concatenated = concatenate(max_pool_layers,axis=-1)
ans = MaskedDense(len(class_names), activation='softmax')(concatenated)
model = Model(text_input, ans)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()
I am getting the following error:
TypeError: unsupported operand type(s) for /: 'Dimension' and 'float'
What am I doing wrong?
Here is the full error:
Traceback (most recent call last):
  File "find_1.py", line 227, in <module>
    ans = MaskedDense(len(class_names), activation='softmax')(concatenated)
  File "/home/user_name/miniforge3/envs/py36/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py", line 746, in __call__
    self.build(input_shapes)
  File "find_1.py", line 151, in build
    name='kernel')
  File "/home/user_name/miniforge3/envs/py36/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py", line 609, in add_weight
    aggregation=aggregation)
  File "/home/user_name/miniforge3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/checkpointable/base.py", line 639, in _add_variable_with_custom_getter
    **kwargs_for_getter)
  File "/home/user_name/miniforge3/envs/py36/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py", line 1977, in make_variable
    aggregation=aggregation)
  File "/home/user_name/miniforge3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 183, in __call__
    return cls._variable_v1_call(*args, **kwargs)
  File "/home/user_name/miniforge3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 146, in _variable_v1_call
    aggregation=aggregation)
  File "/home/user_name/miniforge3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 125, in <lambda>
    previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
  File "/home/user_name/miniforge3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 2437, in default_variable_creator
    import_scope=import_scope)
  File "/home/user_name/miniforge3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 187, in __call__
    return super(VariableMetaclass, cls).__call__(*args, **kwargs)
  File "/home/user_name/miniforge3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 297, in __init__
    constraint=constraint)
  File "/home/user_name/miniforge3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 409, in _init_from_args
    initial_value() if init_from_fn else initial_value,
  File "/home/user_name/miniforge3/envs/py36/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py", line 1959, in <lambda>
    shape, dtype=dtype, partition_info=partition_info)
  File "/home/user_name/miniforge3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py", line 473, in __call__
    scale /= max(1., (fan_in + fan_out) / 2.)
TypeError: unsupported operand type(s) for /: 'Dimension' and 'float'
Changing input_dim = input_shape[-1] to input_dim = int(input_shape[-1]) solved the problem.
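In other words, under TF 1.x input_shape[-1] is a tf.Dimension object, and the glorot_uniform initializer fails when it divides that by a float; casting to a plain int fixes it. A minimal sketch of the corrected build, with only the first line changed:

def build(self, input_shape):
    # Cast the TF1 Dimension object to a plain Python int before using it in shapes.
    input_dim = int(input_shape[-1])
    self.kernel = self.add_weight(shape=(input_dim, self.units),
                                  initializer=self.kernel_initializer,
                                  name='kernel')
    # ... rest of build unchanged ...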
I am loading a pretrained attention model in Keras using load_model().
My Attention class is defined as below.
# attention class
from keras.engine.topology import Layer
from keras import initializers, regularizers, constraints
from keras import backend as K


class Attention(Layer):
    def __init__(self, step_dim, w_regularizer=None, b_regularizer=None,
                 w_constraint=None, b_constraint=None, bias=True, **kwargs):
        self.supports_masking = True
        # weight initializer
        self.init = initializers.get('glorot_uniform')
        self.w_regularizer = regularizers.get(w_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.w_constraint = constraints.get(w_constraint)
        self.b_constraint = constraints.get(b_constraint)
        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3
        self.w = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init, name='{}_w'.format(self.name),
                                 regularizer=self.w_regularizer,
                                 constraint=self.w_constraint)
        self.features_dim = input_shape[-1]
        if self.bias:
            self.b = self.add_weight(shape=(input_shape[1],),
                                     initializer='zero', name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None
        self.built = True

    def compute_mask(self, input, input_mask=None):
        return None

    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                              K.reshape(self.w, (features_dim, 1))), (-1, step_dim))
        if self.bias:
            eij += self.b
        eij = K.tanh(eij)
        a = K.exp(eij)
        if mask is not None:
            a *= K.cast(mask, K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        return input_shape[0], self.features_dim

    def get_config(self):
        config = {
            'step_dim': self.step_dim,
            'w_regularizer': self.w_regularizer,
            'w_constraint': self.w_constraint,
            'b_regularizer': self.b_regularizer,
            'b_constraint': self.b_constraint,
            'bias': self.bias
        }
        base_config = super(Attention, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
The model is called in test_loadmod.py as
from attention import Attention
from keras.models import load_model
model = load_model('attention_wo_cval.h5', custom_objects={'Attention': Attention})
print(model)
The custom Attention class is made available to load_model() by passing it in via custom_objects, as described here.
However, it doesn't seem to find the step_dim attribute and throws the error below. Any idea how to get this going? Thanks for your time and help.
Error while loading
TypeError: __init__() missing 1 required positional argument: 'step_dim'
File "test_loadmod.py", line 4, in <module>
model = load_model('attention_wo_cval.h5', custom_objects={'Attention': Attention})
File "C:\Users\RV\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\engine\saving.py", line 492, in load_wrapper
return load_function(*args, **kwargs)
File "C:\Users\RV\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\engine\saving.py", line 584, in load_model
model = _deserialize_model(h5dict, custom_objects, compile)
File "C:\Users\RV\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\engine\saving.py", line 274, in _deserialize_model
model = model_from_config(model_config, custom_objects=custom_objects)
File "C:\Users\RV\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\engine\saving.py", line 627, in model_from_config
return deserialize(config, custom_objects=custom_objects)
File "C:\Users\RV\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\layers\__init__.py", line 165, in deserialize
return deserialize_keras_object(config,
File "C:\Users\RV\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\utils\generic_utils.py", line 144, in deserialize_keras_object
return cls.from_config(
File "C:\Users\RV\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\engine\network.py", line 1056, in from_config
process_layer(layer_data)
File "C:\Users\RV\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\engine\network.py", line 1041, in process_layer
layer = deserialize_layer(layer_data,
File "C:\Users\RV\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\layers\__init__.py", line 165, in deserialize
return deserialize_keras_object(config,
File "C:\Users\RV\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\utils\generic_utils.py", line 149, in deserialize_keras_object
return cls.from_config(config['config'])
File "C:\Users\RV\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\engine\base_layer.py", line 1179, in from_config
return cls(**config)
TypeError: __init__() missing 1 required positional argument: 'step_dim'
The get_config method is the right solution, but you have to remember to re-save the model once this method has been added.
So:
1. First add the get_config method.
2. Save the model (with this method in place).
3. Load the model.
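A minimal sketch of that cycle, assuming the script that originally built and trained the model is still available:

# Re-save the model after adding get_config to the Attention class.
model.save('attention_wo_cval.h5')

# Now from_config receives step_dim and loading succeeds.
from keras.models import load_model
model = load_model('attention_wo_cval.h5', custom_objects={'Attention': Attention})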
I am working on a multi-label text classification problem and am trying to add an attention mechanism to a BiLSTM model. The attention mechanism code is taken from here. I am not able to save the model architecture, and I get the error mentioned below. My TensorFlow version is 2.2.0.
import tensorflow as tf
from keras import backend as K


def dot_product(x, kernel):
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)


class AttentionWithContext(tf.keras.layers.Layer):
    """
    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.
    """

    def __init__(self,
                 W_regularizer=None, u_regularizer=None, b_regularizer=None,
                 W_constraint=None, u_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        self.supports_masking = True
        self.init = tf.keras.initializers.get('glorot_uniform')
        self.W_regularizer = tf.keras.regularizers.get(W_regularizer)
        self.u_regularizer = tf.keras.regularizers.get(u_regularizer)
        self.b_regularizer = tf.keras.regularizers.get(b_regularizer)
        self.W_constraint = tf.keras.constraints.get(W_constraint)
        self.u_constraint = tf.keras.constraints.get(u_constraint)
        self.b_constraint = tf.keras.constraints.get(b_constraint)
        self.bias = bias
        super(AttentionWithContext, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3
        self.W = self.add_weight(shape=(input_shape[-1], input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight(shape=(input_shape[-1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        self.u = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_u'.format(self.name),
                                 regularizer=self.u_regularizer,
                                 constraint=self.u_constraint)
        super(AttentionWithContext, self).build(input_shape)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        uit = dot_product(x, self.W)
        if self.bias:
            uit += self.b
        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)
        a = K.exp(ait)
        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())
        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        return input_shape[0], input_shape[-1]
def lstm_with_attention(embedding_matrix,
                        **kwargs):
    STAMP = kwargs['STAMP']
    max_seq_length = kwargs['max_seq_length']
    EMBEDDING_DIM = kwargs['EMBEDDING_DIM']
    nb_words = kwargs['nb_words']

    inp = tf.keras.Input(shape=(max_seq_length,))
    embedded_seq = tf.keras.layers.Embedding(nb_words,
                                             EMBEDDING_DIM,
                                             weights=[embedding_matrix],
                                             trainable=False)(inp)
    x_1_bilstm = tf.keras.layers.Bidirectional(tf.compat.v1.keras.layers.CuDNNLSTM(128, return_sequences=True))(embedded_seq)
    x_1_bn = tf.keras.layers.BatchNormalization()(x_1_bilstm)
    x_2_bilstm = tf.keras.layers.Bidirectional(tf.compat.v1.keras.layers.CuDNNLSTM(64, return_sequences=True))(x_1_bn)
    attention = AttentionWithContext()(x_2_bilstm)
    x = tf.keras.layers.Dense(64, activation="relu")(attention)
    x = tf.keras.layers.Dense(1, activation="sigmoid")(x)
    model = tf.keras.Model(inputs=inp, outputs=x)
    optimizer = tf.keras.optimizers.Adam()
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    model.summary()

    with open(STAMP + ".json", "w") as json_file:
        json_file.write(model.to_json())
    return model, attention
Building the LSTM with attention:
embedding_matrix, nb_words = get_embedding('glove',word_index)
model, attention_layer = lstm_with_attention(embedding_matrix,STAMP=STAMP,max_seq_length=max_seq_length,nb_words=nb_words,EMBEDDING_DIM=EMBEDDING_DIM)
Error
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-54-4be6d63890f7> in <module>()
20 # # BiGRU CuDNN
21 embedding_matrix, nb_words = get_embedding('glove',word_index)
---> 22 model, attention_layer = lstm_with_attention(embedding_matrix,STAMP=STAMP,max_seq_length=max_seq_length,nb_words=nb_words,EMBEDDING_DIM=EMBEDDING_DIM)
23 # gru_model = make_cudnn_gru_f(max_seq_length,embedding_matrix,loss_func=macro_soft_f1,eval_metric=macro_f1)
24 # model = gru_model()
7 frames
<ipython-input-51-1ae8a90521d0> in lstm_with_attention(embedding_matrix, **kwargs)
115 model.summary()
116
--> 117 with open(STAMP + ".json", "w") as json_file: json_file.write(model.to_json())
118 return model, attention
119
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/network.py in to_json(self, **kwargs)
1296 A JSON string.
1297 """
-> 1298 model_config = self._updated_config()
1299 return json.dumps(
1300 model_config, default=serialization.get_json_type, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/network.py in _updated_config(self)
1274 from tensorflow.python.keras import __version__ as keras_version # pylint: disable=g-import-not-at-top
1275
-> 1276 config = self.get_config()
1277 model_config = {
1278 'class_name': self.__class__.__name__,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/network.py in get_config(self)
966 if not self._is_graph_network:
967 raise NotImplementedError
--> 968 return copy.deepcopy(get_network_config(self))
969
970     @classmethod
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/network.py in get_network_config(network, serialize_layer_fn)
2117 filtered_inbound_nodes.append(node_data)
2118
-> 2119 layer_config = serialize_layer_fn(layer)
2120 layer_config['name'] = layer.name
2121 layer_config['inbound_nodes'] = filtered_inbound_nodes
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py in serialize_keras_object(instance)
273 return serialize_keras_class_and_config(
274 name, {_LAYER_UNDEFINED_CONFIG_KEY: True})
--> 275 raise e
276 serialization_config = {}
277 for key, item in config.items():
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py in serialize_keras_object(instance)
268 name = get_registered_name(instance.__class__)
269 try:
--> 270 config = instance.get_config()
271 except NotImplementedError as e:
272 if _SKIP_FAILED_SERIALIZATION:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in get_config(self)
634 raise NotImplementedError('Layer %s has arguments in `__init__` and '
635 'therefore must override `get_config`.' %
--> 636 self.__class__.__name__)
637 return config
638
NotImplementedError: Layer AttentionWithContext has arguments in `__init__` and therefore must override `get_config`.
This is TensorFlow's way of notifying you that it does not know how to reconstruct your layer, because it does not know how to process your config. Here is a quote from TensorFlow's documentation:
get_config()
Returns the config of the layer.
A layer config is a Python dictionary (serializable) containing the configuration of a layer. The same layer can be reinstantiated later (without its trained weights) from this configuration.
The config of a layer does not include connectivity information, nor the layer class name. These are handled by Network (one layer of abstraction above).
To solve the problem, all you need is to create a get_config method corresponding to __init__ in your class to instruct TensorFlow how to reinstantiate your layer.
def get_config(self):
    config = super().get_config().copy()
    config.update({
        'W_regularizer': self.W_regularizer,
        'u_regularizer': self.u_regularizer,
        'b_regularizer': self.b_regularizer,
        'W_constraint': self.W_constraint,
        'u_constraint': self.u_constraint,
        'b_constraint': self.b_constraint,
        'bias': self.bias,
    })
    return config
Then you should be able to save and load it.
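For completeness, a hedged sketch of the round trip through the JSON file that lstm_with_attention writes (STAMP as in the question):

import tensorflow as tf

with open(STAMP + '.json') as json_file:
    model = tf.keras.models.model_from_json(
        json_file.read(),
        custom_objects={'AttentionWithContext': AttentionWithContext})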
I am trying to create a model similar to the one proposed in this paper: https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8738842
The custom cell code is available at: https://github.com/SungjoonPark/DenoisingRNN/blob/master/dgrud.py
However, I am not able to embed this custom cell into any RNN model, and I am assuming it is because __init__ takes three arguments instead of the standard num_units.
I tried following the example at https://keras.io/layers/recurrent/:
cell = MinimalRNNCell(32)
x = keras.Input((None, 5))
layer = RNN(cell)
y = layer(x)
but I get an error:
TypeError                                 Traceback (most recent call last)
<ipython-input> in <module>
      2 x = keras.Input((None, 5))
      3 layer = RNN(cell)
----> 4 y = layer(x)

~/.local/lib/python3.5/site-packages/keras/layers/recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
    539
    540         if initial_state is None and constants is None:
--> 541             return super(RNN, self).__call__(inputs, **kwargs)
    542
    543         # If any of initial_state or constants are specified and are Keras

~/.local/lib/python3.5/site-packages/keras/engine/base_layer.py in __call__(self, inputs, **kwargs)
    487             # Actually call the layer,
    488             # collecting output(s), mask(s), and shape(s).
--> 489             output = self.call(inputs, **kwargs)
    490             output_mask = self.compute_mask(inputs, previous_mask)
    491

~/.local/lib/python3.5/site-packages/keras/layers/recurrent.py in call(self, inputs, mask, training, initial_state, constants)
    680             mask=mask,
    681             unroll=self.unroll,
--> 682             input_length=timesteps)
    683         if self.stateful:
    684             updates = []

~/.local/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py in rnn(step_function, inputs, initial_states, go_backwards, mask, constants, unroll, input_length)
   3101         constants=constants,
   3102         unroll=unroll,
--> 3103         input_length=input_length)
   3104     reachable = tf_utils.get_reachable_from_inputs([learning_phase()],
   3105                                                    targets=[last_output])

~/.local/lib/python3.5/site-packages/tensorflow/python/keras/backend.py in rnn(step_function, inputs, initial_states, go_backwards, mask, constants, unroll, input_length, time_major, zero_output_for_mask)
   3730     # the value is discarded.
   3731     output_time_zero, _ = step_function(
-> 3732         input_time_zero, tuple(initial_states) + tuple(constants))
   3733     output_ta = tuple(
   3734         tensor_array_ops.TensorArray(

~/.local/lib/python3.5/site-packages/keras/layers/recurrent.py in step(inputs, states)
    671         else:
    672             def step(inputs, states):
--> 673                 return self.cell.call(inputs, states, **kwargs)
    674
    675         last_output, outputs, states = K.rnn(step,

TypeError: call() takes 2 positional arguments but 3 were given
Could you please help me figure out whether it is an __init__ issue, a call issue, or whether I need to define a custom layer for this custom cell?
I have looked for answers all over the internet and just can't get any clarity on how embedding a custom cell in an RNN model should be done.
Thank you in advance,
Sam
I was able to recreate your issue when I imported keras directly into the program. See below:
%tensorflow_version 1.x
import keras
from keras import backend as K
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import RNN


class MinimalRNNCell(keras.layers.Layer):

    def __init__(self, units, **kwargs):
        self.units = units
        self.state_size = units
        super(MinimalRNNCell, self).__init__(**kwargs)

    def build(self, input_shape):
        self.kernel = self.add_weight(shape=(input_shape[-1], self.units),
                                      initializer='uniform',
                                      name='kernel')
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units),
            initializer='uniform',
            name='recurrent_kernel')
        self.built = True

    def call(self, inputs, states):
        prev_output = states[0]
        h = K.dot(inputs, self.kernel)
        output = h + K.dot(prev_output, self.recurrent_kernel)
        return output, [output]


# Let's use this cell in a RNN layer:
cell = MinimalRNNCell(32)
x = keras.Input((None, 5))
layer = RNN(cell)
y = layer(x)
Output -
TensorFlow is already loaded. Please restart the runtime to change versions.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-3-0f3bed686a7d> in <module>()
34 x = keras.Input((None, 5))
35 layer = RNN(cell)
---> 36 y = layer(x)
5 frames
/usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py in symbolic_fn_wrapper(*args, **kwargs)
73 if _SYMBOLIC_SCOPE.value:
74 with get_graph().as_default():
---> 75 return func(*args, **kwargs)
76 else:
77 return func(*args, **kwargs)
TypeError: __call__() takes 2 positional arguments but 3 were given
The error vanishes when you import keras from tensorflow (from tensorflow import keras). The code then runs successfully with TensorFlow 1.x as well as 2.x. Modify your code as below:
%tensorflow_version 2.x
from keras import backend as K
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
from tensorflow.keras.layers import RNN


# First, let's define a RNN Cell, as a layer subclass.
class MinimalRNNCell(keras.layers.Layer):

    def __init__(self, units, **kwargs):
        self.units = units
        self.state_size = units
        super(MinimalRNNCell, self).__init__(**kwargs)

    def build(self, input_shape):
        self.kernel = self.add_weight(shape=(input_shape[-1], self.units),
                                      initializer='uniform',
                                      name='kernel')
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units),
            initializer='uniform',
            name='recurrent_kernel')
        self.built = True

    def call(self, inputs, states):
        prev_output = states[0]
        h = K.dot(inputs, self.kernel)
        output = h + K.dot(prev_output, self.recurrent_kernel)
        return output, [output]


# Let's use this cell in a RNN layer:
cell = MinimalRNNCell(32)
x = keras.Input((None, 5))
layer = RNN(cell)
y = layer(x)

print("I Ran Successfully")
Output -
I Ran Successfully
Hope this answers your question. Happy Learning.