Related
If the question seems to dumb, it is because I am new to TensorFlow.
I was implementing a toy endocer-decoder problem using TensorFlow 2’s TFA seq2seq implementation.
The API was clearly understandable until I wanted to change my BasicDecoder with BeamSearchDecoder.
My question is regarding start_tokens and end_token arguments’ initialization of BeamSearchDecoder.
Here is a copy of the implementation, any help is appreciated.
tf.keras.backend.clear_session()
tf.random.set_seed(42)
enc_vocab_size = len(train_vocab) + 1
dec_vocab_size = len(target_vocab) + 1
embed_size = 10
import tensorflow_addons as tfa
encoder_inputs = keras.layers.Input(shape=[None], dtype=np.int32)
decoder_inputs = keras.layers.Input(shape=[None], dtype=np.int32)
sequence_lengths = keras.layers.Input(shape=[], dtype=np.int32)
encoder_embeddings = keras.layers.Embedding(enc_vocab_size, embed_size)(encoder_inputs)
encoder = keras.layers.LSTM(512, return_state = True)
encoder_outputs, state_h, state_c = encoder(encoder_embeddings)
encoder_state = [state_h, state_c]
sampler = tfa.seq2seq.sampler.TrainingSampler()
decoder_embeddings = keras.layers.Embedding(dec_vocab_size, embed_size)(decoder_inputs)
decoder_cell = keras.layers.LSTMCell(512)
output_layer = keras.layers.Dense(dec_vocab_size)
beam_width = 10
start_tokens = tf.zeros([32], tf.dtypes.int32)
end_token = tf.constant(1, tf.dtypes.int32)
decoder = tfa.seq2seq.beam_search_decoder.BeamSearchDecoder(cell = decoder_cell, beam_width = beam_width, output_layer = output_layer)
decoder_initial_state = tfa.seq2seq.beam_search_decoder.tile_batch(encoder_state, multiplier = beam_width)
outputs, _, _ = decoder(decoder_embeddings, start_tokens = start_tokens, end_token = end_token, initial_state = decoder_initial_state)
Y_proba = tf.nn.softmax(outputs.rnn_output)
model = keras.models.Model(inputs = [encoder_inputs, decoder_inputs], outputs = [Y_proba])
model.compile(loss="sparse_categorical_crossentropy", optimizer = 'adam', metrics = ['accuracy'])
Error Trance:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-101-6cf083735ed0> in <module>()
34 decoder = tfa.seq2seq.beam_search_decoder.BeamSearchDecoder(cell = decoder_cell, beam_width = beam_width, output_layer = output_layer)
35 decoder_initial_state = tfa.seq2seq.beam_search_decoder.tile_batch(encoder_state, multiplier = beam_width)
---> 36 outputs, _, _ = decoder(decoder_embeddings, start_tokens = start_tokens, end_token = end_token, initial_state = decoder_initial_state)
37 Y_proba = tf.nn.softmax(outputs.rnn_output)
38
1 frames
/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
690 except Exception as e: # pylint:disable=broad-except
691 if hasattr(e, 'ag_error_metadata'):
--> 692 raise e.ag_error_metadata.to_exception(e)
693 else:
694 raise
ValueError: Exception encountered when calling layer "beam_search_decoder" (type BeamSearchDecoder).
in user code:
File "/usr/local/lib/python3.7/dist-packages/tensorflow_addons/seq2seq/beam_search_decoder.py", line 941, in call *
self,
File "/usr/local/lib/python3.7/dist-packages/typeguard/__init__.py", line 262, in wrapper *
retval = func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow_addons/seq2seq/decoder.py", line 430, in body *
(next_outputs, decoder_state, next_inputs, decoder_finished) = decoder.step(
File "/usr/local/lib/python3.7/dist-packages/tensorflow_addons/seq2seq/beam_search_decoder.py", line 705, in step *
cell_outputs, next_cell_state = self._cell(
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler **
raise e.with_traceback(filtered_tb) from None
ValueError: Exception encountered when calling layer "lstm_cell_1" (type LSTMCell).
Dimensions must be equal, but are 80 and 320 for '{{node beam_search_decoder/decoder/while/BeamSearchDecoderStep/lstm_cell_1/mul}} = Mul[T=DT_FLOAT](beam_search_decoder/decoder/while/BeamSearchDecoderStep/lstm_cell_1/Sigmoid_1, beam_search_decoder/decoder/while/BeamSearchDecoderStep/Reshape_2)' with input shapes: [320,80,2048], [320,512].
Call arguments received:
• inputs=tf.Tensor(shape=(320, None, 10), dtype=float32)
• states=ListWrapper(['tf.Tensor(shape=(320, 512), dtype=float32)', 'tf.Tensor(shape=(320, 512), dtype=float32)'])
• training=None
Call arguments received:
• embedding=tf.Tensor(shape=(None, None, 10), dtype=float32)
• start_tokens=tf.Tensor(shape=(32,), dtype=int32)
• end_token=tf.Tensor(shape=(), dtype=int32)
• initial_state=['tf.Tensor(shape=(None, 512), dtype=float32)', 'tf.Tensor(shape=(None, 512), dtype=float32)']
• training=None
• kwargs=<class 'inspect._empty'>
I answered this on this GitHub repository: https://github.com/ageron/handson-ml2/issues/541
Here is a minimalistic implementation; without attention, of what you want. Beamserach is used during inference once we are done with training.
For implementation of the encoder-decoder part see the provided github.
Implementing Beamserach
def beam_search_inferance_model(beam_width):
batch_size = tf.shape(encoder_input)[:1]
max_output_length = Y_train.shape[1]
start_tokens = tf.fill(dims = batch_size, value = sos_id)
decoder_initial_state = tfa.seq2seq.tile_batch(encoder_state_HC, multiplier = beam_width)
beam_search_inference = tfa.seq2seq.BeamSearchDecoder(cell = LSTMCell, beam_width = beam_width, output_layer = output_layer, maximum_iterations = max_output_length)
outputs, _, _ = beam_search_inference(decoder_embd_layer.variables, start_tokens = start_tokens, end_token = 0, initial_state = decoder_initial_state)
final_outputs = tf.transpose(outputs.predicted_ids, perm = (0,2,1))
beam_scores = tf.transpose(outputs.beam_search_decoder_output.scores, perm = (0,2,1))
return keras.Model(inputs = [encoder_input], outputs = [final_outputs, beam_scores])
beam_search_inferance_model = beam_search_inferance_model(3)
Utility function
I copied this function from TFA's API tutorial and adapted it!!!
def beam_translate(sentence):
X = prepare_date_strs_padded(sentence)
result, beam_scores = beam_search_inferance_model.predict(X)
for beam, score in zip(result, beam_scores):
output = ids_to_date_strs(beam)
beam_score = [a.sum() for a in score]
print('Input: %s' % sentence)
print('-----' * 12)
for i in range(len(output)):
print('{} Predicted translation: {} {}'.format(i + 1, output[i], beam_score[i]))
print('\n')
Output
beam_translate(["July 14, 1789", "September 01, 2020"])
Input: ['July 14, 1789', 'September 01, 2020']
------------------------------------------------------------
1 Predicted translation: 2288-01-11 -83.7786865234375
2 Predicted translation: 2288-01-10 -83.90345764160156
3 Predicted translation: 2288-01-21 -84.30797576904297
Input: ['July 14, 1789', 'September 01, 2020']
------------------------------------------------------------
1 Predicted translation: 2221-02-26 -79.02340698242188
2 Predicted translation: 2222-02-26 -79.29275512695312
3 Predicted translation: 2221-02-21 -80.06587982177734
I hope this helps!
Cheers,
Kasra
I am trying to concatenate the outputs of two or more models.
Code:
import efficientnet.tfkeras as efn
def build_model(model_name="efficientnet_B0", dim=128,
normalize_features=True, return_feature_model=False,
inp_layer=None):
def get_efficientnet(effnet_model):
effnet_model = int(effnet_model)
EFNS = [efn.EfficientNetB0, efn.EfficientNetB1,
efn.EfficientNetB2, efn.EfficientNetB3,
efn.EfficientNetB4, efn.EfficientNetB5,
efn.EfficientNetB6, efn.EfficientNetB7,]
return EFNS[effnet_model]
model_dict = {
"efficientnet": get_efficientnet,
"resnet50": tf.keras.applications.ResNet50,
"vgg19": tf.keras.applications.VGG19,
"Xception": tf.keras.applications.Xception,
}
# For Image Input
if model_name.find('efficientnet') != -1:
base = model_dict['efficientnet'](model_name[-1])(input_shape=(dim,dim,3),
weights='imagenet',
include_top=False)(inp_layer)
else:
base = model_dict[model_name](input_shape=(dim,dim,3),
weights='imagenet',
include_top=False)(inp_layer)
pooling_layer = keras.layers.GlobalAveragePooling2D()(base)
normalized_feature = keras.layers.BatchNormalization(name="norm_featvec")(pooling_layer)
output = keras.layers.Dense(1,activation='sigmoid')(pooling_layer)
if normalize_features:
feat_output = normalized_feature
else:
feat_output = pooling_layer
if return_feature_model:
featext_model = keras.Model(inputs=[img_inp], outputs=[feat_output])
model = keras.Model(inputs=[img_inp], outputs=[output])
if return_feature_model:
return model, featext_model
else:
return model
def get_complete_model(featvec_model_paths, dim=384):
input_layer = keras.layers.Input(shape=(dim, dim, 3), name="img_input")
models = list()
for model_path in featvec_model_paths:
model_name = os.path.basename(model_path)[5:-5]
_, featvec_model = build_model(model_name=model_name, dim=dim,
return_feature_model=True,
inp_layer=input_layer)
featvec_model.load_weights(model_path)
featvec_model.trainable = False
models.append(featvec_model)
concat_layer = keras.layers.Concatenate()(models)
dense_1 = keras.layers.Dense(4096, activation="selu")(concat_layer)
dense_2 = keras.layers.Dense(2048, activation="selu")(dense_1)
dense_3 = keras.layers.Dense(1024, activation="selu")(dense_2)
output_layer = keras.layers.Dense(1, activation="sigmoid")(dense_3)
model = keras.Model(inputs=[input_layer], outputs=[output_layer])
model.compile(loss=keras.losses.BinaryCrossentropy(),
optimizer=keras.optimizers.Nadam(),
metrics=['AUC'])
return model
featvec_model_paths in get_complete_model are paths to weight files.
Execution Line:
featvec_paths = ['feat_efficientnet_b3_0.h5',
'feat_efficientnet_b3_1.h5',
'feat_efficientnet_b4_0.h5',
'feat_efficientnet_b4_1.h5']
final_model, featvec_model = get_complete_model(featvec_paths, dim=384)
You can run the code by commenting featvec_model.load_weights(model_path).
Error:
TypeError Traceback (most recent call last)
<ipython-input-104-76ecd46bf20d> in <module>
----> 1 final_model, featvec_model = get_complete_model(featvec_paths, dim=384)
<ipython-input-103-d92456535b41> in get_complete_model(featvec_model_paths, dim)
17 #print(models)
18
---> 19 concat_layer = keras.layers.Concatenate()(models)
20 dense_1 = keras.layers.Dense(4096, activation="selu")(concat_layer)
21 dense_2 = keras.layers.Dense(2048, activation="selu")(dense_1)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
1006 with ops.name_scope_v2(name_scope):
1007 if not self.built:
-> 1008 self._maybe_build(inputs)
1009
1010 with autocast_variable.enable_auto_cast_variables(
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in _maybe_build(self, inputs)
2708 # operations.
2709 with tf_utils.maybe_init_scope(self):
-> 2710 self.build(input_shapes) # pylint:disable=not-callable
2711 # We must set also ensure that the layer is marked as built, and the build
2712 # shape is stored since user defined build functions may not be calling
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/utils/tf_utils.py in wrapper(instance, input_shape)
270 if input_shape is not None:
271 input_shape = convert_shapes(input_shape, to_tuples=True)
--> 272 output_shape = fn(instance, input_shape)
273 # Return shapes from `fn` as TensorShapes.
274 if output_shape is not None:
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/layers/merge.py in build(self, input_shape)
490 def build(self, input_shape):
491 # Used purely for shape validation.
--> 492 if not isinstance(input_shape[0], tuple) or len(input_shape) < 2:
493 raise ValueError('A `Concatenate` layer should be called '
494 'on a list of at least 2 inputs')
TypeError: 'NoneType' object is not subscriptable
I am passing a list of models to the concatenation class object. I also tried appending only the last layers of each model:
models.append(featvec_model.layers[-1]) but this also gave the same error.
I just had to do this in get_complete_model():
models = models.append(featvec_model)
I am getting an error in Tensorflow 2. How can I solve it?
Here is my code (assume all the relevant modules/objects of Keras have been imported):
dense1 = 2**7
dense2 = 2**8
dense3 = 2**9
dropout = 0.8
price_loss = 1
cut_loss = 1
activation= LeakyReLU()
#====================================================================
# INPUTS
#====================================================================
#----------------------------------------------------------------
carat = Input(
shape= (1,),
batch_size= batch_size,
name= 'carat'
)
#----------------------------------------------------------------
color = Input(
shape= (1,),
batch_size= batch_size,
name= 'color'
)
#----------------------------------------------------------------
clarity = Input(
shape= (1,),
batch_size= batch_size,
name= 'clarity'
)
#----------------------------------------------------------------
depth = Input(
shape= (1,),
batch_size= batch_size,
name= 'depth'
)
#----------------------------------------------------------------
table = Input(
shape= (1,),
batch_size= batch_size,
name= 'table'
)
#----------------------------------------------------------------
x = Input(
shape= (1,),
batch_size= batch_size,
name= 'x'
)
#----------------------------------------------------------------
y = Input(
shape= (1,),
batch_size= batch_size,
name= 'y'
)
#----------------------------------------------------------------
z = Input(
shape= (1,),
batch_size= batch_size,
name= 'z'
)
#----------------------------------------------------------------
#====================================================================
# CREATE EMBEDDINGS FOR CATEGORICAL FEATURES "COLOR" AND "CLARITY"
#====================================================================
color = Embedding(input_dim = 7, output_dim = 1, name = 'color_emb')(color)
clarity = Embedding(input_dim = 8, output_dim = 1, name = 'clarity_emb')(clarity)
color = Flatten()(color)
clarity = Flatten()(clarity)
#====================================================================
# CONCATENATE FEATURES
#====================================================================
x = Concatenate()([color, clarity, carat, depth, table, x, y, z])
#====================================================================
# DENSE NETWORK
#====================================================================
x = Dense(dense1, activation = activation)(x)
x = BatchNormalization()(x)
x = Dense(dense2, activation = activation)(x)
x = BatchNormalization()(x)
x = Dense(dense3, activation = activation)(x)
x = BatchNormalization()(x)
x = Dropout(dropout)(x)
#====================================================================
# PREDICTIONS
# ====================================================================
cut = Dense(1, activation = 'sigmoid')(x)
price = Dense(1)(x)
#====================================================================
# DEFINE THE MODEL
# ====================================================================
model = Model(inputs = [carat, color, clarity, depth, table, x, y, z] , outputs = [cut , price])
#====================================================================
# COMPILE THE MODEL
# ====================================================================
model.compile(
optimizer = 'Adam',
loss = {
"price": "huber_loss",
"cut": "binary_crossentropy",
},
loss_weights = [price_loss, cut_loss],
metrics = {
"price": ["mean_absolute_percentage_error"],
"cut": [tf.keras.metrics.AUC(), tf.keras.metrics.Precision(thresholds = thresholds)],
}
)
Stack trace:
WARNING:tensorflow:Functional inputs must come from `tf.keras.Input` (thus holding past layer metadata), they cannot be the output of a previous non-Input layer. Here, a tensor specified as input to "functional_1" was not an Input tensor, it was generated by layer flatten_8.
Note that input tensors are instantiated via `tensor = tf.keras.Input(shape)`.
The tensor that caused the issue was: flatten_8/Reshape:0
WARNING:tensorflow:Functional inputs must come from `tf.keras.Input` (thus holding past layer metadata), they cannot be the output of a previous non-Input layer. Here, a tensor specified as input to "functional_1" was not an Input tensor, it was generated by layer flatten_9.
Note that input tensors are instantiated via `tensor = tf.keras.Input(shape)`.
The tensor that caused the issue was: flatten_9/Reshape:0
WARNING:tensorflow:Functional inputs must come from `tf.keras.Input` (thus holding past layer metadata), they cannot be the output of a previous non-Input layer. Here, a tensor specified as input to "functional_1" was not an Input tensor, it was generated by layer dropout_2.
Note that input tensors are instantiated via `tensor = tf.keras.Input(shape)`.
The tensor that caused the issue was: dropout_2/cond/Identity:0
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-64-132a2d8458b9> in <module>
135 # ====================================================================
136
--> 137 model = Model(inputs = [carat, color, clarity, depth, table, x, y, z] , outputs = [cut , price])
138
139 #====================================================================
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\engine\training.py in __new__(cls, *args, **kwargs)
240 # Functional model
241 from tensorflow.python.keras.engine import functional # pylint: disable=g-import-not-at-top
--> 242 return functional.Functional(*args, **kwargs)
243 else:
244 return super(Model, cls).__new__(cls, *args, **kwargs)
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\engine\functional.py in __init__(self, inputs, outputs, name, trainable)
113 # 'arguments during initialization. Got an unexpected argument:')
114 super(Functional, self).__init__(name=name, trainable=trainable)
--> 115 self._init_graph_network(inputs, outputs)
116
117 #trackable.no_automatic_dependency_tracking
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\engine\functional.py in _init_graph_network(self, inputs, outputs)
189 # Keep track of the network's nodes and layers.
190 nodes, nodes_by_depth, layers, _ = _map_graph_network(
--> 191 self.inputs, self.outputs)
192 self._network_nodes = nodes
193 self._nodes_by_depth = nodes_by_depth
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\engine\functional.py in _map_graph_network(inputs, outputs)
929 'The following previous layers '
930 'were accessed without issue: ' +
--> 931 str(layers_with_complete_input))
932 for x in nest.flatten(node.outputs):
933 computable_tensors.add(id(x))
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("clarity_8:0", shape=(20, 1), dtype=float32) at layer "clarity_emb". The following previous layers were accessed without issue: []
pay attention to not override the input variables. you overrode color, clarity and x input inside the network
here a possible solution:
dense1 = 2**7
dense2 = 2**8
dense3 = 2**9
dropout = 0.8
price_loss = 1
cut_loss = 1
activation= LeakyReLU()
batch_size = 32
#====================================================================
# INPUTS
#====================================================================
carat = Input(shape= (1,), batch_size= batch_size, name= 'carat')
Color = Input(shape= (1,), batch_size= batch_size, name= 'color')
Clarity = Input(shape= (1,), batch_size= batch_size, name= 'clarity')
depth = Input(shape= (1,), batch_size= batch_size, name= 'depth')
table = Input(shape= (1,), batch_size= batch_size, name= 'table')
X = Input(shape= (1,), batch_size= batch_size, name= 'x')
y = Input(shape= (1,), batch_size= batch_size, name= 'y')
z = Input(shape= (1,), batch_size= batch_size, name= 'z')
#====================================================================
# CREATE EMBEDDINGS FOR CATEGORICAL FEATURES "COLOR" AND "CLARITY"
#====================================================================
color = Embedding(input_dim = 7, output_dim = 1, name = 'color_emb')(Color)
clarity = Embedding(input_dim = 8, output_dim = 1, name = 'clarity_emb')(Clarity)
color = Flatten()(color)
clarity = Flatten()(clarity)
#====================================================================
# CONCATENATE FEATURES
#====================================================================
x = Concatenate()([color, clarity, carat, depth, table, X, y, z])
#====================================================================
# DENSE NETWORK
#====================================================================
x = Dense(dense1, activation = activation)(x)
x = BatchNormalization()(x)
x = Dense(dense2, activation = activation)(x)
x = BatchNormalization()(x)
x = Dense(dense3, activation = activation)(x)
x = BatchNormalization()(x)
x = Dropout(dropout)(x)
#====================================================================
# PREDICTIONS
# ====================================================================
cut = Dense(1, activation = 'sigmoid')(x)
price = Dense(1)(x)
#====================================================================
# DEFINE THE MODEL
# ====================================================================
model = Model(inputs = [carat, Color, Clarity, depth, table, X, y, z] ,
outputs = [cut , price])
model.compile('adam', 'mse')
model.summary()
here the running notebook: https://colab.research.google.com/drive/1akpvuGKHXt6Frrec692zvCMAfRzZbIaM?usp=sharing
Well, like always when we are stuck with some error, the answer lies somewhere in error message. Please note that while you have been able to build the structure of your model overwriting your variables color, clarity and so on... the variables changed the type. At the beginning you initialize the variables to be tf.keras.Input and then you overwrite it with Embedding and Flatten layers. So when it comes to building your model, you ask to build model with inputs that are no longer tf.keras.Input but something else.
The solution is to not overwrite those variables.
When I am defining the Hierarchical Attentional Network, an error is popping up which says "AttributeError: can't set attribute". Please help.
This is the Attention.py file
import keras
import Attention
from keras.engine.topology import Layer, Input
from keras import backend as K
from keras import initializers
#Hierarchical Attention Layer Implementation
'''
Implemented by Arkadipta De (MIT Licensed)
'''
class Hierarchical_Attention(Layer):
def __init__(self, attention_dim):
self.init = initializers.get('normal')
self.supports_masking = True
self.attention_dim = attention_dim
super(Hierarchical_Attention, self).__init__()
def build(self, input_shape):
assert len(input_shape) == 3
self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
self.b = K.variable(self.init((self.attention_dim, )))
self.u = K.variable(self.init((self.attention_dim, 1)))
self.trainable_weights = [self.W, self.b, self.u]
super(Hierarchical_Attention, self).build(input_shape)
def compute_mask(self, inputs, mask=None):
return mask
def call(self, x, mask=None):
# size of x :[batch_size, sel_len, attention_dim]
# size of u :[batch_size, attention_dim]
# uit = tanh(xW+b)
uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
ait = K.dot(uit, self.u)
ait = K.squeeze(ait, -1)
ait = K.exp(ait)
if mask is not None:
# Cast the mask to floatX to avoid float64 upcasting in theano
ait *= K.cast(mask, K.floatx())
ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
ait = K.expand_dims(ait)
weighted_input = x * ait
output = K.sum(weighted_input, axis=1)
return output
def compute_output_shape(self, input_shape):
return (input_shape[0], input_shape[-1])
This is the main file where I'm building the model.
import re
import os
import numpy as np
import pandas as pd
import keras
from keras.engine.topology import Layer, Input
import Attention
from sklearn.model_selection import train_test_split
from keras.models import Model, Input
from keras.layers import Dropout, Dense, LSTM, GRU, Bidirectional, concatenate, Multiply, Subtract
from keras.utils import to_categorical
from keras import backend as K
from keras import initializers
Max_Title_Length = 0
Max_Content_Length = 0
for i in range(0, len(X)):
Max_Title_Length = max(Max_Title_Length, len(X['title'][i]))
Max_Content_Length = max(Max_Content_Length, len(X['text'][i]))
vector_size = 100
input_title = Input(shape = (Max_Title_Length,vector_size,), name = 'input_title')
input_content = Input(shape = (Max_Content_Length,vector_size,), name = 'input_content')
def Classifier(input_title, input_content):
#x = Bidirectional(GRU(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_title)
x = Bidirectional(GRU(100, return_sequences=True))(input_title)
x_attention = Attention.Hierarchical_Attention(100)(x)
#y = Bidirectional(LSTM(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_content)
y = Bidirectional(GRU(100, return_sequences=True))(input_content)
y_attention = Attention.Hierarchical_Attention(100)(y)
z = concatenate([x_attention,y_attention])
z = Dense(units = 512, activation = 'relu')(z)
z = Dropout(0.2)(z)
z = Dense(units = 256, activation = 'relu')(z)
z = Dropout(0.2)(z)
z = Dense(units = 128, activation = 'relu')(z)
z = Dropout(0.2)(z)
z = Dense(units = 50, activation = 'relu')(z)
z = Dropout(0.2)(z)
z = Dense(units = 10, activation = 'relu')(z)
z = Dropout(0.2)(z)
output = Dense(units = 2, activation = 'softmax')(z)
model = Model(inputs = [input_title, input_content], outputs = output)
model.summary()
return model
def compile_and_train(model, num_epochs):
model.compile(optimizer= 'adam', loss= 'categorical_crossentropy', metrics=['acc'])
history = model.fit([train_x_title,train_x_content], train_label, batch_size=32, epochs=num_epochs)
return history
Classifier_Model = Classifier(input_title,input_content)
This code is giving me an error which says:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __setattr__(self, name, value)
2761 try:
-> 2762 super(tracking.AutoTrackable, self).__setattr__(name, value)
2763 except AttributeError:
AttributeError: can't set attribute
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
6 frames
<ipython-input-43-32804502e0b0> in <module>()
32 return history
33
---> 34 Classifier_Model = Classifier(input_title,input_content)
<ipython-input-43-32804502e0b0> in Classifier(input_title, input_content)
7 #x = Bidirectional(GRU(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_title)
8 x = Bidirectional(GRU(200, return_sequences=True))(input_title)
----> 9 x_attention = Attention.Hierarchical_Attention(100)(x)
10 #y = Bidirectional(LSTM(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_content)
11 y = Bidirectional(GRU(100, return_sequences=True))(input_content)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
924 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
925 return self._functional_construction_call(inputs, args, kwargs,
--> 926 input_list)
927
928 # Maintains info about the `Layer.call` stack.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1096 # Build layer if applicable (if the `build` method has been
1097 # overridden).
-> 1098 self._maybe_build(inputs)
1099 cast_inputs = self._maybe_cast_inputs(inputs, input_list)
1100
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in _maybe_build(self, inputs)
2641 # operations.
2642 with tf_utils.maybe_init_scope(self):
-> 2643 self.build(input_shapes) # pylint:disable=not-callable
2644 # We must set also ensure that the layer is marked as built, and the build
2645 # shape is stored since user defined build functions may not be calling
/content/Attention.py in build(self, input_shape)
23 self.b = K.variable(self.init((self.attention_dim, )))
24 self.u = K.variable(self.init((self.attention_dim, 1)))
---> 25 self.trainable_weights = [self.W, self.b, self.u]
26 super(Hierarchical_Attention, self).build(input_shape)
27
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __setattr__(self, name, value)
2765 ('Can\'t set the attribute "{}", likely because it conflicts with '
2766 'an existing read-only #property of the object. Please choose a '
-> 2767 'different name.').format(name))
2768 return
2769
AttributeError: Can't set the attribute "trainable_weights", likely because it conflicts with an existing read-only #property of the object. Please choose a different name.
I'm a noob in Neural Networks. Please help.
I ran into the same problem when I was trying to execute the code on Google Colab.
I found some answers on StackOverflow says it's an ongoing issue with tf on Colab.
link here
It remains unsolved for me, but I believe you can try to set self._trainable_weights instead of self.trainable_weights
I am working on a multi-label text classification problem. I am trying to add attention mechanism with bilstm model. The attention mechanism code is taken from here. I am not able to save the model architecture and getting an error mentioned below. My tensorflow version -2.2.0
from keras import backend as K
def dot_product(x, kernel):
if K.backend() == 'tensorflow':
return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
else:
return K.dot(x, kernel)
class AttentionWithContext(tf.keras.layers.Layer):
"""
# Input shape
3D tensor with shape: `(samples, steps, features)`.
# Output shape
2D tensor with shape: `(samples, features)`.
"""
def __init__(self,
W_regularizer=None, u_regularizer=None, b_regularizer=None,
W_constraint=None, u_constraint=None, b_constraint=None,
bias=True, **kwargs):
self.supports_masking = True
self.init = tf.keras.initializers.get('glorot_uniform')
self.W_regularizer = tf.keras.regularizers.get(W_regularizer)
self.u_regularizer = tf.keras.regularizers.get(u_regularizer)
self.b_regularizer = tf.keras.regularizers.get(b_regularizer)
self.W_constraint = tf.keras.constraints.get(W_constraint)
self.u_constraint = tf.keras.constraints.get(u_constraint)
self.b_constraint = tf.keras.constraints.get(b_constraint)
self.bias = bias
super(AttentionWithContext, self).__init__(**kwargs)
def build(self, input_shape):
assert len(input_shape) == 3
self.W = self.add_weight(shape=(input_shape[-1], input_shape[-1],),
initializer=self.init,
name='{}_W'.format(self.name),
regularizer=self.W_regularizer,
constraint=self.W_constraint)
if self.bias:
self.b = self.add_weight(shape=(input_shape[-1],),
initializer='zero',
name='{}_b'.format(self.name),
regularizer=self.b_regularizer,
constraint=self.b_constraint)
self.u = self.add_weight(shape=(input_shape[-1],),
initializer=self.init,
name='{}_u'.format(self.name),
regularizer=self.u_regularizer,
constraint=self.u_constraint)
super(AttentionWithContext, self).build(input_shape)
def compute_mask(self, input, input_mask=None):
# do not pass the mask to the next layers
return None
def call(self, x, mask=None):
uit = dot_product(x, self.W)
if self.bias:
uit += self.b
uit = K.tanh(uit)
ait = dot_product(uit, self.u)
a = K.exp(ait)
# apply mask after the exp. will be re-normalized next
if mask is not None:
# Cast the mask to floatX to avoid float64 upcasting in theano
a *= K.cast(mask, K.floatx())
# in some cases especially in the early stages of training the sum may be almost zero
# and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
# a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
a = K.expand_dims(a)
weighted_input = x * a
return K.sum(weighted_input, axis=1)
def compute_output_shape(self, input_shape):
return input_shape[0], input_shape[-1]
def lstm_with_attention(embedding_matrix,
**kwargs):
STAMP = kwargs['STAMP']
max_seq_length = kwargs['max_seq_length']
EMBEDDING_DIM = kwargs['EMBEDDING_DIM']
nb_words = kwargs['nb_words']
inp = tf.keras.Input(shape=(max_seq_length,))
embedded_seq = tf.keras.layers.Embedding(nb_words,
EMBEDDING_DIM,
weights=[embedding_matrix],
trainable=False)(inp)
x_1_bilstm = tf.keras.layers.Bidirectional(tf.compat.v1.keras.layers.CuDNNLSTM(128, return_sequences=True))(embedded_seq)
x_1_bn = tf.keras.layers.BatchNormalization()(x_1_bilstm)
x_2_bilstm = tf.keras.layers.Bidirectional(tf.compat.v1.keras.layers.CuDNNLSTM(64, return_sequences=True))(x_1_bn)
attention = AttentionWithContext()(x_2_bilstm)
x = tf.keras.layers.Dense(64, activation="relu")(attention)
x = tf.keras.layers.Dense(1, activation="sigmoid")(x)
model = tf.keras.Model(inputs=inp, outputs=x)
optimizer = tf.keras.optimizers.Adam()
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()
with open(STAMP + ".json", "w") as json_file: json_file.write(model.to_json())
return model, attention
Building lstm with attention
embedding_matrix, nb_words = get_embedding('glove',word_index)
model, attention_layer = lstm_with_attention(embedding_matrix,STAMP=STAMP,max_seq_length=max_seq_length,nb_words=nb_words,EMBEDDING_DIM=EMBEDDING_DIM)
Error
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-54-4be6d63890f7> in <module>()
20 # # BiGRU CuDNN
21 embedding_matrix, nb_words = get_embedding('glove',word_index)
---> 22 model, attention_layer = lstm_with_attention(embedding_matrix,STAMP=STAMP,max_seq_length=max_seq_length,nb_words=nb_words,EMBEDDING_DIM=EMBEDDING_DIM)
23 # gru_model = make_cudnn_gru_f(max_seq_length,embedding_matrix,loss_func=macro_soft_f1,eval_metric=macro_f1)
24 # model = gru_model()
7 frames
<ipython-input-51-1ae8a90521d0> in lstm_with_attention(embedding_matrix, **kwargs)
115 model.summary()
116
--> 117 with open(STAMP + ".json", "w") as json_file: json_file.write(model.to_json())
118 return model, attention
119
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/network.py in to_json(self, **kwargs)
1296 A JSON string.
1297 """
-> 1298 model_config = self._updated_config()
1299 return json.dumps(
1300 model_config, default=serialization.get_json_type, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/network.py in _updated_config(self)
1274 from tensorflow.python.keras import __version__ as keras_version # pylint: disable=g-import-not-at-top
1275
-> 1276 config = self.get_config()
1277 model_config = {
1278 'class_name': self.__class__.__name__,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/network.py in get_config(self)
966 if not self._is_graph_network:
967 raise NotImplementedError
--> 968 return copy.deepcopy(get_network_config(self))
969
970 #classmethod
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/network.py in get_network_config(network, serialize_layer_fn)
2117 filtered_inbound_nodes.append(node_data)
2118
-> 2119 layer_config = serialize_layer_fn(layer)
2120 layer_config['name'] = layer.name
2121 layer_config['inbound_nodes'] = filtered_inbound_nodes
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py in serialize_keras_object(instance)
273 return serialize_keras_class_and_config(
274 name, {_LAYER_UNDEFINED_CONFIG_KEY: True})
--> 275 raise e
276 serialization_config = {}
277 for key, item in config.items():
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py in serialize_keras_object(instance)
268 name = get_registered_name(instance.__class__)
269 try:
--> 270 config = instance.get_config()
271 except NotImplementedError as e:
272 if _SKIP_FAILED_SERIALIZATION:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in get_config(self)
634 raise NotImplementedError('Layer %s has arguments in `__init__` and '
635 'therefore must override `get_config`.' %
--> 636 self.__class__.__name__)
637 return config
638
NotImplementedError: Layer AttentionWithContext has arguments in `__init__` and therefore must override `get_config`.
This is a TensorFlow feature to notify you that it does not know how to reconstruct your layers since it does not know how to process your configs. Here is a quote from tensorflow's document:
get_config()
Returns the config of the layer.
A layer config is a Python dictionary (serializable) containing the configuration of a layer. The same layer can be reinstantiated later (without its trained weights) from this configuration.
The config of a layer does not include connectivity information, nor the layer class name. These are handled by Network (one layer of abstraction above).
To solve the problem, all you need is to create a get_config method corresponding to __init__ in your class to instruct TensorFlow how to reinstantiate your layer.
def get_config(self):
config = super().get_config().copy()
config.update({
'W_regularizer': self.W_regularizer,
'u_regularizer': self.u_regularizer,
'b_regularizer': self.b_regularizer,
'W_constraint': self.W_constraint,
'u_constraint': self.u_constraint,
'b_constraint': self.b_constraint,
'bias': self.bias,
})
return config
Then you should be able to save and load it.