Why does my model using a custom layer not work properly? - python

I am working on customizing a layer to use in my model.
The core part is the "call" function:
class Custom_Layer(Layer):
    # some code
    def call(self, inputs, **kwargs):
        kernel = mul(self.base, self.diag_start - self.diag_end)
        outputs = matmul(a=inputs, b=kernel)
        if self.use_bias:
            outputs = tf.nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            outputs = self.activation(outputs)
        return outputs
    # some code
and it is used in a simple model:
inputs = tf.keras.layers.Input(shape=(784,), dtype='float32')
layer1 = Custom_Layer(2000, **Custom_layer_config, activation='tanh')(inputs)
layer2 = Custom_Layer(200, **Custom_layer_config, activation='tanh')(layer1)
output_lay = Custom_Layer(10, **Custom_layer_config, activation='softmax')(layer2)
model = tf.keras.models.Model(inputs=inputs, outputs=output_lay)
opt = tf.keras.optimizers.Adamax(learning_rate=0.02)
model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.summary()
It is supposed to print like this:
Model: "functional_13"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_8 (InputLayer) [(None, 784)] 0
_________________________________________________________________
CustomLayer_18 (Custom_Layer) (None, 2000) 1570784
_________________________________________________________________
CustomLayer_19 (Custom_Layer) (None, 200) 402200
_________________________________________________________________
CustomLayer_20 (Custom_Layer) (None, 10) 2210
=================================================================
Total params: 1,975,194
Trainable params: 5,194
Non-trainable params: 1,970,000
_________________________________________________________________
But it prints this:
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 784)] 0
tf.linalg.matmul_3 (TFOpLam (None, 2000) 0
bda)
tf.math.tanh_2 (TFOpLambda) (None, 2000) 0
tf.linalg.matmul_4 (TFOpLam (None, 200) 0
bda)
tf.math.tanh_3 (TFOpLambda) (None, 200) 0
tf.linalg.matmul_5 (TFOpLam (None, 10) 0
bda)
tf.compat.v1.nn.softmax_1 ( (None, 10) 0
TFOpLambda)
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
The first summary is what I got from the author's repository; the second is from my own run of the same code, without changing anything.
The code is not complex, but it is odd that there are no parameters at all.
My question is: what is wrong here?

Try making your layer an inherited class, as in this example.
Sample: custom LSTM class
import tensorflow as tf

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Class / Definition
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class MyLSTMLayer( tf.keras.layers.LSTM ):
    def __init__(self, units, return_sequences, return_state):
        super(MyLSTMLayer, self).__init__( units, return_sequences=True, return_state=False )
        self.num_units = units

    def build(self, input_shape):
        self.kernel = self.add_weight("kernel",
                                      shape=[int(input_shape[-1]), self.num_units])

    def call(self, inputs):
        return tf.matmul(inputs, self.kernel)

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
start = 3
limit = 12
delta = 3
sample = tf.range( start, limit, delta )
sample = tf.cast( sample, dtype=tf.float32 )
sample = tf.constant( sample, shape=( 1, 1, 3 ) )

layer = MyLSTMLayer( 3, True, False )
model = tf.keras.Sequential([
    tf.keras.Input(shape=(1, 3)),
    layer,
])
model.summary()
print( sample )
print( model.predict(sample) )
Output:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
my_lstm_layer (MyLSTMLayer) (None, 1, 3) 9
=================================================================
Total params: 9
Trainable params: 9
Non-trainable params: 0
_________________________________________________________________
tf.Tensor([[[3. 6. 9.]]], shape=(1, 1, 3), dtype=float32)
1/1 [==============================] - 1s 575ms/step
[[[-2.8894916 -2.146874 13.688236 ]]]
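Applying the same idea to the layer in the question: every tensor used in call should be created as a weight via add_weight in build, so that Keras registers it as a parameter instead of tracing the raw ops as TFOpLambda layers. Below is a minimal sketch, not the repository's actual code; the weight shapes are assumptions, chosen so that the parameter counts line up with the expected summary (784*2000 + 784 + 2000 = 1,570,784 for the first layer):
import tensorflow as tf

class Custom_Layer(tf.keras.layers.Layer):
    def __init__(self, units, use_bias=False, activation=None, **kwargs):
        super(Custom_Layer, self).__init__(**kwargs)
        self.units = units
        self.use_bias = use_bias
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        in_dim = int(input_shape[-1])
        # add_weight is what makes these tensors show up in model.summary();
        # plain tensor attributes do not.
        self.base = self.add_weight('base', shape=(in_dim, self.units),
                                    trainable=False)  # frozen base matrix
        self.diag_start = self.add_weight('diag_start', shape=(in_dim, 1))
        self.diag_end = self.add_weight('diag_end', shape=(1, self.units))
        if self.use_bias:
            self.bias = self.add_weight('bias', shape=(self.units,))

    def call(self, inputs, **kwargs):
        # (in_dim, 1) - (1, units) broadcasts to (in_dim, units)
        kernel = tf.multiply(self.base, self.diag_start - self.diag_end)
        outputs = tf.matmul(inputs, kernel)
        if self.use_bias:
            outputs = tf.nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            outputs = self.activation(outputs)
        return outputs
Under these assumed shapes, summary() reports the base matrices as non-trainable (1,568,000 + 400,000 + 2,000 = 1,970,000) and the diag vectors as trainable (2,784 + 2,200 + 210 = 5,194), matching the expected totals.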


Shapes mismatch in TensorFlow model

I wrote the following model fn:
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow_addons as tfa
import tensorflow as tf

def get_model(num_features, output_size, output_bias=None):
    output_bias = tf.keras.initializers.Constant(output_bias)
    opt = Adam(learning_rate=0.0008)
    inputs = Input(shape=[None, num_features], dtype=tf.float32, ragged=True)
    layers = LSTM(32, activation='tanh')(
        inputs.to_tensor(), mask=tf.sequence_mask(inputs.row_lengths()))
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)
    layers = Dense(32, activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)
    layers = Dense(32, activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)
    layers = Dense(output_size, activation='sigmoid',
                   bias_initializer=output_bias)(layers)
    model = Model(inputs, layers)
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=opt,
                  metrics=[tfa.metrics.F1Score(num_classes=2)])
    model.summary()
    return model
Here is the model summary:
Model: "model_5"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_6 (InputLayer) [(None, None, 11)] 0
__________________________________________________________________________________________________
input.row_lengths_5 (InstanceMe (None,) 0 input_6[0][0]
__________________________________________________________________________________________________
input.to_tensor_5 (InstanceMeth (None, None, 11) 0 input_6[0][0]
__________________________________________________________________________________________________
tf.sequence_mask_5 (TFOpLambda) (None, None) 0 input.row_lengths_5[0][0]
__________________________________________________________________________________________________
lstm_5 (LSTM) (None, 32) 5632 input.to_tensor_5[0][0]
tf.sequence_mask_5[0][0]
__________________________________________________________________________________________________
batch_normalization_15 (BatchNo (None, 32) 128 lstm_5[0][0]
__________________________________________________________________________________________________
dropout_15 (Dropout) (None, 32) 0 batch_normalization_15[0][0]
__________________________________________________________________________________________________
dense_15 (Dense) (None, 32) 1056 dropout_15[0][0]
__________________________________________________________________________________________________
batch_normalization_16 (BatchNo (None, 32) 128 dense_15[0][0]
__________________________________________________________________________________________________
dropout_16 (Dropout) (None, 32) 0 batch_normalization_16[0][0]
__________________________________________________________________________________________________
dense_16 (Dense) (None, 32) 1056 dropout_16[0][0]
__________________________________________________________________________________________________
batch_normalization_17 (BatchNo (None, 32) 128 dense_16[0][0]
__________________________________________________________________________________________________
dropout_17 (Dropout) (None, 32) 0 batch_normalization_17[0][0]
__________________________________________________________________________________________________
dense_17 (Dense) (None, 1) 33 dropout_17[0][0]
==================================================================================================
Total params: 8,161
Trainable params: 7,969
Non-trainable params: 192
__________________________________________________________________________________________________
And here are the shapes of my data:
print(train_x.shape,train_y.shape)
print(val_x.shape,val_y.shape)
(52499, None, 11) (52499,)
(17500, None, 11) (17500,)
When trying to fit my model, I get the following error:
model.fit(train_x, train_y, epochs=300, batch_size=500, validation_data=(val_x, val_y))
ValueError: Dimension 0 in both shapes must be equal, but are 2 and 1. Shapes are [2] and [1].
I can't understand what is wrong with the shapes.
Your model seems fine. The problem is that you are running into an open issue with the tfa.metrics.F1Score. For your binary case, you will have to change the parameters of the F1Score to tfa.metrics.F1Score(num_classes=1, threshold=0.5). Here is a complete working example:
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow_addons as tfa
import tensorflow as tf

def get_model(num_features, output_size, output_bias=0.001):
    output_bias = tf.keras.initializers.Constant(output_bias)
    opt = Adam(learning_rate=0.0008)
    inputs = Input(shape=[None, num_features], dtype=tf.float32, ragged=True)
    layers = LSTM(32, activation='tanh')(
        inputs.to_tensor(), mask=tf.sequence_mask(inputs.row_lengths()))
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)
    layers = Dense(32, activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)
    layers = Dense(32, activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = Dropout(0.05)(layers)
    layers = Dense(output_size, activation='sigmoid',
                   bias_initializer=output_bias)(layers)
    model = Model(inputs, layers)
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=opt,
                  metrics=[tfa.metrics.F1Score(num_classes=1, threshold=0.5)])
    model.summary()
    return model

model = get_model(11, 1)

rt = tf.RaggedTensor.from_row_splits(values=tf.ones([5, 11], tf.int32),
                                     row_splits=[0, 2, 5])
model.fit(rt, tf.random.uniform((2, 1), maxval=2), epochs=300, batch_size=2, verbose=2)
Alternatively, you can just define your own F1 score function and set it as a metric in your model. See this post for more information.
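For reference, a minimal batch-wise sketch of such a custom metric (the function name f1_score is mine, and per-batch F1 only approximates the F1 computed over the whole dataset):
import tensorflow.keras.backend as K

def f1_score(y_true, y_pred):
    y_pred = K.round(y_pred)  # threshold the sigmoid output at 0.5
    tp = K.sum(K.cast(y_true * y_pred, 'float32'))
    fp = K.sum(K.cast((1 - y_true) * y_pred, 'float32'))
    fn = K.sum(K.cast(y_true * (1 - y_pred), 'float32'))
    precision = tp / (tp + fp + K.epsilon())
    recall = tp / (tp + fn + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())
It is then passed to compile like any built-in metric: model.compile(..., metrics=[f1_score]).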

Trying to build an encoder-decoder with an attention mechanism, but the graph always ends up disconnected. Could you please help me?

dimensionality = 4

# training encoder
encoder_inputs = Input(shape=(None, num_encoder_tokens))
decoder_inputs = Input(shape=(None, num_decoder_tokens))
encoder = Bidirectional(LSTM(dimensionality, return_sequences=True, return_state=True,
                             go_backwards=True), merge_mode='sum')
encoder_outputs, for_h, for_c, bac_h, bac_c = encoder(encoder_inputs)
encoder_states = [tf.add(for_h, for_c), tf.add(bac_h, bac_h)]

# training decoder
decoder = LSTM(dimensionality, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder(decoder_inputs, initial_state=encoder_states)
dot_prod = dot([decoder_outputs, encoder_outputs], axes=[2, 2])
attention = Activation('softmax', name='attention')
attention_vec = attention(dot_prod)
context = dot([attention_vec, encoder_outputs], axes=[2, 1])
decoder_comb = concatenate([context, decoder_outputs], name='decoder_comb')
dense = Dense(num_decoder_tokens, activation='softmax')
output = dense(decoder_comb)
training_model = Model([encoder_inputs, decoder_inputs], output)
Here you can find the summary:
Model: "functional_12"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_13 (InputLayer) [(None, None, 1780)] 0
__________________________________________________________________________________________________
bidirectional_2 (Bidirectional) [(None, None, 4), (N 57120 input_13[0][0]
__________________________________________________________________________________________________
input_14 (InputLayer) [(None, None, 2257)] 0
__________________________________________________________________________________________________
tf_op_layer_Add_4 (TensorFlowOp [(None, 4)] 0 bidirectional_2[0][1]
bidirectional_2[0][2]
__________________________________________________________________________________________________
tf_op_layer_Add_5 (TensorFlowOp [(None, 4)] 0 bidirectional_2[0][3]
bidirectional_2[0][3]
__________________________________________________________________________________________________
lstm_5 (LSTM) [(None, None, 4), (N 36192 input_14[0][0]
tf_op_layer_Add_4[0][0]
tf_op_layer_Add_5[0][0]
__________________________________________________________________________________________________
dot_12 (Dot) (None, None, None) 0 lstm_5[0][0]
bidirectional_2[0][0]
__________________________________________________________________________________________________
attention (Activation) (None, None, None) 0 dot_12[0][0]
__________________________________________________________________________________________________
dot_13 (Dot) (None, None, 4) 0 attention[0][0]
bidirectional_2[0][0]
__________________________________________________________________________________________________
decoder_comb (Concatenate) (None, None, 8) 0 dot_13[0][0]
lstm_5[0][0]
__________________________________________________________________________________________________
dense_2 (Dense) (None, None, 2257) 20313 decoder_comb[0][0]
==================================================================================================
Total params: 113,625
Trainable params: 113,625
Non-trainable params: 0
__________________________________________________________________________________________________
And finally, below I paste my attempt to separate the encoder and decoder in order to do inference, but it raises an error. I tried to use the training_model layers/output/input wherever I could, but there is still something that I am missing.
# inference encoder
encoder_model = Model(encoder_inputs, encoder_states)

# inference decoder
decoder_s_h = Input(shape=(dimensionality,))
decoder_s_c = Input(shape=(dimensionality,))
decoder_states_inputs = [decoder_s_h, decoder_s_c]
decoder_outputs, state_h, state_c = decoder(decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
dot_prod = dot([decoder_outputs, encoder_outputs], axes=[2, 2])
attention_vec = attention(dot_prod)
context = dot([attention_vec, encoder_outputs], axes=[2, 1])
decoder_comb = concatenate([context, decoder_outputs])
output = dense(decoder_comb)
decoder_model = Model([decoder_inputs] + decoder_states_inputs, [output] + decoder_states)
I have tried many times to change this configuration, but I can't resolve the graph disconnection. Could you help me?
PS: I am new to NLP, so please be kind; I am a student, not yet a deep learning specialist.
Thank you so much for your time and help!
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-44-89f9761124cf> in <module>()
18 output= dense(decoder_comb)
19
---> 20 decoder_model = Model([decoder_inputs] + decoder_states_inputs, [output] + decoder_states)
21
22 #encoder decoder model
5 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/functional.py in _map_graph_network(inputs, outputs)
929 'The following previous layers '
930 'were accessed without issue: ' +
--> 931 str(layers_with_complete_input))
932 for x in nest.flatten(node.outputs):
933 computable_tensors.add(id(x))
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_13:0", shape=(None, None, 1780), dtype=float32) at layer "bidirectional_2". The following previous layers were accessed without issue: ['lstm_5']
You cannot use the input/output properties when creating models with the functional API.
Try changing it to something like this:
encoder_inputs = Input(shape=(None, num_encoder_tokens))
decoder_inputs = Input(shape=(None, num_decoder_tokens))
encoder_outputs, for_hidden, for_cell, bac_hidden, bac_cell = training_model([encoder_inputs, decoder_inputs])
Another error relates to this line:
encoder_model = Model(encoder_inputs, encoder_states)
where encoder_states is not dependent on encoder_inputs, so TensorFlow is not able to build the graph.
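One way to actually wire the inference models without a disconnection (my own sketch, reusing the layers defined in the question rather than code from the original answer) is to feed the encoder's output sequence to the inference decoder as an explicit Input:
# Inference encoder: also return the output sequence needed by the attention.
encoder_model = Model(encoder_inputs, [encoder_outputs] + encoder_states)

# Inference decoder: the encoder outputs arrive through their own Input, so the
# decoder graph no longer reaches back into the encoder's tensors.
decoder_s_h = Input(shape=(dimensionality,))
decoder_s_c = Input(shape=(dimensionality,))
encoder_outs_input = Input(shape=(None, dimensionality))

decoder_outputs, state_h, state_c = decoder(decoder_inputs,
                                            initial_state=[decoder_s_h, decoder_s_c])
dot_prod = dot([decoder_outputs, encoder_outs_input], axes=[2, 2])
attention_vec = attention(dot_prod)
context = dot([attention_vec, encoder_outs_input], axes=[2, 1])
output = dense(concatenate([context, decoder_outputs]))

decoder_model = Model([decoder_inputs, encoder_outs_input, decoder_s_h, decoder_s_c],
                      [output, state_h, state_c])
At each decoding step you would run encoder_model once, then pass its output sequence and states into decoder_model along with the current decoder input.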

How can I use tf.keras.Model.summary to see the layers of a child model nested in a parent model?

I have a subclass of tf.keras.Model; the code is as follows:
import tensorflow as tf

class Mymodel(tf.keras.Model):
    def __init__(self, classes, backbone_model, *args, **kwargs):
        super(Mymodel, self).__init__(self, args, kwargs)
        self.backbone = backbone_model
        self.classify_layer = tf.keras.layers.Dense(classes, activation='sigmoid')

    def call(self, inputs):
        x = self.backbone(inputs)
        x = self.classify_layer(x)
        return x

inputs = tf.keras.Input(shape=(224, 224, 3))
model = Mymodel(inputs=inputs, classes=61,
                backbone_model=tf.keras.applications.MobileNet())
model.build(input_shape=(20, 224, 224, 3))
model.summary()
The result is:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
mobilenet_1.00_224 (Model) (None, 1000) 4253864
_________________________________________________________________
dense (Dense) multiple 61061
=================================================================
Total params: 4,314,925
Trainable params: 4,293,037
Non-trainable params: 21,888
_________________________________________________________________
But I want to see all the layers of MobileNet, so I tried to extract all the layers of MobileNet and put them in the model:
import tensorflow as tf

class Mymodel(tf.keras.Model):
    def __init__(self, classes, backbone_model, *args, **kwargs):
        super(Mymodel, self).__init__(self, args, kwargs)
        self.backbone = backbone_model
        self.classify_layer = tf.keras.layers.Dense(classes, activation='sigmoid')

    def my_process_layers(self, inputs):
        layers = self.backbone.layers
        tmp_x = inputs
        for i in range(1, len(layers)):
            tmp_x = layers[i](tmp_x)
        return tmp_x

    def call(self, inputs):
        x = self.my_process_layers(inputs)
        x = self.classify_layer(x)
        return x

inputs = tf.keras.Input(shape=(224, 224, 3))
model = Mymodel(inputs=inputs, classes=61,
                backbone_model=tf.keras.applications.MobileNet())
model.build(input_shape=(20, 224, 224, 3))
model.summary()
The result did not change:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
mobilenet_1.00_224 (Model) (None, 1000) 4253864
_________________________________________________________________
dense (Dense) multiple 61061
=================================================================
Total params: 4,314,925
Trainable params: 4,293,037
Non-trainable params: 21,888
_________________________________________________________________
Then I tried to extract one layer and insert it into the model:
import tensorflow as tf

class Mymodel(tf.keras.Model):
    def __init__(self, classes, backbone_model, *args, **kwargs):
        super(Mymodel, self).__init__(self, args, kwargs)
        self.backbone = backbone_model
        self.classify_layer = tf.keras.layers.Dense(classes, activation='sigmoid')

    def call(self, inputs):
        x = self.backbone.layers[1](inputs)
        x = self.classify_layer(x)
        return x

inputs = tf.keras.Input(shape=(224, 224, 3))
model = Mymodel(inputs=inputs, classes=61,
                backbone_model=tf.keras.applications.MobileNet())
model.build(input_shape=(20, 224, 224, 3))
model.summary()
It did not change either. I am so confused:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
mobilenet_1.00_224 (Model) (None, 1000) 4253864
_________________________________________________________________
dense (Dense) multiple 244
=================================================================
Total params: 4,254,108
Trainable params: 4,232,220
Non-trainable params: 21,888
_________________________________________________________________
But I find that the parameter count of the dense layer changed; I don't know what happened.
@Ioannis's answer is perfectly fine, but unfortunately it drops the Keras 'Model Subclassing' structure that is present in the question. If, just like me, you want to keep this model subclassing and still show all layers in the summary, you can branch down into all the individual layers of the more complex model using a for loop:
class MyMobileNet(tf.keras.Sequential):
    def __init__(self, input_shape=(224, 224, 3), classes=61):
        super(MyMobileNet, self).__init__()
        self.backbone_model = [layer for layer in
                               tf.keras.applications.MobileNet(input_shape, include_top=False, pooling='avg').layers]
        self.classificator = tf.keras.layers.Dense(classes, activation='sigmoid', name='classificator')

    def call(self, inputs):
        x = inputs
        for layer in self.backbone_model:
            x = layer(x)
        x = self.classificator(x)
        return x

model = MyMobileNet()
After this we can directly build the model and call the summary:
model.build(input_shape=(None, 224, 224, 3))
model.summary()
Output:
Model: "my_mobile_net"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1_pad (ZeroPadding2D) (None, 225, 225, 3) 0
_________________________________________________________________
conv1 (Conv2D) (None, 112, 112, 32) 864
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32) 128
_________________________________________________________________
....
....
conv_pw_13 (Conv2D) (None, 7, 7, 1024) 1048576
_________________________________________________________________
conv_pw_13_bn (BatchNormaliz (None, 7, 7, 1024) 4096
_________________________________________________________________
conv_pw_13_relu (ReLU) (None, 7, 7, 1024) 0
_________________________________________________________________
global_average_pooling2d_13 (None, 1024) 0
_________________________________________________________________
classificator (Dense) multiple 62525
=================================================================
Total params: 3,291,389
Trainable params: 3,269,501
Non-trainable params: 21,888
_________________________________________________________________
In order to be able to view the backbone's layers, you'll have to construct your new model using backbone.input and backbone.output:
from tensorflow.keras.models import Model

def Mymodel(backbone_model, classes):
    backbone = backbone_model
    x = backbone.output
    x = tf.keras.layers.Dense(classes, activation='sigmoid')(x)
    model = Model(inputs=backbone.input, outputs=x)
    return model

input_shape = (224, 224, 3)
model = Mymodel(backbone_model=tf.keras.applications.MobileNet(input_shape=input_shape,
                                                               include_top=False, pooling='avg'),
                classes=61)
model.summary()
There is an expand_nested argument in the summary method:
model.summary(expand_nested=True)
Alternatively, call summary on each nested sub-model (plain layers have no summary method):
for layer in model.layers:
    if isinstance(layer, tf.keras.Model):
        layer.summary()

Keras - functional and sequential models give different results

I implemented two models, once using the sequential way and once with the functional API. The two models give different results, which makes no sense to me.
I cannot figure out what the problem is. Any ideas or solutions?
Here are both models:
Sequential model:
model = Sequential()
embedding_layer = Embedding(VOCAB_SIZE + 1, EMBEDDING_SIZE, mask_zero=True)
model.add(embedding_layer)
model.add(Bidirectional(LSTM(HIDDEN_SIZE, return_sequences=True)))
model.add(TimeDistributed(Dense(NUM_LABELS, activation='softmax')))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(train_sents_padded, train_labels_padded, batch_size=4, epochs=10,
          validation_data=(dev_sents_padded, dev_labels_padded))

score, acc = model.evaluate(dev_sents_padded, dev_labels_padded)
print("\nAccuracy: ", acc)
Functional model:
inputs = Input(shape=(MAX_LENGTH,))
embedding = Embedding(VOCAB_SIZE + 1, EMBEDDING_SIZE, mask_zero=True)(inputs)
left = LSTM(HIDDEN_SIZE, return_sequences=True)(embedding)
right = LSTM(HIDDEN_SIZE, go_backwards=True, return_sequences=True)(embedding)
left_right = concatenate([left, right])
left_right = TimeDistributed(Dense(NUM_LABELS, activation='softmax'))(left_right)
combined_model = Model(inputs=inputs, outputs=left_right)
combined_model.compile(loss='categorical_crossentropy', optimizer='adam',
                       metrics=['accuracy'])
combined_model.fit(
    train_sents_padded,
    train_labels_padded,
    batch_size=4,
    epochs=10,
    validation_data=(dev_sents_padded, dev_labels_padded)
)

score, acc = combined_model.evaluate(dev_sents_padded, dev_labels_padded)
print("\nBidirectional LSTM Accuracy: ", acc)
+++
Summaries:
Sequential model:
Layer (type) Output Shape Param #
=================================================================
embedding_1 (Embedding) (None, None, 50) 26150
_________________________________________________________________
bidirectional_1 (Bidirection (None, None, 100) 40400
_________________________________________________________________
time_distributed_1 (TimeDist (None, None, 61) 6161
=================================================================
Total params: 72,711
Trainable params: 72,711
Non-trainable params: 0
Functional model:
Layer (type) Output Shape Param # Connected to
=================================================================
input_1 (InputLayer) (None, 34) 0
_________________________________________________________________
embedding_2 (Embedding) (None, 34, 50) 26150 input_1[0][0]
_________________________________________________________________
lstm_2 (LSTM) (None, 34, 50) 20200 embedding_2[0][0]
_________________________________________________________________
lstm_3 (LSTM) (None, 34, 50) 20200 embedding_2[0][0]
_________________________________________________________________
concatenate_1 (Concatenate) (None, 34, 100) 0 lstm_2[0][0]
                                              lstm_3[0][0]
_________________________________________________________________
time_distributed_2 (TimeDist (None, 34, 61) 6161 concatenate_1[0][0]
=================================================================
Total params: 72,711
Trainable params: 72,711
Non-trainable params: 0
+++
If I change VOCAB_SIZE + 1 to VOCAB_SIZE in the sequential model, the accuracy is 59, but only on every third run??

How to implement a custom layer with multiple inputs in Keras

I need to implement a custom layer like this:
class MaskedDenseLayer(Layer):
    def __init__(self, output_dim, activation, **kwargs):
        self.output_dim = output_dim
        super(MaskedDenseLayer, self).__init__(**kwargs)
        self._activation = activations.get(activation)

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[0][1], self.output_dim),
                                      initializer='glorot_uniform',
                                      trainable=True)
        super(MaskedDenseLayer, self).build(input_shape)

    def call(self, l):
        self.x = l[0]
        self._mask = l[1][1]
        print('kernel:', self.kernel)
        masked = Multiply()([self.kernel, self._mask])
        self._output = K.dot(self.x, masked)
        return self._activation(self._output)

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], self.output_dim)
This is just like the way the Keras API documentation introduces implementing custom layers.
And I need to give two inputs to this layer, like this:
def main():
    with np.load('datasets/simple_tree.npz') as dataset:
        inputsize = dataset['inputsize']
        train_length = dataset['train_length']
        train_data = dataset['train_data']
        valid_length = dataset['valid_length']
        valid_data = dataset['valid_data']
        test_length = dataset['test_length']
        test_data = dataset['test_data']
        params = dataset['params']

    num_of_all_masks = 20
    num_of_hlayer = 6
    hlayer_size = 5
    graph_size = 4
    all_masks = generate_all_masks(num_of_all_masks, num_of_hlayer, hlayer_size, graph_size)

    input_layer = Input(shape=(4,))
    mask_1 = Input(shape=(graph_size, hlayer_size))
    mask_2 = Input(shape=(hlayer_size, hlayer_size))
    mask_3 = Input(shape=(hlayer_size, hlayer_size))
    mask_4 = Input(shape=(hlayer_size, hlayer_size))
    mask_5 = Input(shape=(hlayer_size, hlayer_size))
    mask_6 = Input(shape=(hlayer_size, hlayer_size))
    mask_7 = Input(shape=(hlayer_size, graph_size))

    hlayer1 = MaskedDenseLayer(hlayer_size, 'relu')([input_layer, mask_1])
    hlayer2 = MaskedDenseLayer(hlayer_size, 'relu')([hlayer1, mask_2])
    hlayer3 = MaskedDenseLayer(hlayer_size, 'relu')([hlayer2, mask_3])
    hlayer4 = MaskedDenseLayer(hlayer_size, 'relu')([hlayer3, mask_4])
    hlayer5 = MaskedDenseLayer(hlayer_size, 'relu')([hlayer4, mask_5])
    hlayer6 = MaskedDenseLayer(hlayer_size, 'relu')([hlayer5, mask_6])
    output_layer = MaskedDenseLayer(graph_size, 'sigmoid')([hlayer6, mask_7])

    autoencoder = Model(inputs=[input_layer, mask_1, mask_2, mask_3,
                                mask_4, mask_5, mask_6, mask_7], outputs=[output_layer])
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

    #reassign_mask = ReassignMask()

    for i in range(0, num_of_all_masks):
        state = np.random.randint(0, 20)
        autoencoder.fit(x=[train_data,
                           np.tile(all_masks[state][0], [300, 1, 1]),
                           np.tile(all_masks[state][1], [300, 1, 1]),
                           np.tile(all_masks[state][2], [300, 1, 1]),
                           np.tile(all_masks[state][3], [300, 1, 1]),
                           np.tile(all_masks[state][4], [300, 1, 1]),
                           np.tile(all_masks[state][5], [300, 1, 1]),
                           np.tile(all_masks[state][6], [300, 1, 1])],
                        y=[train_data],
                        epochs=1,
                        batch_size=20,
                        shuffle=True,
                        #validation_data=(valid_data, valid_data),
                        #callbacks=[reassign_mask],
                        verbose=1)
Unfortunately, when I run this code I get the following error:
TypeError: can only concatenate tuple (not "int") to tuple
What I need is a way to implement a custom layer with two inputs: the previous layer and a mask matrix.
Here the all_masks variable is a list containing some pre-generated masks for all layers.
Can anyone help? What's wrong with my code?
Update
Some parameters:
train data: (300, 4)
number of hidden layers: 6
hidden layer units: 5
mask: (size of previous layer, size of current layer)
And here is my model summary:
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_361 (InputLayer) (None, 4) 0
__________________________________________________________________________________________________
input_362 (InputLayer) (None, 4, 5) 0
__________________________________________________________________________________________________
masked_dense_layer_281 (MaskedD (None, 5) 20 input_361[0][0]
input_362[0][0]
__________________________________________________________________________________________________
input_363 (InputLayer) (None, 5, 5) 0
__________________________________________________________________________________________________
masked_dense_layer_282 (MaskedD (None, 5) 25 masked_dense_layer_281[0][0]
input_363[0][0]
__________________________________________________________________________________________________
input_364 (InputLayer) (None, 5, 5) 0
__________________________________________________________________________________________________
masked_dense_layer_283 (MaskedD (None, 5) 25 masked_dense_layer_282[0][0]
input_364[0][0]
__________________________________________________________________________________________________
input_365 (InputLayer) (None, 5, 5) 0
__________________________________________________________________________________________________
masked_dense_layer_284 (MaskedD (None, 5) 25 masked_dense_layer_283[0][0]
input_365[0][0]
__________________________________________________________________________________________________
input_366 (InputLayer) (None, 5, 5) 0
__________________________________________________________________________________________________
masked_dense_layer_285 (MaskedD (None, 5) 25 masked_dense_layer_284[0][0]
input_366[0][0]
__________________________________________________________________________________________________
input_367 (InputLayer) (None, 5, 5) 0
__________________________________________________________________________________________________
masked_dense_layer_286 (MaskedD (None, 5) 25 masked_dense_layer_285[0][0]
input_367[0][0]
__________________________________________________________________________________________________
input_368 (InputLayer) (None, 5, 4) 0
__________________________________________________________________________________________________
masked_dense_layer_287 (MaskedD (None, 4) 20 masked_dense_layer_286[0][0]
input_368[0][0]
==================================================================================================
Total params: 165
Trainable params: 165
Non-trainable params: 0
Your input_shape is a list of tuples.
input_shape: [(None, 4), (None, 4, 5)]
You can't simply use input_shape[0] or input_shape[1]. If you want to use the actual values, you have to choose which tuple, then which value. Example:
self.kernel = self.add_weight(name='kernel',
                              # here:
                              shape=(input_shape[0][1], self.output_dim),
                              initializer='glorot_uniform',
                              trainable=True)
The same would be necessary (following your own shape rules) in the method compute_output_shape, where it seems what you want is to concatenate tuples:
return input_shape[0] + (self.output_dim,)
Don't forget to uncomment the super(MaskedDenseLayer, self).build(input_shape) line.
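Putting the answer's fixes together, here is a minimal self-contained sketch of the corrected layer with a tiny smoke test (my reconstruction under the question's shape conventions; self.kernel * mask replaces the Multiply() layer, and l[1][1] follows the question's trick of taking one mask from the tiled batch):
import numpy as np
from tensorflow.keras import activations
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer, Input
from tensorflow.keras.models import Model

class MaskedDenseLayer(Layer):
    def __init__(self, output_dim, activation, **kwargs):
        super(MaskedDenseLayer, self).__init__(**kwargs)
        self.output_dim = output_dim
        self._activation = activations.get(activation)

    def build(self, input_shape):
        # input_shape is a list: [(None, prev_size), (None, prev_size, output_dim)]
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[0][1], self.output_dim),
                                      initializer='glorot_uniform',
                                      trainable=True)
        super(MaskedDenseLayer, self).build(input_shape)

    def call(self, l):
        x, mask = l[0], l[1][1]      # all samples carry the same tiled mask
        masked = self.kernel * mask  # elementwise product of equal shapes
        return self._activation(K.dot(x, masked))

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], self.output_dim)

# Smoke test with the sizes from the question
x_in = Input(shape=(4,))
m_in = Input(shape=(4, 5))
out = MaskedDenseLayer(5, 'relu')([x_in, m_in])
model = Model([x_in, m_in], out)
print(model.predict([np.ones((2, 4)), np.tile(np.ones((4, 5)), (2, 1, 1))]).shape)  # (2, 5)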
