I am getting an error in Tensorflow 2. How can I solve it?
Here is my code (assume all the relevant modules/objects of Keras have been imported):
# Multi-output Keras model from the question: eight scalar inputs, embeddings
# for the two categorical features, a three-layer dense trunk, and two heads
# ("cut" with a sigmoid, "price" linear).
# NOTE(review): this listing is the failing version; the names `color`,
# `clarity` and `x` are rebound below and then reused as model inputs.
dense1 = 2**7
dense2 = 2**8
dense3 = 2**9
dropout = 0.8
price_loss = 1
cut_loss = 1
activation= LeakyReLU()
#====================================================================
# INPUTS
#====================================================================
# NOTE(review): `batch_size` is not assigned anywhere in this listing; it
# must already exist in the session for the Input calls to run.
#----------------------------------------------------------------
carat = Input(
shape= (1,),
batch_size= batch_size,
name= 'carat'
)
#----------------------------------------------------------------
color = Input(
shape= (1,),
batch_size= batch_size,
name= 'color'
)
#----------------------------------------------------------------
clarity = Input(
shape= (1,),
batch_size= batch_size,
name= 'clarity'
)
#----------------------------------------------------------------
depth = Input(
shape= (1,),
batch_size= batch_size,
name= 'depth'
)
#----------------------------------------------------------------
table = Input(
shape= (1,),
batch_size= batch_size,
name= 'table'
)
#----------------------------------------------------------------
x = Input(
shape= (1,),
batch_size= batch_size,
name= 'x'
)
#----------------------------------------------------------------
y = Input(
shape= (1,),
batch_size= batch_size,
name= 'y'
)
#----------------------------------------------------------------
z = Input(
shape= (1,),
batch_size= batch_size,
name= 'z'
)
#----------------------------------------------------------------
#====================================================================
# CREATE EMBEDDINGS FOR CATEGORICAL FEATURES "COLOR" AND "CLARITY"
#====================================================================
# NOTE(review): from here on `color` and `clarity` no longer refer to the
# Input tensors created above; they are rebound to the Embedding output and
# then to the Flatten output.
color = Embedding(input_dim = 7, output_dim = 1, name = 'color_emb')(color)
clarity = Embedding(input_dim = 8, output_dim = 1, name = 'clarity_emb')(clarity)
color = Flatten()(color)
clarity = Flatten()(clarity)
#====================================================================
# CONCATENATE FEATURES
#====================================================================
# NOTE(review): `x` is rebound here as well; the Input named 'x' is only
# reachable through this Concatenate call afterwards.
x = Concatenate()([color, clarity, carat, depth, table, x, y, z])
#====================================================================
# DENSE NETWORK
#====================================================================
# The same LeakyReLU instance is passed as the activation of all three layers.
x = Dense(dense1, activation = activation)(x)
x = BatchNormalization()(x)
x = Dense(dense2, activation = activation)(x)
x = BatchNormalization()(x)
x = Dense(dense3, activation = activation)(x)
x = BatchNormalization()(x)
x = Dropout(dropout)(x)
#====================================================================
# PREDICTIONS
# ====================================================================
# NOTE(review): neither head is given a `name=`, while compile() below keys
# its loss/metrics dicts by "price" and "cut" -- presumably those keys are
# matched against output layer names; confirm.
cut = Dense(1, activation = 'sigmoid')(x)
price = Dense(1)(x)
#====================================================================
# DEFINE THE MODEL
# ====================================================================
# NOTE(review): at this point `color` and `clarity` are Flatten outputs and
# `x` is the Dropout output, so three of the eight `inputs` are not Input
# tensors. This matches the three warnings and the "Graph disconnected"
# error quoted after the listing.
model = Model(inputs = [carat, color, clarity, depth, table, x, y, z] , outputs = [cut , price])
#====================================================================
# COMPILE THE MODEL
# ====================================================================
model.compile(
optimizer = 'Adam',
loss = {
"price": "huber_loss",
"cut": "binary_crossentropy",
},
# NOTE(review): this list is ordered [price, cut] while `outputs` above is
# ordered [cut, price]; harmless while both weights are 1, but verify the
# intended pairing before changing either value.
loss_weights = [price_loss, cut_loss],
metrics = {
"price": ["mean_absolute_percentage_error"],
# NOTE(review): `thresholds` is not assigned anywhere in this listing.
"cut": [tf.keras.metrics.AUC(), tf.keras.metrics.Precision(thresholds = thresholds)],
}
)
Stack trace:
WARNING:tensorflow:Functional inputs must come from `tf.keras.Input` (thus holding past layer metadata), they cannot be the output of a previous non-Input layer. Here, a tensor specified as input to "functional_1" was not an Input tensor, it was generated by layer flatten_8.
Note that input tensors are instantiated via `tensor = tf.keras.Input(shape)`.
The tensor that caused the issue was: flatten_8/Reshape:0
WARNING:tensorflow:Functional inputs must come from `tf.keras.Input` (thus holding past layer metadata), they cannot be the output of a previous non-Input layer. Here, a tensor specified as input to "functional_1" was not an Input tensor, it was generated by layer flatten_9.
Note that input tensors are instantiated via `tensor = tf.keras.Input(shape)`.
The tensor that caused the issue was: flatten_9/Reshape:0
WARNING:tensorflow:Functional inputs must come from `tf.keras.Input` (thus holding past layer metadata), they cannot be the output of a previous non-Input layer. Here, a tensor specified as input to "functional_1" was not an Input tensor, it was generated by layer dropout_2.
Note that input tensors are instantiated via `tensor = tf.keras.Input(shape)`.
The tensor that caused the issue was: dropout_2/cond/Identity:0
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-64-132a2d8458b9> in <module>
135 # ====================================================================
136
--> 137 model = Model(inputs = [carat, color, clarity, depth, table, x, y, z] , outputs = [cut , price])
138
139 #====================================================================
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\engine\training.py in __new__(cls, *args, **kwargs)
240 # Functional model
241 from tensorflow.python.keras.engine import functional # pylint: disable=g-import-not-at-top
--> 242 return functional.Functional(*args, **kwargs)
243 else:
244 return super(Model, cls).__new__(cls, *args, **kwargs)
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\engine\functional.py in __init__(self, inputs, outputs, name, trainable)
113 # 'arguments during initialization. Got an unexpected argument:')
114 super(Functional, self).__init__(name=name, trainable=trainable)
--> 115 self._init_graph_network(inputs, outputs)
116
117 #trackable.no_automatic_dependency_tracking
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\engine\functional.py in _init_graph_network(self, inputs, outputs)
189 # Keep track of the network's nodes and layers.
190 nodes, nodes_by_depth, layers, _ = _map_graph_network(
--> 191 self.inputs, self.outputs)
192 self._network_nodes = nodes
193 self._nodes_by_depth = nodes_by_depth
~\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\engine\functional.py in _map_graph_network(inputs, outputs)
929 'The following previous layers '
930 'were accessed without issue: ' +
--> 931 str(layers_with_complete_input))
932 for x in nest.flatten(node.outputs):
933 computable_tensors.add(id(x))
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("clarity_8:0", shape=(20, 1), dtype=float32) at layer "clarity_emb". The following previous layers were accessed without issue: []
Be careful not to overwrite the input variables: inside the network you rebound `color`, `clarity` and `x`, which originally held the `Input` tensors.
Here is a possible solution:
# Widths of the three hidden Dense layers (128, 256, 512).
dense1, dense2, dense3 = 2**7, 2**8, 2**9
dropout = 0.8
price_loss = cut_loss = 1
activation = LeakyReLU()
batch_size = 32


def _scalar_input(feature_name):
    """Return a single-column Keras Input using the module-level batch size."""
    return Input(shape=(1,), batch_size=batch_size, name=feature_name)


#====================================================================
# INPUTS
#====================================================================
# The Input handles for the two categorical features and for 'x' keep their
# own names (Color, Clarity, X) so that the tensors derived from them below
# never replace them.
carat = _scalar_input('carat')
Color = _scalar_input('color')
Clarity = _scalar_input('clarity')
depth = _scalar_input('depth')
table = _scalar_input('table')
X = _scalar_input('x')
y = _scalar_input('y')
z = _scalar_input('z')

#====================================================================
# CREATE EMBEDDINGS FOR CATEGORICAL FEATURES "COLOR" AND "CLARITY"
#====================================================================
color_embedding = Embedding(input_dim=7, output_dim=1, name='color_emb')
clarity_embedding = Embedding(input_dim=8, output_dim=1, name='clarity_emb')
color = color_embedding(Color)
clarity = clarity_embedding(Clarity)
color = Flatten()(color)
clarity = Flatten()(clarity)

#====================================================================
# CONCATENATE FEATURES
#====================================================================
x = Concatenate()([color, clarity, carat, depth, table, X, y, z])

#====================================================================
# DENSE NETWORK
#====================================================================
# Dense -> BatchNormalization, repeated once per hidden width.
for units in (dense1, dense2, dense3):
    x = Dense(units, activation=activation)(x)
    x = BatchNormalization()(x)
x = Dropout(dropout)(x)

#====================================================================
# PREDICTIONS
#====================================================================
cut = Dense(1, activation='sigmoid')(x)
price = Dense(1)(x)

#====================================================================
# DEFINE THE MODEL
#====================================================================
# Only the original Input tensors are listed as model inputs.
model = Model(
    inputs=[carat, Color, Clarity, depth, table, X, y, z],
    outputs=[cut, price],
)
model.compile('adam', 'mse')
model.summary()
Here is the running notebook: https://colab.research.google.com/drive/1akpvuGKHXt6Frrec692zvCMAfRzZbIaM?usp=sharing
As usual when you are stuck on an error, the answer lies in the error message. You were able to lay out the structure of your model, but by reassigning the variables color, clarity and so on, you changed what they refer to. At the beginning you initialize these variables as tf.keras.Input tensors, and then you overwrite them with the outputs of the Embedding and Flatten layers. So when it comes to building your model, you are asking Keras to build it with inputs that are no longer tf.keras.Input tensors but something else.
The solution is to not overwrite those variables.
Related
I implemented this code for node classification task on IEEE-118bus dataset. The input size x=[2960,118], float32 numpy array and y=[2960,1] is 0 or 1 labels for each graph. My goal is to run a binary classification on the x graphs with batchsize 2960 based on their node features. Below is my code:
# Hyperparameters read by the model builders and by run_experiment().
hidden_units = [64, 64]
learning_rate = 0.01
dropout_rate = 0.5
num_epochs = 50
batch_size = 256
def run_experiment(model, x_train, y_train):
    """Compile ``model`` and fit it on ``x_train`` / ``y_train``.

    The model is compiled with Adam at the module-level ``learning_rate``,
    binary cross-entropy computed from logits, and a binary-accuracy metric
    named "acc". Training uses the module-level ``num_epochs`` and
    ``batch_size``, holds out 10% of the data for validation, and attaches an
    early-stopping callback that watches the training loss.

    Returns:
        The value returned by ``model.fit`` (the training history).
    """
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)
    accuracy = keras.metrics.BinaryAccuracy(name="acc")
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=[accuracy])

    # Early stopping on the training loss, restoring the best weights seen.
    stop_early = keras.callbacks.EarlyStopping(
        monitor='loss',
        patience=110,
        restore_best_weights=True,
    )

    return model.fit(
        x=x_train,
        y=y_train,
        epochs=num_epochs,
        batch_size=batch_size,
        validation_split=0.1,
        callbacks=[stop_early],
    )
def create_ffn(hidden_units, dropout_rate, name=None):
    """Build a feed-forward stack wrapped in a ``keras.Sequential``.

    For every entry of ``hidden_units`` the stack gets, in order,
    BatchNormalization, Dropout(``dropout_rate``) and a ReLU Dense layer of
    that width.

    Args:
        hidden_units: iterable of Dense layer widths.
        dropout_rate: rate passed to every Dropout layer.
        name: optional name for the Sequential model.
    """
    stack = []
    for width in hidden_units:
        stack.extend(
            [
                layers.BatchNormalization(),
                layers.Dropout(dropout_rate),
                layers.Dense(width, activation=tf.nn.relu),
            ]
        )
    return keras.Sequential(stack, name=name)
# Assemble the (node_features, edges, edge_weights) triple that
# GNNNodeClassifier unpacks as `graph_info`.
# `train_graph_pi` and `adj_mat` are defined outside this listing.
node_features = train_graph_pi
# Round-trip the adjacency matrix through a Python list into a torch tensor
# so that torch's nonzero() can be used to list the edges.
adj_new = adj_mat.tolist()
adj_t = torch.tensor(adj_new)
# Indices of the non-zero adjacency entries, transposed -- presumably shape
# [2, num_edges] when adj_mat is 2-D; confirm against the printed shape.
edge_index = adj_t.nonzero().t().contiguous()
edges = edge_index.numpy()
# No per-edge weights are supplied; GNNNodeClassifier substitutes ones
# (scaled to sum to 1) when this is None.
edge_weights = None
graph_info = (node_features, edges, edge_weights)
print("Edges shape:", edges.shape)
print("Nodes shape:", node_features.shape)
class GraphConvLayer(layers.Layer):
    """Message-passing graph convolution: prepare -> aggregate -> update.

    Args:
        hidden_units: list of layer widths used by the internal feed-forward
            (and, for the GRU combination, recurrent) stacks.
        dropout_rate: dropout rate used inside those stacks.
        aggregation_type: how neighbour messages are pooled per node; one of
            "sum", "mean" or "max".
        combination_type: how a node's own representation is merged with its
            aggregated messages; one of "gru", "concat" or "add". "gated" is
            accepted as an alias of "gru".
        normalize: if true, L2-normalise the resulting node embeddings.
    """

    def __init__(
        self,
        hidden_units,
        dropout_rate=0.5,
        aggregation_type="sum",
        combination_type="gru",
        normalize=False,
        *args,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)

        self.aggregation_type = aggregation_type
        self.combination_type = combination_type
        self.normalize = normalize

        # The constructor used to test for "gated" while update() tested for
        # "gru", so the GRU was never used with the default "gru" and "gated"
        # always ended in the ValueError branch of update(). One flag now
        # drives both places and both spellings are accepted.
        self._uses_gru = combination_type in ("gru", "gated")

        self.ffn_prepare = create_ffn(hidden_units, dropout_rate)
        if self._uses_gru:
            # One GRU per hidden width. return_sequences=True keeps the
            # two-step axis that update() unstacks afterwards; return_state
            # is left off because it would turn the output into a list.
            self.update_fn = keras.Sequential(
                [
                    layers.GRU(
                        units=units,
                        activation="tanh",
                        recurrent_activation="sigmoid",
                        dropout=dropout_rate,
                        recurrent_dropout=dropout_rate,
                        return_sequences=True,
                    )
                    for units in hidden_units
                ]
            )
        else:
            self.update_fn = create_ffn(hidden_units, dropout_rate)

    def prepare(self, node_repesentations, weights=None):
        """Turn gathered neighbour representations into (weighted) messages."""
        # node_repesentations shape is [num_edges, embedding_dim].
        messages = self.ffn_prepare(node_repesentations)
        if weights is not None:
            messages = messages * tf.expand_dims(weights, -1)
        return messages

    def aggregate(self, node_indices, neighbour_messages, num_nodes=None):
        """Pool the messages arriving at each node.

        Args:
            node_indices: shape [num_edges]; the node each message belongs to.
            neighbour_messages: shape [num_edges, representation_dim].
            num_nodes: number of output rows. When omitted it falls back to
                ``max(node_indices) + 1``, which is too small if the
                highest-numbered nodes have no outgoing edge.

        Raises:
            ValueError: if ``aggregation_type`` is not sum, mean or max.
        """
        if num_nodes is None:
            num_nodes = tf.math.reduce_max(node_indices) + 1
        if self.aggregation_type == "sum":
            aggregated_message = tf.math.unsorted_segment_sum(
                neighbour_messages, node_indices, num_segments=num_nodes
            )
        elif self.aggregation_type == "mean":
            aggregated_message = tf.math.unsorted_segment_mean(
                neighbour_messages, node_indices, num_segments=num_nodes
            )
        elif self.aggregation_type == "max":
            aggregated_message = tf.math.unsorted_segment_max(
                neighbour_messages, node_indices, num_segments=num_nodes
            )
        else:
            raise ValueError(f"Invalid aggregation type: {self.aggregation_type}.")
        return aggregated_message

    def update(self, node_repesentations, aggregated_messages):
        """Combine each node's representation with its aggregated messages.

        Raises:
            ValueError: if ``combination_type`` is not recognised.
        """
        # node_repesentations shape is [num_nodes, representation_dim].
        # aggregated_messages shape is [num_nodes, representation_dim].
        if self._uses_gru:
            # Create a sequence of two elements for the GRU stack.
            h = tf.stack([node_repesentations, aggregated_messages], axis=1)
        elif self.combination_type == "concat":
            h = tf.concat([node_repesentations, aggregated_messages], axis=1)
        elif self.combination_type == "add":
            h = node_repesentations + aggregated_messages
        else:
            raise ValueError(f"Invalid combination type: {self.combination_type}.")

        node_embeddings = self.update_fn(h)
        if self._uses_gru:
            # Keep the output of the last of the two sequence steps.
            node_embeddings = tf.unstack(node_embeddings, axis=1)[-1]
        if self.normalize:
            node_embeddings = tf.nn.l2_normalize(node_embeddings, axis=-1)
        return node_embeddings

    def call(self, inputs):
        """Process the inputs to produce the node_embeddings.

        inputs: a tuple of three elements: node_repesentations, edges, edge_weights.
        Returns: node_embeddings of shape [num_nodes, representation_dim].
        """
        node_repesentations, edges, edge_weights = inputs
        # Get node_indices (source) and neighbour_indices (target) from edges.
        node_indices, neighbour_indices = edges[0], edges[1]
        # neighbour_repesentations shape is [num_edges, representation_dim].
        neighbour_repesentations = tf.gather(node_repesentations, neighbour_indices)
        neighbour_messages = self.prepare(neighbour_repesentations, edge_weights)
        # Size the aggregation by the actual node count so its rows line up
        # with node_repesentations in update().
        aggregated_messages = self.aggregate(
            node_indices,
            neighbour_messages,
            num_nodes=tf.shape(node_repesentations)[0],
        )
        return self.update(node_repesentations, aggregated_messages)
class GNNNodeClassifier(tf.keras.Model):
    """Four stacked GraphConvLayers followed by a Dense logits head.

    The graph (node features, edges, edge weights) is fixed at construction
    time; ``call`` only receives the indices of the nodes to score. The
    logits layer has ``num_classes - 1`` units.
    """

    def __init__(
        self,
        graph_info,
        num_classes,
        hidden_units,
        aggregation_type="sum",
        combination_type="concat",
        dropout_rate=0.5,
        normalize=True,
        *args,
        **kwargs,
    ):
        super(GNNNodeClassifier, self).__init__(*args, **kwargs)

        node_features, edges, edge_weights = graph_info
        self.node_features = node_features
        self.edges = edges

        # Default to one weight per edge, then scale the weights to sum to 1.
        raw_weights = edge_weights
        if raw_weights is None:
            raw_weights = tf.ones(shape=edges.shape[1])
        self.edge_weights = raw_weights / tf.math.reduce_sum(raw_weights)

        def make_conv(layer_name):
            # All four graph convolutions share the same configuration.
            return GraphConvLayer(
                hidden_units,
                dropout_rate,
                aggregation_type,
                combination_type,
                normalize,
                name=layer_name,
            )

        self.conv1 = make_conv("graph_conv1")
        self.conv2 = make_conv("graph_conv2")
        self.conv3 = make_conv("graph_conv3")
        self.conv4 = make_conv("graph_conv4")

        self.compute_logits = layers.Dense(units=num_classes-1, name="logits")

    def call(self, input_node_indices):
        """Return logits for the nodes selected by ``input_node_indices``."""
        graph = (self.edges, self.edge_weights)

        # The first convolution runs on the raw node features.
        hidden = self.conv1((self.node_features, *graph))
        # Each later convolution adds its output to its own input.
        for conv in (self.conv2, self.conv3, self.conv4):
            hidden = conv((hidden, *graph)) + hidden

        selected = tf.gather(hidden, input_node_indices)
        return self.compute_logits(selected)
# Instantiate the classifier from the module-level graph and hyperparameters.
gnn_model = GNNNodeClassifier(
    graph_info=graph_info,
    num_classes=num_classes,
    hidden_units=hidden_units,
    dropout_rate=dropout_rate,
    name="gnn_model",
)

# Run one forward pass for node index 100 and print what the model returns.
sample_output = gnn_model([100])
print("GNN output shape:", sample_output)

gnn_model.summary()
The output of the above architecture is as below:
=================================================================
graph_conv1 (GraphConvLayer multiple 415872
)
graph_conv2 (GraphConvLayer multiple 22016
)
graph_conv3 (GraphConvLayer multiple 22016
)
graph_conv4 (GraphConvLayer multiple 22016
)
logits (Dense) multiple 65
=================================================================
Total params: 481,985
Trainable params: 467,841
Non-trainable params: 14,144
However, when I begin training the model, I get the following error:
ValueError: logits and labels must have the same shape, received ((None, 118, 1) vs (None, 1, 1)).
However, when I set from_logits=False in model.compile, the model runs, but either it overfits or the loss and accuracy values are bad.
I have changed the BinaryCrossEntropy loss function to other loss functions such as SparseCategoricalCrossEntropy and/or CategoricalCrossEntropy but still the error stands. Additionally, I have tried to add a Flatten() layer to my architecture but so far the same error shows up.
I would be thankful if anyone could help me with this issue! Thanks!
When I am defining the Hierarchical Attentional Network, an error is popping up which says "AttributeError: can't set attribute". Please help.
This is the Attention.py file
import keras
import Attention
from keras.engine.topology import Layer, Input
from keras import backend as K
from keras import initializers
#Hierarchical Attention Layer Implementation
'''
Implemented by Arkadipta De (MIT Licensed)
'''
class Hierarchical_Attention(Layer):
    """Attention pooling over the time axis of a 3-D input.

    Scores every timestep with ``u^T tanh(x W + b)``, normalises the
    exponentiated scores over the sequence (honouring an optional mask) and
    returns the weighted sum of the timesteps.

    Args:
        attention_dim: size of the hidden projection used for scoring.
        **kwargs: forwarded to the base ``Layer`` (e.g. ``name``).

    Input shape: ``[batch_size, seq_len, features]``.
    Output shape: ``[batch_size, features]``.
    """

    def __init__(self, attention_dim, **kwargs):
        # Initialise the base layer first so its bookkeeping exists before
        # any attribute is assigned and cannot overwrite the values set here.
        super(Hierarchical_Attention, self).__init__(**kwargs)
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = attention_dim

    def build(self, input_shape):
        assert len(input_shape) == 3
        # Register the weights through add_weight. On tf.keras layers
        # `trainable_weights` is a read-only property, so assigning a list to
        # it raises "AttributeError: Can't set the attribute".
        self.W = self.add_weight(
            name='W',
            shape=(input_shape[-1], self.attention_dim),
            initializer=self.init,
            trainable=True,
        )
        self.b = self.add_weight(
            name='b',
            shape=(self.attention_dim,),
            initializer=self.init,
            trainable=True,
        )
        self.u = self.add_weight(
            name='u',
            shape=(self.attention_dim, 1),
            initializer=self.init,
            trainable=True,
        )
        super(Hierarchical_Attention, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        # call() sums over the time axis, so a per-timestep mask no longer
        # matches the output and must not be passed downstream.
        return None

    def call(self, x, mask=None):
        # uit = tanh(xW + b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        # One unnormalised score per timestep.
        ait = K.squeeze(K.dot(uit, self.u), -1)
        ait = K.exp(ait)
        if mask is not None:
            # Zero the scores of masked timesteps before normalising.
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        # Broadcast the weights over the feature axis and pool over time.
        weighted_input = x * K.expand_dims(ait)
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])
This is the main file where I'm building the model.
import re
import os
import numpy as np
import pandas as pd
import keras
from keras.engine.topology import Layer, Input
import Attention
from sklearn.model_selection import train_test_split
from keras.models import Model, Input
from keras.layers import Dropout, Dense, LSTM, GRU, Bidirectional, concatenate, Multiply, Subtract
from keras.utils import to_categorical
from keras import backend as K
from keras import initializers
# Longest title / content entry in X, visiting rows 0 .. len(X) - 1 and
# starting from 0 when X has no rows.
_row_positions = range(len(X))
Max_Title_Length = max((len(X['title'][i]) for i in _row_positions), default=0)
Max_Content_Length = max((len(X['text'][i]) for i in _row_positions), default=0)

# Each sequence position carries a vector of this many components.
vector_size = 100

input_title = Input(
    shape=(Max_Title_Length, vector_size),
    name='input_title',
)
input_content = Input(
    shape=(Max_Content_Length, vector_size),
    name='input_content',
)
def Classifier(input_title, input_content):
    """Build the two-branch attention classifier and print its summary.

    Each input goes through a bidirectional GRU (100 units, full sequence
    output) and a Hierarchical_Attention(100) pooling layer. The two pooled
    vectors are concatenated and passed through five ReLU Dense layers
    (512, 256, 128, 50, 10 units), each followed by Dropout(0.2), and a
    2-unit softmax output.

    Returns:
        The uncompiled Keras ``Model``.
    """
    title_sequence = Bidirectional(GRU(100, return_sequences=True))(input_title)
    title_vector = Attention.Hierarchical_Attention(100)(title_sequence)

    content_sequence = Bidirectional(GRU(100, return_sequences=True))(input_content)
    content_vector = Attention.Hierarchical_Attention(100)(content_sequence)

    hidden = concatenate([title_vector, content_vector])
    for width in (512, 256, 128, 50, 10):
        hidden = Dense(units=width, activation='relu')(hidden)
        hidden = Dropout(0.2)(hidden)

    output = Dense(units=2, activation='softmax')(hidden)

    model = Model(inputs=[input_title, input_content], outputs=output)
    model.summary()
    return model
def compile_and_train(model, num_epochs):
    """Compile ``model`` and fit it on the module-level training arrays.

    Uses the Adam optimizer, categorical cross-entropy and the 'acc' metric,
    then trains on ``[train_x_title, train_x_content]`` against
    ``train_label`` with a batch size of 32.

    Returns:
        The value returned by ``model.fit``.
    """
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['acc'],
    )
    features = [train_x_title, train_x_content]
    return model.fit(features, train_label, batch_size=32, epochs=num_epochs)
# Build the two-input classifier graph; Classifier() also prints the summary.
Classifier_Model = Classifier(input_title,input_content)
This code is giving me an error which says:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __setattr__(self, name, value)
2761 try:
-> 2762 super(tracking.AutoTrackable, self).__setattr__(name, value)
2763 except AttributeError:
AttributeError: can't set attribute
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
6 frames
<ipython-input-43-32804502e0b0> in <module>()
32 return history
33
---> 34 Classifier_Model = Classifier(input_title,input_content)
<ipython-input-43-32804502e0b0> in Classifier(input_title, input_content)
7 #x = Bidirectional(GRU(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_title)
8 x = Bidirectional(GRU(200, return_sequences=True))(input_title)
----> 9 x_attention = Attention.Hierarchical_Attention(100)(x)
10 #y = Bidirectional(LSTM(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_content)
11 y = Bidirectional(GRU(100, return_sequences=True))(input_content)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
924 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
925 return self._functional_construction_call(inputs, args, kwargs,
--> 926 input_list)
927
928 # Maintains info about the `Layer.call` stack.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1096 # Build layer if applicable (if the `build` method has been
1097 # overridden).
-> 1098 self._maybe_build(inputs)
1099 cast_inputs = self._maybe_cast_inputs(inputs, input_list)
1100
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in _maybe_build(self, inputs)
2641 # operations.
2642 with tf_utils.maybe_init_scope(self):
-> 2643 self.build(input_shapes) # pylint:disable=not-callable
2644 # We must set also ensure that the layer is marked as built, and the build
2645 # shape is stored since user defined build functions may not be calling
/content/Attention.py in build(self, input_shape)
23 self.b = K.variable(self.init((self.attention_dim, )))
24 self.u = K.variable(self.init((self.attention_dim, 1)))
---> 25 self.trainable_weights = [self.W, self.b, self.u]
26 super(Hierarchical_Attention, self).build(input_shape)
27
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __setattr__(self, name, value)
2765 ('Can\'t set the attribute "{}", likely because it conflicts with '
2766 'an existing read-only #property of the object. Please choose a '
-> 2767 'different name.').format(name))
2768 return
2769
AttributeError: Can't set the attribute "trainable_weights", likely because it conflicts with an existing read-only #property of the object. Please choose a different name.
I'm a noob in Neural Networks. Please help.
I ran into the same problem when I was trying to execute the code on Google Colab.
I found some answers on Stack Overflow saying that it is an ongoing issue with TensorFlow on Colab.
link here
It remains unsolved for me, but I believe you can try to set self._trainable_weights instead of self.trainable_weights
I'm trying to run this code in Colab. Interestingly, I was running the same code in Colab a few days ago, but now it won't work. The code also works in a Kaggle kernel. I tried changing the TensorFlow version, but every version gives a different error. Why do you think I can't run this code? This is the Colab notebook, in case you need more info.
Thanks in advance!
class DisasterDetector:
    """BERT-based binary text classifier trained with stratified k-fold CV.

    Args:
        tokenizer: object providing ``tokenize(text)`` and
            ``convert_tokens_to_ids(tokens)``.
        bert_layer: callable Keras layer taking ``[word_ids, mask, segment_ids]``
            and returning ``(pooled_output, sequence_output)``.
        max_len: fixed length of every encoded sequence, including the
            ``[CLS]`` and ``[SEP]`` markers.
        lr: learning rate applied when ``optimizer`` is 'SGD' or 'Adam'.
        epochs, batch_size: passed to ``model.fit``.
        dtype: dtype of the three model inputs; ``None`` means ``tf.int32``.
        activation: activation of the single-unit output layer.
        optimizer: 'SGD' or 'Adam' to build that optimizer with ``lr`` (and
            the beta/epsilon values for Adam); anything else is handed to
            ``compile`` unchanged.
        beta_1, beta_2, epsilon: Adam hyperparameters.
        metrics: a single metric, wrapped in a list for ``compile``.
        loss: loss passed to ``compile``.
    """

    def __init__(self, tokenizer, bert_layer, max_len=30, lr=0.0001,
                 epochs=15, batch_size=32, dtype=None,
                 activation='sigmoid', optimizer='SGD',
                 beta_1=0.9, beta_2=0.999, epsilon=1e-07,
                 metrics='accuracy', loss='binary_crossentropy'):
        self.lr = lr
        self.epochs = epochs
        self.max_len = max_len
        self.batch_size = batch_size
        self.tokenizer = tokenizer
        self.bert_layer = bert_layer
        self.models = []
        self.activation = activation
        self.optimizer = optimizer
        # Resolve the default here rather than in the signature so the class
        # definition itself does not need TensorFlow to be importable.
        self.dtype = tf.int32 if dtype is None else dtype
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.metrics = metrics
        self.loss = loss

    def encode(self, texts):
        """Convert texts into fixed-length BERT inputs.

        Every text becomes ``[CLS] tokens... [SEP]`` and is zero-padded to
        ``max_len``. Texts with more than ``max_len - 2`` tokens are
        truncated; without that the rows would have different lengths and
        NumPy would build an object array that TensorFlow cannot convert.

        Returns:
            Tuple ``(token_ids, masks, segments)`` of NumPy arrays, each with
            one row of length ``max_len`` per text.

        Raises:
            ValueError: if ``max_len`` leaves no room for the two markers.
        """
        if self.max_len < 2:
            raise ValueError("max_len must be at least 2 to hold [CLS] and [SEP].")

        body_limit = self.max_len - 2
        all_tokens, masks, segments = [], [], []
        for text in texts:
            pieces = self.tokenizer.tokenize(text)[:body_limit]
            token_ids = self.tokenizer.convert_tokens_to_ids(
                ['[CLS]'] + pieces + ['[SEP]']
            )
            len_zeros = self.max_len - len(token_ids)
            all_tokens.append(token_ids + [0] * len_zeros)
            masks.append([1] * len(token_ids) + [0] * len_zeros)
            # Single-sentence input: every position is in segment 0.
            segments.append([0] * self.max_len)
        return np.array(all_tokens), np.array(masks), np.array(segments)

    def make_model(self):
        """Build and compile a fresh classification model on the BERT layer."""
        input_word_ids = Input(shape=(self.max_len,), dtype=self.dtype,
                               name='input_word_ids')
        input_mask = Input(shape=(self.max_len,), dtype=self.dtype,
                           name='input_mask')
        segment_ids = Input(shape=(self.max_len,), dtype=self.dtype,
                            name='segment_ids')

        pooled_output, sequence_output = self.bert_layer(
            [input_word_ids, input_mask, segment_ids]
        )
        # Classify from the representation at the first ([CLS]) position.
        clf_output = sequence_output[:, 0, :]
        out = tf.keras.layers.Dense(1, activation=self.activation)(clf_output)

        model = Model(inputs=[input_word_ids, input_mask, segment_ids],
                      outputs=out)

        # Compare by value: `is` on string literals tests object identity.
        if self.optimizer == 'SGD':
            optimizer = SGD(learning_rate=self.lr)
        elif self.optimizer == 'Adam':
            optimizer = Adam(learning_rate=self.lr, beta_1=self.beta_1,
                             beta_2=self.beta_2, epsilon=self.epsilon)
        else:
            # Any other name or optimizer instance goes to Keras unchanged.
            optimizer = self.optimizer

        # Pass the configured optimizer object; passing the bare name would
        # discard the learning rate and the Adam hyperparameters.
        model.compile(loss=self.loss, optimizer=optimizer,
                      metrics=[self.metrics])
        return model

    def train(self, x, k=3):
        """Train one model per stratified fold and append each to ``self.models``.

        Args:
            x: table with a 'cleaned_text' column and a 'target' column.
            k: number of folds.
        """
        kfold = StratifiedKFold(n_splits=k, shuffle=True)
        texts = x['cleaned_text']
        targets = x['target']
        for fold, (train_idx, val_idx) in enumerate(kfold.split(texts, targets)):
            print('fold: ', fold)
            # split() yields positional indices, so select rows by position.
            x_trn = self.encode(texts.iloc[train_idx])
            x_val = self.encode(texts.iloc[val_idx])
            y_trn = np.array(targets.iloc[train_idx], dtype=np.uint8)
            y_val = np.array(targets.iloc[val_idx], dtype=np.uint8)
            print('x_val shape', x_val[0].shape)
            print('x_trn shape', x_trn[0].shape)

            model = self.make_model()
            print('model made.')
            model.fit(x_trn, tf.convert_to_tensor(y_trn),
                      validation_data=(x_val, tf.convert_to_tensor(y_val)),
                      batch_size=self.batch_size, epochs=self.epochs)
            self.models.append(model)
After calling the train method of the class, I get the error shown below.
# Settings for the detector; `tokenizer`, `bert_layer`, `max_len` and
# `train_cleaned` come from earlier in the notebook.
detector_options = dict(
    tokenizer=tokenizer,
    bert_layer=bert_layer,
    max_len=max_len,
    lr=0.0001,
    epochs=10,
    activation='sigmoid',
    batch_size=32,
    optimizer='SGD',
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
)
classifier = DisasterDetector(**detector_options)
classifier.train(train_cleaned)
and here is the error:
ValueError Traceback (most
recent call last)
<ipython-input-10-106c756f2e47> in <module>()
----> 1 classifier.train(train_cleaned)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
96 dtype = dtypes.as_dtype(dtype).as_datatype_enum
97 ctx.ensure_initialized()
---> 98 return ops.EagerTensor(value, ctx.device_name, dtype)
99
100
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list).
Well, it turns out that by not giving the appropriate maximum sequence length, TensorFlow throws this error. By changing the max_len variable to 54 I could run my program with no difficulty. So the problem was not about the type of the input or the numpy arrays.
I want to classify text to 2 classes by using this embedding: https://tfhub.dev/google/universal-sentence-encoder-multilingual/3
And I also want to add additional features after the embedding. So I have two inputs:
import tensorflow as tf
import tensorflow_hub as tfh
import tensorflow_datasets as tfds
import tensorflow_text as tft
# Hyperparameters for the two-input (text + numeric features) classifier.
hp = {
    # TF-Hub handle of the sentence encoder used to embed the text input.
    'embedding': 'https://tfhub.dev/google/universal-sentence-encoder-multilingual/3',
    'units': 64,
    'learning_rate': 1e-3,
    'dropout': 0.2,
    'layers': 2
}

# Model inputs: one string per sample and a 36-dimensional feature vector.
# NOTE(review): this shape=(1, ) declaration is kept as posted because it is
# part of the failing reproduction; the answer further down reports that the
# text input should be declared with shape=().
textInput = tf.keras.Input(shape=(1, ), name = 'text', dtype = tf.string)
featuresInput = tf.keras.Input(shape=(36, ), name = 'features')

# Embed the text with the frozen hub module, then append the extra features.
x = tfh.KerasLayer(hp.get('embedding'), dtype = tf.string, trainable = False)(textInput)
x = tf.keras.layers.concatenate([x, featuresInput])

# Stack of identical Dense + Dropout blocks.
for _ in range(hp.get('layers')):
    x = tf.keras.layers.Dense(hp.get('units'), activation = 'relu')(x)
    x = tf.keras.layers.Dropout(hp.get('dropout'))(x)

# Single sigmoid unit for the two-class decision; INITIAL_BIAS and METRICS are
# defined outside this snippet.
output = tf.keras.layers.Dense(
    1,
    activation = 'sigmoid',
    bias_initializer = tf.keras.initializers.Constant(INITIAL_BIAS) if INITIAL_BIAS else None
)(x)

model = tf.keras.Model(inputs = [textInput, featuresInput], outputs = output)
model.compile(
    optimizer = tf.keras.optimizers.Adam(lr = hp.get('learning_rate')),
    loss = tf.keras.losses.BinaryCrossentropy(),
    metrics = METRICS,
)
And the code fails with error:
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-17-61aed6f885c9> in <module>
10 featuresInput = tf.keras.Input(shape=(36, ), name = 'features')
11
---> 12 x = tfh.KerasLayer(hp.get('embedding'), dtype = tf.string, trainable = False)(textInput)
13 x = tf.keras.layers.concatenate([x, featuresInput])
14
~/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
920 not base_layer_utils.is_in_eager_or_tf_function()):
921 with auto_control_deps.AutomaticControlDependencies() as acd:
--> 922 outputs = call_fn(cast_inputs, *args, **kwargs)
923 # Wrap Tensors in `outputs` in `tf.identity` to avoid
924 # circular dependencies.
~/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
263 except Exception as e: # pylint:disable=broad-except
264 if hasattr(e, 'ag_error_metadata'):
--> 265 raise e.ag_error_metadata.to_exception(e)
266 else:
267 raise
AssertionError: in user code:
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow_hub/keras_layer.py:222 call *
result = f()
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/saved_model/load.py:486 _call_attribute **
return instance.__call__(*args, **kwargs)
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py:580 __call__
result = self._call(*args, **kwds)
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py:650 _call
return self._concrete_stateful_fn._filtered_call(canon_args, canon_kwds) # pylint: disable=protected-access
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/eager/function.py:1665 _filtered_call
self.captured_inputs)
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/eager/function.py:1759 _call_flat
"StatefulPartitionedCall": self._get_gradient_function()}):
/usr/lib/python3.6/contextlib.py:81 __enter__
return next(self.gen)
/home/e/.virtualenvs/python3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py:4735 _override_gradient_function
assert not self._gradient_function_map
AssertionError:
BUT it works if I use Sequential
# Sequential variant that the asker reports as working: a trainable hub
# embedding (declared with input_shape=[], i.e. one scalar string per sample)
# followed by Dense -> Dropout -> a single sigmoid output unit.
# NOTE(review): the input_shape passed to the Dense layer is presumably
# ignored because that layer is not the first one in the model -- confirm.
model = tf.keras.Sequential([
hub.KerasLayer(embedding, input_shape=[], dtype = tf.string, trainable = True),
tf.keras.layers.Dense(16, activation = 'relu', input_shape = (train_features.shape[-1],)),
tf.keras.layers.Dropout(0.5),
tf.keras.layers.Dense(1, activation = 'sigmoid', bias_initializer = output_bias),
])
# Binary cross-entropy with Adam; `metrics` is defined outside this snippet.
model.compile(optimizer = tf.keras.optimizers.Adam(lr=1e-3), loss = tf.keras.losses.BinaryCrossentropy(), metrics = metrics)
Is there anything I'm doing wrong with the functional API? Can you please help me with this error?
I've faced a similar problem. My solution looks like this:
def build_model():
    """Build and compile a two-input sentence-pair classifier.

    Both inputs are scalar strings (``shape=()``) embedded by one shared
    ``hub.KerasLayer``; the two embeddings are concatenated and fed through a
    32-unit ReLU layer into a 3-unit output layer that emits logits.

    Relies on ``embed``, ``hub``, ``keras``, ``layers`` and ``tf`` being
    defined in the enclosing module (``embed`` is presumably the TF-Hub handle
    or loaded module -- confirm against the caller).

    Returns:
        The compiled ``keras.Model`` named ``"elementary_model"``.
    """
    premise = keras.Input(shape=(), dtype=tf.string)
    hypothesis = keras.Input(shape=(), dtype=tf.string)

    # A single layer instance is applied to both inputs so they share weights.
    # output_shape must be a tuple: ``(512)`` is just the integer 512.
    keras_emb = hub.KerasLayer(embed, input_shape=(), output_shape=(512,),
                               dtype=tf.string, trainable=True)
    prem_emb = keras_emb(premise)
    hyp_emb = keras_emb(hypothesis)

    emb = layers.Concatenate()([prem_emb, hyp_emb])
    dense = layers.Dense(32, activation="relu")(emb)
    # No activation here: the loss below is configured with from_logits=True.
    classifier = layers.Dense(3)(dense)

    model = keras.Model(inputs=[premise, hypothesis], outputs=classifier, name="elementary_model")
    model.compile(loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer="adam", metrics=['accuracy'])
    return model
Note: the text input shape should be () (empty tuple)
I am trying to apply feature-wise scaling and shifting (also called an affine transformation - the idea is described in the Nomenclature section of this distill article) to a Keras tensor (with TF backend).
The tensor I would like to transform, call it X, is the output of a convolutional layer, and has shape (B,H,W,F), representing (batch size, height, width, number of feature maps).
The parameters of my transformation are two (B,F)-dimensional tensors, beta and gamma.
I want X * gamma + beta, or to be more specific,
for b in range(B):
for f in range(F):
X[b,:,:,f] = X[b,:,:,f] * gamma[b,f] + beta[b,f]
However, neither of these two ways of doing it works in Keras. The second, with element-wise assignment, fails due to
TypeError: 'Tensor' object does not support item assignment
and should be fairly inefficient as well.
How the first fails is more cryptic to me, but my guess is that it is an issue with broadcasting. In the full code + traceback below, you can see my attempt.
Two things to note are that the error only happens at training time (and not when compiling), and that the 'transform_vars' input is seemingly never used, at least according to the model summary.
Any ideas on how to implement this?
import numpy as np
import keras as ks
import keras.backend as K
# Reproduction script from the question: it builds a small conv model whose
# feature maps are supposed to be scaled and shifted per sample, and it fails
# inside model.fit() with the "Incompatible shapes" error quoted below.
print(ks.__version__)
# Load example data (here MNIST)
from keras.datasets import mnist
(x_img_train, y_train), _ = mnist.load_data()
# Append a trailing channel axis to the image array.
x_img_train = np.expand_dims(x_img_train,-1)
# Generate some random data to use for the transformations (one row per label)
n_transform_vars = 10
x_transform_train = np.random.randn(y_train.shape[0], n_transform_vars)
# Inputs
input_transform = ks.layers.Input(x_transform_train.shape[1:], name='transform_vars')
input_img = ks.layers.Input(x_img_train.shape[1:], name='imgs')
# Number of feature maps
n_features = 32
# Create network that calculates the transformations:
# 2 * n_features values per sample, reshaped to (2, 32) so that index 0 holds
# the scales and index 1 the shifts.
tns_transform = ks.layers.Dense(2 * n_features)(input_transform)
tns_transform = ks.layers.Reshape((2, 32))(tns_transform)
# Do a convolution
tns_conv = ks.layers.Conv2D(filters=n_features, kernel_size=3, padding='same')(input_img)
# Apply batch norm
bn = ks.layers.BatchNormalization()
# Freeze the weights of the batch norm, as they are going to be overwritten
bn.trainable = False
# Apply the batch norm layer to the conv output
tns_conv = bn(tns_conv)
# Attempt to apply the affine transformation
# NOTE(review): tns_transform is captured from the enclosing scope instead of
# being passed to the Lambda as an input, which is presumably why
# 'transform_vars' is missing from the model summary. The reported error
# ([8,28,28,32] vs. [8,32]) shows the per-sample scale/shift tensors do not
# line up with the feature maps for broadcasting.
def scale_and_shift(x):
    return x * tns_transform[:,0] + tns_transform[:,1]
tns_conv = ks.layers.Lambda(scale_and_shift, name='affine_transform')(tns_conv)
# Flatten and regress to a single output value.
tns_conv = ks.layers.Flatten()(tns_conv)
output = ks.layers.Dense(1)(tns_conv)
model = ks.models.Model(inputs=[input_img, input_transform], outputs=output)
model.compile(loss='mse', optimizer='Adam')
model.summary()
# Training is where the shape error surfaces (not at compile time).
model.fit([x_img_train, x_transform_train], y_train, batch_size=8)
This results in
2.2.4
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
imgs (InputLayer) (None, 28, 28, 1) 0
_________________________________________________________________
conv2d_25 (Conv2D) (None, 28, 28, 32) 320
_________________________________________________________________
batch_normalization_22 (Batc (None, 28, 28, 32) 128
_________________________________________________________________
affine_transform (Lambda) (None, 28, 28, 32) 0
_________________________________________________________________
flatten_6 (Flatten) (None, 25088) 0
_________________________________________________________________
dense_33 (Dense) (None, 1) 25089
=================================================================
Total params: 25,537
Trainable params: 25,409
Non-trainable params: 128
_________________________________________________________________
Epoch 1/1
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-35-14724d9432ef> in <module>
49 model.summary()
50
---> 51 model.fit([x_img_train, x_transform_train], y_train, batch_size=8)
~/miniconda3/envs/py3/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1037 initial_epoch=initial_epoch,
1038 steps_per_epoch=steps_per_epoch,
-> 1039 validation_steps=validation_steps)
1040
1041 def evaluate(self, x=None, y=None,
~/miniconda3/envs/py3/lib/python3.6/site-packages/keras/engine/training_arrays.py in fit_loop(model, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
197 ins_batch[i] = ins_batch[i].toarray()
198
--> 199 outs = f(ins_batch)
200 outs = to_list(outs)
201 for l, o in zip(out_labels, outs):
~/miniconda3/envs/py3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
2713 return self._legacy_call(inputs)
2714
-> 2715 return self._call(inputs)
2716 else:
2717 if py_any(is_tensor(x) for x in inputs):
~/miniconda3/envs/py3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in _call(self, inputs)
2673 fetched = self._callable_fn(*array_vals, run_metadata=self.run_metadata)
2674 else:
-> 2675 fetched = self._callable_fn(*array_vals)
2676 return fetched[:len(self.outputs)]
2677
~/miniconda3/envs/py3/lib/python3.6/site-packages/tensorflow/python/client/session.py in __call__(self, *args, **kwargs)
1437 ret = tf_session.TF_SessionRunCallable(
1438 self._session._session, self._handle, args, status,
-> 1439 run_metadata_ptr)
1440 if run_metadata:
1441 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
~/miniconda3/envs/py3/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
526 None, None,
527 compat.as_text(c_api.TF_Message(self.status.status)),
--> 528 c_api.TF_GetCode(self.status.status))
529 # Delete the underlying status object from memory otherwise it stays alive
530 # as there is a reference to status from this from the traceback due to
InvalidArgumentError: Incompatible shapes: [8,28,28,32] vs. [8,32]
[[{{node training_5/Adam/gradients/affine_transform_18/mul_grad/BroadcastGradientArgs}} = BroadcastGradientArgs[T=DT_INT32, _class=["loc:#training_5/Adam/gradients/batch_normalization_22/cond/Merge_grad/cond_grad"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](training_5/Adam/gradients/affine_transform_18/mul_grad/Shape, training_5/Adam/gradients/affine_transform_18/mul_grad/Shape_1)]]
I managed to implement the affine transformation as a custom layer (here called a FiLM layer, as in the literature):
class FiLM(ks.layers.Layer):
    """Feature-wise linear modulation (FiLM) layer with a built-in generator.

    The layer is called on a list ``[feature_maps, FiLM_vars]``. ``FiLM_vars``
    is passed through a stack of Dense layers (one per entry of ``widths``,
    each followed by ``activation``) and a final Dense layer that emits
    ``2 * F`` values per sample, where ``F`` is the size of the last axis of
    ``feature_maps`` (channels-last is assumed). The first ``F`` values are
    the gammas, the remaining ``F`` the betas, and the layer returns
    ``(1 + gammas) * feature_maps + betas``.

    Args:
        widths: Sizes of the hidden Dense layers of the generator. May be
            empty, in which case only the output Dense layer is used.
        activation: Activation name forwarded to ``get_activation`` (a helper
            defined elsewhere in this file).
        initialization: Kernel initializer used for every Dense layer.
        **kwargs: Forwarded to ``ks.layers.Layer``.
    """

    def __init__(self, widths=(64, 64), activation='leakyrelu',
                 initialization='glorot_uniform', **kwargs):
        # Copy into a fresh list so that neither a shared default nor the
        # caller's sequence is aliased by this instance.
        self.widths = list(widths)
        self.activation = activation
        self.initialization = initialization
        super(FiLM, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the generator's sub-layers and register their weights."""
        assert isinstance(input_shape, list)
        feature_map_shape, FiLM_vars_shape = input_shape
        self.n_feature_maps = feature_map_shape[-1]
        # Kept as attributes for introspection; call() relies on broadcasting
        # and therefore does not need them.
        self.height = feature_map_shape[1]
        self.width = feature_map_shape[2]

        # Sub-layer weights are collected by hand and handed to this layer.
        trainable_weights = []
        self.hidden_dense_layers = []
        self.hidden_activations = []

        # Shape fed to the next sub-layer; starts with the FiLM variables.
        next_shape = FiLM_vars_shape[:2]
        for i, width in enumerate(self.widths):
            dense = ks.layers.Dense(width,
                                    kernel_initializer=self.initialization,
                                    name=f'FiLM_dense_{i}')
            dense.build(next_shape)
            trainable_weights += dense.trainable_weights
            self.hidden_dense_layers.append(dense)

            next_shape = (None, width)
            # Build the activation once here instead of on every call, so that
            # activations with weights (e.g. 'prelu') are trained as well.
            act = get_activation(activation=self.activation)
            act.build(next_shape)
            trainable_weights += act.trainable_weights
            self.hidden_activations.append(act)

        # Output layer: gammas and betas side by side (assumes channels-last).
        self.output_dense = ks.layers.Dense(2 * self.n_feature_maps,
                                            kernel_initializer=self.initialization,
                                            name='FiLM_dense_output')
        self.output_dense.build(next_shape)
        trainable_weights += self.output_dense.trainable_weights

        # Pass on all collected trainable weights
        self._trainable_weights = trainable_weights
        super(FiLM, self).build(input_shape)

    def call(self, x):
        """Return ``(1 + gammas) * feature_maps + betas``."""
        assert isinstance(x, list)
        conv_output, FiLM_vars = x

        # Generate FiLM outputs
        tns = FiLM_vars
        for dense, act in zip(self.hidden_dense_layers, self.hidden_activations):
            tns = act(dense(tns))
        FiLM_output = self.output_dense(tns)

        # Insert two singleton axes after the batch axis. Broadcasting then
        # applies each gamma/beta to its whole feature map, so no explicit
        # tiling is needed and unknown spatial sizes are fine.
        FiLM_output = K.expand_dims(FiLM_output, axis=1)
        FiLM_output = K.expand_dims(FiLM_output, axis=1)

        # Split into gammas and betas
        gammas = FiLM_output[:, :, :, :self.n_feature_maps]
        betas = FiLM_output[:, :, :, self.n_feature_maps:]

        # Apply affine transformation
        return (1 + gammas) * conv_output + betas

    def compute_output_shape(self, input_shape):
        """The output has the shape of the feature-map input."""
        assert isinstance(input_shape, list)
        return input_shape[0]

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(FiLM, self).get_config()
        config.update({
            'widths': list(self.widths),
            'activation': self.activation,
            'initialization': self.initialization,
        })
        return config
It depends on the function get_activation, which essentially just returns a Keras activation instance. You can see the full working example below.
Note that this layer does the processing of the transform_vars in the layer itself. If you want to process these variables in another network, see the edit below.
import numpy as np
import keras as ks
import keras.backend as K
def get_activation(tns=None, activation='relu'):
    '''
    Create an activation layer and, optionally, apply it to a tensor.

    Args :
        tns :
            *Keras tensor or None*
            When given, the new activation layer is applied to this tensor
            and the resulting tensor is returned. When None, the layer
            instance itself is returned.
        activation :
            *str, optional (default='relu')*
            Name of the activation. Handled explicitly: 'relu', 'leakyrelu',
            'prelu', 'elu', 'swish', 'mrelu', 'gaussian' and
            'flipped_gaussian'. Any other value is passed unchanged to
            ``ks.layers.Activation``.

    Returns :
        *Keras tensor or layer instance* (see tns argument)
    '''
    # Custom element-wise functions; each is wrapped in an Activation layer.
    def swish(x):
        return K.sigmoid(x) * x

    def mrelu(x):
        return K.minimum(K.maximum(1-x, 0), K.maximum(1+x, 0))

    def gaussian(x):
        return K.exp(-x**2)

    def flipped_gaussian(x):
        return 1 - K.exp(-x**2)

    # Name -> zero-argument factory. Factories are lazy so that only the
    # requested layer class is looked up and instantiated.
    factories = {
        'relu': lambda: ks.layers.ReLU(),
        'leakyrelu': lambda: ks.layers.LeakyReLU(),
        'prelu': lambda: ks.layers.PReLU(),
        'elu': lambda: ks.layers.ELU(),
        'swish': lambda: ks.layers.Activation(swish),
        'mrelu': lambda: ks.layers.Activation(mrelu),
        'gaussian': lambda: ks.layers.Activation(gaussian),
        'flipped_gaussian': lambda: ks.layers.Activation(flipped_gaussian),
    }

    if isinstance(activation, str) and activation in factories:
        act = factories[activation]()
    else:
        # Fall back to whatever Keras itself understands.
        act = ks.layers.Activation(activation)

    return act if tns is None else act(tns)
class FiLM(ks.layers.Layer):
    """Feature-wise linear modulation (FiLM) layer with a built-in generator.

    The layer is called on a list ``[feature_maps, FiLM_vars]``. ``FiLM_vars``
    is passed through a stack of Dense layers (one per entry of ``widths``,
    each followed by ``activation``) and a final Dense layer that emits
    ``2 * F`` values per sample, where ``F`` is the size of the last axis of
    ``feature_maps`` (channels-last is assumed). The first ``F`` values are
    the gammas, the remaining ``F`` the betas, and the layer returns
    ``(1 + gammas) * feature_maps + betas``.

    Args:
        widths: Sizes of the hidden Dense layers of the generator. May be
            empty, in which case only the output Dense layer is used.
        activation: Activation name forwarded to ``get_activation``.
        initialization: Kernel initializer used for every Dense layer.
        **kwargs: Forwarded to ``ks.layers.Layer``.
    """

    def __init__(self, widths=(64, 64), activation='leakyrelu',
                 initialization='glorot_uniform', **kwargs):
        # Copy into a fresh list so that neither a shared default nor the
        # caller's sequence is aliased by this instance.
        self.widths = list(widths)
        self.activation = activation
        self.initialization = initialization
        super(FiLM, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the generator's sub-layers and register their weights."""
        assert isinstance(input_shape, list)
        feature_map_shape, FiLM_vars_shape = input_shape
        self.n_feature_maps = feature_map_shape[-1]
        # Kept as attributes for introspection; call() relies on broadcasting
        # and therefore does not need them.
        self.height = feature_map_shape[1]
        self.width = feature_map_shape[2]

        # Sub-layer weights are collected by hand and handed to this layer.
        trainable_weights = []
        self.hidden_dense_layers = []
        self.hidden_activations = []

        # Shape fed to the next sub-layer; starts with the FiLM variables.
        next_shape = FiLM_vars_shape[:2]
        for i, width in enumerate(self.widths):
            dense = ks.layers.Dense(width,
                                    kernel_initializer=self.initialization,
                                    name=f'FiLM_dense_{i}')
            dense.build(next_shape)
            trainable_weights += dense.trainable_weights
            self.hidden_dense_layers.append(dense)

            next_shape = (None, width)
            # Build the activation once here instead of on every call, so that
            # activations with weights (e.g. 'prelu') are trained as well.
            act = get_activation(activation=self.activation)
            act.build(next_shape)
            trainable_weights += act.trainable_weights
            self.hidden_activations.append(act)

        # Output layer: gammas and betas side by side (assumes channels-last).
        self.output_dense = ks.layers.Dense(2 * self.n_feature_maps,
                                            kernel_initializer=self.initialization,
                                            name='FiLM_dense_output')
        self.output_dense.build(next_shape)
        trainable_weights += self.output_dense.trainable_weights

        # Pass on all collected trainable weights
        self._trainable_weights = trainable_weights
        super(FiLM, self).build(input_shape)

    def call(self, x):
        """Return ``(1 + gammas) * feature_maps + betas``."""
        assert isinstance(x, list)
        conv_output, FiLM_vars = x

        # Generate FiLM outputs
        tns = FiLM_vars
        for dense, act in zip(self.hidden_dense_layers, self.hidden_activations):
            tns = act(dense(tns))
        FiLM_output = self.output_dense(tns)

        # Insert two singleton axes after the batch axis. Broadcasting then
        # applies each gamma/beta to its whole feature map, so no explicit
        # tiling is needed and unknown spatial sizes are fine.
        FiLM_output = K.expand_dims(FiLM_output, axis=1)
        FiLM_output = K.expand_dims(FiLM_output, axis=1)

        # Split into gammas and betas
        gammas = FiLM_output[:, :, :, :self.n_feature_maps]
        betas = FiLM_output[:, :, :, self.n_feature_maps:]

        # Apply affine transformation
        return (1 + gammas) * conv_output + betas

    def compute_output_shape(self, input_shape):
        """The output has the shape of the feature-map input."""
        assert isinstance(input_shape, list)
        return input_shape[0]

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(FiLM, self).get_config()
        config.update({
            'widths': list(self.widths),
            'activation': self.activation,
            'initialization': self.initialization,
        })
        return config
# End-to-end example: train a small conv model on MNIST whose feature maps
# are modulated by a FiLM layer driven by random per-sample variables.
print(ks.__version__)

# MNIST digits serve as the example images; add a trailing channel axis.
from keras.datasets import mnist
(x_img_train, y_train), _ = mnist.load_data()
x_img_train = np.expand_dims(x_img_train, axis=-1)

# Random conditioning variables, one row per training label.
n_transform_vars = 10
x_transform_train = np.random.randn(len(y_train), n_transform_vars)

# Model inputs, shaped after a single sample of each array.
input_transform = ks.layers.Input(x_transform_train.shape[1:], name='transform_vars')
input_img = ks.layers.Input(x_img_train.shape[1:], name='imgs')

# Number of feature maps produced by the convolution.
n_features = 32
conv_maps = ks.layers.Conv2D(filters=n_features, kernel_size=3, padding='same')(input_img)

# Batch norm with frozen weights, since the FiLM layer supplies the
# scale and shift instead.
bn = ks.layers.BatchNormalization()
bn.trainable = False
normed_maps = bn(conv_maps)

# Modulate the normalised feature maps with the FiLM layer.
film_layer = FiLM(widths=[12,24], name='FiLM_layer')
modulated_maps = film_layer([normed_maps, input_transform])

# Collapse to a single regression output.
flat = ks.layers.Flatten()(modulated_maps)
output = ks.layers.Dense(1)(flat)

# Assemble, compile, summarise and plot the model.
model = ks.models.Model(inputs=[input_img, input_transform], outputs=output)
model.compile(loss='mse', optimizer='Adam')
model.summary()
ks.utils.plot_model(model, './model_with_FiLM.png')

# Train
model.fit([x_img_train, x_transform_train], y_train, batch_size=8)
EDIT:
Here is the "non-active" FiLM layer, which takes in the predictions of another network (the FiLM generator) and uses them as gammas and betas.
This way of doing it is equivalent but simpler: all the trainable weights stay in the FiLM generator, which therefore ensures weight sharing.
class FiLM(ks.layers.Layer):
    """Parameter-free FiLM layer driven by an external generator.

    The layer is called on a list ``[feature_maps, FiLM_tns]``. ``FiLM_tns``
    must carry ``2 * F`` values per sample, where ``F`` is the size of the
    last axis of ``feature_maps``: the first ``F`` values are the gammas and
    the remaining ``F`` the betas. The layer returns
    ``(1 + gammas) * feature_maps + betas`` and owns no weights of its own.
    """

    def __init__(self, **kwargs):
        super(FiLM, self).__init__(**kwargs)

    def build(self, input_shape):
        """Record the input dimensions and check the FiLM tensor's width."""
        assert isinstance(input_shape, list)
        feature_map_shape, FiLM_tns_shape = input_shape
        # Kept as attributes for introspection; call() relies on broadcasting
        # and therefore does not need them.
        self.height = feature_map_shape[1]
        self.width = feature_map_shape[2]
        self.n_feature_maps = feature_map_shape[-1]
        assert int(2 * self.n_feature_maps) == FiLM_tns_shape[1], (
            'FiLM tensor must have 2 * n_feature_maps entries per sample')
        super(FiLM, self).build(input_shape)

    def call(self, x):
        """Return ``(1 + gammas) * feature_maps + betas``."""
        assert isinstance(x, list)
        conv_output, FiLM_tns = x

        # Insert two singleton axes after the batch axis. Broadcasting then
        # applies each gamma/beta to its whole feature map, so no explicit
        # tiling is needed and unknown spatial sizes are fine.
        FiLM_tns = K.expand_dims(FiLM_tns, axis=1)
        FiLM_tns = K.expand_dims(FiLM_tns, axis=1)

        # Split into gammas and betas
        gammas = FiLM_tns[:, :, :, :self.n_feature_maps]
        betas = FiLM_tns[:, :, :, self.n_feature_maps:]

        # Apply affine transformation
        return (1 + gammas) * conv_output + betas

    def compute_output_shape(self, input_shape):
        """The output has the shape of the feature-map input."""
        assert isinstance(input_shape, list)
        return input_shape[0]