I am trying to apply feature-wise scaling and shifting (also called an affine transformation; the idea is described in the Nomenclature section of this Distill article) to a Keras tensor (using the TensorFlow backend).
The tensor I would like to transform, call it X, is the output of a convolutional layer, and has shape (B,H,W,F), representing (batch size, height, width, number of feature maps).
The parameters of my transformation are two (B,F)-dimensional tensors, beta and gamma.
I want X * gamma + beta, or to be more specific,
for b in range(B):
for f in range(F):
X[b,:,:,f] = X[b,:,:,f] * gamma[b,f] + beta[b,f]
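For plain NumPy arrays this would be a single broadcast (a sketch, assuming X, gamma and beta are arrays of the shapes above):
X_transformed = X * gamma[:, None, None, :] + beta[:, None, None, :]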
However, neither X * gamma + beta nor the loop works on Keras tensors. The loop, with its element-wise assignment, fails due to
TypeError: 'Tensor' object does not support item assignment
and should be fairly inefficient as well.
Why the first fails is more cryptic to me, but my guess is that it is a broadcasting issue. You can see my attempt in the full code and traceback below.
Two things to note are that the error only happens at training time (and not when compiling), and that the 'transform_vars' input is seemingly never used, at least according to the model summary.
Any ideas on how to implement this?
import numpy as np
import keras as ks
import keras.backend as K
print(ks.__version__)
# Load example data (here MNIST)
from keras.datasets import mnist
(x_img_train, y_train), _ = mnist.load_data()
x_img_train = np.expand_dims(x_img_train,-1)
# Generate some data to use for the transformations
n_transform_vars = 10
x_transform_train = np.random.randn(y_train.shape[0], n_transform_vars)
# Inputs
input_transform = ks.layers.Input(x_transform_train.shape[1:], name='transform_vars')
input_img = ks.layers.Input(x_img_train.shape[1:], name='imgs')
# Number of feature maps
n_features = 32
# Create network that calculates the transformations
tns_transform = ks.layers.Dense(2 * n_features)(input_transform)
tns_transform = ks.layers.Reshape((2, n_features))(tns_transform)
# Do a convolution
tns_conv = ks.layers.Conv2D(filters=n_features, kernel_size=3, padding='same')(input_img)
# Apply batch norm
bn = ks.layers.BatchNormalization()
# Freeze the weights of the batch norm, as they are going to be overwritten
bn.trainable = False
# Apply
tns_conv = bn(tns_conv)
# Attempt to apply the affine transformation
def scale_and_shift(x):
return x * tns_transform[:,0] + tns_transform[:,1]
tns_conv = ks.layers.Lambda(scale_and_shift, name='affine_transform')(tns_conv)
tns_conv = ks.layers.Flatten()(tns_conv)
output = ks.layers.Dense(1)(tns_conv)
model = ks.models.Model(inputs=[input_img, input_transform], outputs=output)
model.compile(loss='mse', optimizer='Adam')
model.summary()
model.fit([x_img_train, x_transform_train], y_train, batch_size=8)
This results in
2.2.4
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
imgs (InputLayer) (None, 28, 28, 1) 0
_________________________________________________________________
conv2d_25 (Conv2D) (None, 28, 28, 32) 320
_________________________________________________________________
batch_normalization_22 (Batc (None, 28, 28, 32) 128
_________________________________________________________________
affine_transform (Lambda) (None, 28, 28, 32) 0
_________________________________________________________________
flatten_6 (Flatten) (None, 25088) 0
_________________________________________________________________
dense_33 (Dense) (None, 1) 25089
=================================================================
Total params: 25,537
Trainable params: 25,409
Non-trainable params: 128
_________________________________________________________________
Epoch 1/1
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-35-14724d9432ef> in <module>
49 model.summary()
50
---> 51 model.fit([x_img_train, x_transform_train], y_train, batch_size=8)
~/miniconda3/envs/py3/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1037 initial_epoch=initial_epoch,
1038 steps_per_epoch=steps_per_epoch,
-> 1039 validation_steps=validation_steps)
1040
1041 def evaluate(self, x=None, y=None,
~/miniconda3/envs/py3/lib/python3.6/site-packages/keras/engine/training_arrays.py in fit_loop(model, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
197 ins_batch[i] = ins_batch[i].toarray()
198
--> 199 outs = f(ins_batch)
200 outs = to_list(outs)
201 for l, o in zip(out_labels, outs):
~/miniconda3/envs/py3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
2713 return self._legacy_call(inputs)
2714
-> 2715 return self._call(inputs)
2716 else:
2717 if py_any(is_tensor(x) for x in inputs):
~/miniconda3/envs/py3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in _call(self, inputs)
2673 fetched = self._callable_fn(*array_vals, run_metadata=self.run_metadata)
2674 else:
-> 2675 fetched = self._callable_fn(*array_vals)
2676 return fetched[:len(self.outputs)]
2677
~/miniconda3/envs/py3/lib/python3.6/site-packages/tensorflow/python/client/session.py in __call__(self, *args, **kwargs)
1437 ret = tf_session.TF_SessionRunCallable(
1438 self._session._session, self._handle, args, status,
-> 1439 run_metadata_ptr)
1440 if run_metadata:
1441 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
~/miniconda3/envs/py3/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
526 None, None,
527 compat.as_text(c_api.TF_Message(self.status.status)),
--> 528 c_api.TF_GetCode(self.status.status))
529 # Delete the underlying status object from memory otherwise it stays alive
530 # as there is a reference to status from this from the traceback due to
InvalidArgumentError: Incompatible shapes: [8,28,28,32] vs. [8,32]
[[{{node training_5/Adam/gradients/affine_transform_18/mul_grad/BroadcastGradientArgs}} = BroadcastGradientArgs[T=DT_INT32, _class=["loc:#training_5/Adam/gradients/batch_normalization_22/cond/Merge_grad/cond_grad"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](training_5/Adam/gradients/affine_transform_18/mul_grad/Shape, training_5/Adam/gradients/affine_transform_18/mul_grad/Shape_1)]]
I managed to implement the affine transformation as a custom layer (here called a FiLM layer, as in the literature):
class FiLM(ks.layers.Layer):
def __init__(self, widths=[64,64], activation='leakyrelu',
initialization='glorot_uniform', **kwargs):
self.widths = widths
self.activation = activation
self.initialization = initialization
super(FiLM, self).__init__(**kwargs)
def build(self, input_shape):
assert isinstance(input_shape, list)
feature_map_shape, FiLM_vars_shape = input_shape
self.n_feature_maps = feature_map_shape[-1]
self.height = feature_map_shape[1]
self.width = feature_map_shape[2]
# Collect trainable weights
trainable_weights = []
# Create weights for hidden layers
self.hidden_dense_layers = []
for i,width in enumerate(self.widths):
dense = ks.layers.Dense(width,
kernel_initializer=self.initialization,
name=f'FiLM_dense_{i}')
if i==0:
build_shape = FiLM_vars_shape[:2]
else:
build_shape = (None,self.widths[i-1])
dense.build(build_shape)
trainable_weights += dense.trainable_weights
self.hidden_dense_layers.append(dense)
# Create weights for output layer
self.output_dense = ks.layers.Dense(2 * self.n_feature_maps, # assumes channel_last
kernel_initializer=self.initialization,
name=f'FiLM_dense_output')
self.output_dense.build((None,self.widths[-1]))
trainable_weights += self.output_dense.trainable_weights
# Pass on all collected trainable weights
self._trainable_weights = trainable_weights
super(FiLM, self).build(input_shape)
def call(self, x):
assert isinstance(x, list)
conv_output, FiLM_vars = x
# Generate FiLM outputs
tns = FiLM_vars
for i in range(len(self.widths)):
tns = self.hidden_dense_layers[i](tns)
tns = get_activation(activation=self.activation)(tns)
FiLM_output = self.output_dense(tns)
# Duplicate in order to apply to entire feature maps
# Taken from https://github.com/GuessWhatGame/neural_toolbox/blob/master/film_layer.py
FiLM_output = K.expand_dims(FiLM_output, axis=1)
FiLM_output = K.expand_dims(FiLM_output, axis=1)
FiLM_output = K.tile(FiLM_output, [1, self.height, self.width, 1])
# Split into gammas and betas
gammas = FiLM_output[:, :, :, :self.n_feature_maps]
betas = FiLM_output[:, :, :, self.n_feature_maps:]
# Apply affine transformation
return (1 + gammas) * conv_output + betas
def compute_output_shape(self, input_shape):
assert isinstance(input_shape, list)
return input_shape[0]
It depends on the function get_activation, which essentially just returns a Keras activation instance. You can see the full working example below.
Note that this layer does the processing of the transform_vars in the layer itself. If you want to process these variables in another network, see the edit below.
import numpy as np
import keras as ks
import keras.backend as K
def get_activation(tns=None, activation='relu'):
'''
Adds an activation layer to a graph.
Args :
tns :
*Keras tensor or None*
Input tensor. If not None, then the graph will be connected through
it, and a tensor will be returned. If None, the activation layer
will be returned.
activation :
*str, optional (default='relu')*
The name of an activation function.
One of 'relu', 'leakyrelu', 'prelu', 'elu', 'mrelu' or 'swish',
or anything that Keras will recognize as an activation function
name.
Returns :
*Keras tensor or layer instance* (see tns argument)
'''
if activation == 'relu':
act = ks.layers.ReLU()
elif activation == 'leakyrelu':
act = ks.layers.LeakyReLU()
elif activation == 'prelu':
act = ks.layers.PReLU()
elif activation == 'elu':
act = ks.layers.ELU()
elif activation == 'swish':
def swish(x):
return K.sigmoid(x) * x
act = ks.layers.Activation(swish)
elif activation == 'mrelu':
def mrelu(x):
return K.minimum(K.maximum(1-x, 0), K.maximum(1+x, 0))
act = ks.layers.Activation(mrelu)
elif activation == 'gaussian':
def gaussian(x):
return K.exp(-x**2)
act = ks.layers.Activation(gaussian)
elif activation == 'flipped_gaussian':
def flipped_gaussian(x):
return 1 - K.exp(-x**2)
act = ks.layers.Activation(flipped_gaussian)
else:
act = ks.layers.Activation(activation)
if tns is not None:
return act(tns)
else:
return act
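# Usage note: per the docstring above, get_activation can either wrap a
# tensor directly or return a layer instance to apply later, e.g.
#   tns = get_activation(tns, activation='leakyrelu')  # connects the graph, returns a tensor
#   act = get_activation(activation='swish')           # returns a layer instance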
class FiLM(ks.layers.Layer):
def __init__(self, widths=[64,64], activation='leakyrelu',
initialization='glorot_uniform', **kwargs):
self.widths = widths
self.activation = activation
self.initialization = initialization
super(FiLM, self).__init__(**kwargs)
def build(self, input_shape):
assert isinstance(input_shape, list)
feature_map_shape, FiLM_vars_shape = input_shape
self.n_feature_maps = feature_map_shape[-1]
self.height = feature_map_shape[1]
self.width = feature_map_shape[2]
# Collect trainable weights
trainable_weights = []
# Create weights for hidden layers
self.hidden_dense_layers = []
for i,width in enumerate(self.widths):
dense = ks.layers.Dense(width,
kernel_initializer=self.initialization,
name=f'FiLM_dense_{i}')
if i==0:
build_shape = FiLM_vars_shape[:2]
else:
build_shape = (None,self.widths[i-1])
dense.build(build_shape)
trainable_weights += dense.trainable_weights
self.hidden_dense_layers.append(dense)
# Create weights for output layer
self.output_dense = ks.layers.Dense(2 * self.n_feature_maps, # assumes channel_last
kernel_initializer=self.initialization,
name=f'FiLM_dense_output')
self.output_dense.build((None,self.widths[-1]))
trainable_weights += self.output_dense.trainable_weights
# Pass on all collected trainable weights
self._trainable_weights = trainable_weights
super(FiLM, self).build(input_shape)
def call(self, x):
assert isinstance(x, list)
conv_output, FiLM_vars = x
# Generate FiLM outputs
tns = FiLM_vars
for i in range(len(self.widths)):
tns = self.hidden_dense_layers[i](tns)
tns = get_activation(activation=self.activation)(tns)
FiLM_output = self.output_dense(tns)
# Duplicate in order to apply to entire feature maps
# Taken from https://github.com/GuessWhatGame/neural_toolbox/blob/master/film_layer.py
FiLM_output = K.expand_dims(FiLM_output, axis=1)
FiLM_output = K.expand_dims(FiLM_output, axis=1)
FiLM_output = K.tile(FiLM_output, [1, self.height, self.width, 1])
# Split into gammas and betas
gammas = FiLM_output[:, :, :, :self.n_feature_maps]
betas = FiLM_output[:, :, :, self.n_feature_maps:]
# Apply affine transformation
return (1 + gammas) * conv_output + betas
def compute_output_shape(self, input_shape):
assert isinstance(input_shape, list)
return input_shape[0]
print(ks.__version__)
# Load example data (here MNIST)
from keras.datasets import mnist
(x_img_train, y_train), _ = mnist.load_data()
x_img_train = np.expand_dims(x_img_train,-1)
# Generate some data to use for the transformations
n_transform_vars = 10
x_transform_train = np.random.randn(y_train.shape[0], n_transform_vars)
# Inputs
input_transform = ks.layers.Input(x_transform_train.shape[1:], name='transform_vars')
input_img = ks.layers.Input(x_img_train.shape[1:], name='imgs')
# Number of feature maps
n_features = 32
# Do a convolution
tns = ks.layers.Conv2D(filters=n_features, kernel_size=3, padding='same')(input_img)
# Apply batch norm
bn = ks.layers.BatchNormalization()
# Freeze the weights of the batch norm, as they are going to be overwritten
bn.trainable = False
# Apply batch norm
tns = bn(tns)
# Apply FiLM layer
tns = FiLM(widths=[12,24], name='FiLM_layer')([tns, input_transform])
# Make 1D output
tns = ks.layers.Flatten()(tns)
output = ks.layers.Dense(1)(tns)
# Compile and plot
model = ks.models.Model(inputs=[input_img, input_transform], outputs=output)
model.compile(loss='mse', optimizer='Adam')
model.summary()
ks.utils.plot_model(model, './model_with_FiLM.png')
# Train
model.fit([x_img_train, x_transform_train], y_train, batch_size=8)
EDIT:
Here is the "non-active" FiLM layer, which takes in the predictions of another network (the FiLM generator) and uses them as gammas and betas.
This way of doing it is equivalent but simpler, since all the trainable weights stay in the FiLM generator, which also makes weight sharing straightforward.
class FiLM(ks.layers.Layer):
def __init__(self, **kwargs):
super(FiLM, self).__init__(**kwargs)
def build(self, input_shape):
assert isinstance(input_shape, list)
feature_map_shape, FiLM_tns_shape = input_shape
self.height = feature_map_shape[1]
self.width = feature_map_shape[2]
self.n_feature_maps = feature_map_shape[-1]
assert(int(2 * self.n_feature_maps)==FiLM_tns_shape[1])
super(FiLM, self).build(input_shape)
def call(self, x):
assert isinstance(x, list)
conv_output, FiLM_tns = x
# Duplicate in order to apply to entire feature maps
# Taken from https://github.com/GuessWhatGame/neural_toolbox/blob/master/film_layer.py
FiLM_tns = K.expand_dims(FiLM_tns, axis=1)
FiLM_tns = K.expand_dims(FiLM_tns, axis=1)
FiLM_tns = K.tile(FiLM_tns, [1, self.height, self.width, 1])
# Split into gammas and betas
gammas = FiLM_tns[:, :, :, :self.n_feature_maps]
betas = FiLM_tns[:, :, :, self.n_feature_maps:]
# Apply affine transformation
return (1 + gammas) * conv_output + betas
def compute_output_shape(self, input_shape):
assert isinstance(input_shape, list)
return input_shape[0]
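As a minimal usage sketch (reusing input_transform, input_img and n_features from the full example above, with a hypothetical two-layer generator), the FiLM generator is then just any network that predicts 2 * n_features values per sample:
# FiLM generator: any network predicting 2 * n_features values per sample
tns_film = ks.layers.Dense(64, activation='relu')(input_transform)
tns_film = ks.layers.Dense(2 * n_features)(tns_film)  # shape (batch, 2 * n_features)
# Feature maps to modulate
tns = ks.layers.Conv2D(filters=n_features, kernel_size=3, padding='same')(input_img)
# The "non-active" FiLM layer splits the generator output into gammas and betas
tns = FiLM(name='FiLM_layer')([tns, tns_film])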
Related
I implemented this code for a node classification task on the IEEE 118-bus dataset. The input is x = [2960, 118], a float32 NumPy array, and y = [2960, 1] holds the 0/1 label of each graph. My goal is to run binary classification on the 2960 graphs in x, based on their node features. Below is my code:
# Imports used by the code below
import tensorflow as tf
import torch
from tensorflow import keras
from tensorflow.keras import layers

hidden_units = [64, 64]
learning_rate = 0.01
dropout_rate = 0.5
num_epochs = 50
batch_size = 256
def run_experiment(model, x_train, y_train):
# Compile the model.
model.compile(
optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
loss=keras.losses.BinaryCrossentropy(from_logits=True),
metrics=[keras.metrics.BinaryAccuracy(name="acc")],
)
# Create an early stopping callback.
early_stopping = keras.callbacks.EarlyStopping(
monitor='loss', patience=110, restore_best_weights=True
)
history = model.fit(
x=x_train,
y=y_train,
epochs=num_epochs,
batch_size=batch_size,
validation_split=0.1,
callbacks=[early_stopping],
)
return history
def create_ffn(hidden_units, dropout_rate, name=None):
fnn_layers = []
for units in hidden_units:
fnn_layers.append(layers.BatchNormalization())
fnn_layers.append(layers.Dropout(dropout_rate))
fnn_layers.append(layers.Dense(units, activation=tf.nn.relu))
return keras.Sequential(fnn_layers, name=name)
node_features = train_graph_pi
adj_new = adj_mat.tolist()
adj_t = torch.tensor(adj_new)
edge_index = adj_t.nonzero().t().contiguous()
edges = edge_index.numpy()
edge_weights = None
graph_info = (node_features, edges, edge_weights)
print("Edges shape:", edges.shape)
print("Nodes shape:", node_features.shape)
class GraphConvLayer(layers.Layer):
def __init__(
self,
hidden_units,
dropout_rate=0.5,
aggregation_type="sum",
combination_type="gru",
normalize=False,
*args,
**kwargs,
):
super(GraphConvLayer, self).__init__(*args, **kwargs)
self.aggregation_type = aggregation_type
self.combination_type = combination_type
self.normalize = normalize
self.ffn_prepare = create_ffn(hidden_units, dropout_rate)
if self.combination_type == "gated":
self.update_fn = layers.GRU(
units=hidden_units,
activation="tanh",
recurrent_activation="sigmoid",
dropout=dropout_rate,
return_state=True,
recurrent_dropout=dropout_rate,
)
else:
self.update_fn = create_ffn(hidden_units, dropout_rate)
def prepare(self, node_repesentations, weights=None):
# node_repesentations shape is [num_edges, embedding_dim].
messages = self.ffn_prepare(node_repesentations)
if weights is not None:
messages = messages * tf.expand_dims(weights, -1)
return messages
def aggregate(self, node_indices, neighbour_messages):
# node_indices shape is [num_edges].
# neighbour_messages shape: [num_edges, representation_dim].
num_nodes = tf.math.reduce_max(node_indices) + 1
if self.aggregation_type == "sum":
aggregated_message = tf.math.unsorted_segment_sum(
neighbour_messages, node_indices, num_segments=num_nodes
)
elif self.aggregation_type == "mean":
aggregated_message = tf.math.unsorted_segment_mean(
neighbour_messages, node_indices, num_segments=num_nodes
)
elif self.aggregation_type == "max":
aggregated_message = tf.math.unsorted_segment_max(
neighbour_messages, node_indices, num_segments=num_nodes
)
else:
raise ValueError(f"Invalid aggregation type: {self.aggregation_type}.")
return aggregated_message
def update(self, node_repesentations, aggregated_messages):
# node_repesentations shape is [num_nodes, representation_dim].
# aggregated_messages shape is [num_nodes, representation_dim].
if self.combination_type == "gru":
# Create a sequence of two elements for the GRU layer.
h = tf.stack([node_repesentations, aggregated_messages], axis=1)
elif self.combination_type == "concat":
# Concatenate the node_repesentations and aggregated_messages.
h = tf.concat([node_repesentations, aggregated_messages], axis=1)
elif self.combination_type == "add":
# Add node_repesentations and aggregated_messages.
h = node_repesentations + aggregated_messages
else:
raise ValueError(f"Invalid combination type: {self.combination_type}.")
# Apply the processing function.
node_embeddings = self.update_fn(h)
if self.combination_type == "gru":
node_embeddings = tf.unstack(node_embeddings, axis=1)[-1]
if self.normalize:
node_embeddings = tf.nn.l2_normalize(node_embeddings, axis=-1)
return node_embeddings
def call(self, inputs):
"""Process the inputs to produce the node_embeddings.
inputs: a tuple of three elements: node_repesentations, edges, edge_weights.
Returns: node_embeddings of shape [num_nodes, representation_dim].
"""
node_repesentations, edges, edge_weights = inputs
# Get node_indices (source) and neighbour_indices (target) from edges.
node_indices, neighbour_indices = edges[0], edges[1]
# neighbour_repesentations shape is [num_edges, representation_dim].
neighbour_repesentations = tf.gather(node_repesentations, neighbour_indices)
# Prepare the messages of the neighbours.
neighbour_messages = self.prepare(neighbour_repesentations, edge_weights)
# Aggregate the neighbour messages.
aggregated_messages = self.aggregate(node_indices, neighbour_messages)
# Update the node embedding with the neighbour messages.
return self.update(node_repesentations, aggregated_messages)
class GNNNodeClassifier(tf.keras.Model):
def __init__(
self,
graph_info,
num_classes,
hidden_units,
aggregation_type="sum",
combination_type="concat",
dropout_rate=0.5,
normalize=True,
*args,
**kwargs,
):
super(GNNNodeClassifier, self).__init__(*args, **kwargs)
# Unpack graph_info to three elements: node_features, edges, and edge_weight.
node_features, edges, edge_weights = graph_info
self.node_features = node_features
self.edges = edges
self.edge_weights = edge_weights
# Set edge_weights to ones if not provided.
if self.edge_weights is None:
self.edge_weights = tf.ones(shape=edges.shape[1])
# Scale edge_weights to sum to 1.
self.edge_weights = self.edge_weights / tf.math.reduce_sum(self.edge_weights)
# Create a process layer.
# Create the first GraphConv layer.
self.conv1 = GraphConvLayer(
hidden_units,
dropout_rate,
aggregation_type,
combination_type,
normalize,
name="graph_conv1",
)
# Create the second GraphConv layer.
self.conv2 = GraphConvLayer(
hidden_units,
dropout_rate,
aggregation_type,
combination_type,
normalize,
name="graph_conv2",
)
self.conv3 = GraphConvLayer(
hidden_units,
dropout_rate,
aggregation_type,
combination_type,
normalize,
name="graph_conv3",
)
self.conv4 = GraphConvLayer(
hidden_units,
dropout_rate,
aggregation_type,
combination_type,
normalize,
name="graph_conv4",
)
# Create a compute logits layer.
self.compute_logits = layers.Dense(units=num_classes-1, name="logits")
def call(self, input_node_indices):
# Apply the first graph conv layer.
x1 = self.conv1((self.node_features, self.edges, self.edge_weights))
# Skip connection.
# Apply the second graph conv layer.
x2 = self.conv2((x1, self.edges, self.edge_weights))
# Skip connection.
x = x2 + x1
# Apply the third graph conv layer.
x3 = self.conv3((x, self.edges, self.edge_weights))
# Skip connection.
x = x3 + x
# Apply the fourth graph conv layer.
x4 = self.conv4((x, self.edges, self.edge_weights))
# Skip connection.
x = x4 + x
# Fetch node embeddings for the input node_indices.
node_embeddings = tf.gather(x, input_node_indices)
# Compute logits
return self.compute_logits(node_embeddings)
gnn_model = GNNNodeClassifier(
graph_info=graph_info,
num_classes=num_classes,
hidden_units=hidden_units,
dropout_rate=dropout_rate,
name="gnn_model")
print("GNN output shape:", gnn_model([100]))
gnn_model.summary()
The summary of the above architecture is:
=================================================================
graph_conv1 (GraphConvLayer)    multiple    415872
graph_conv2 (GraphConvLayer)    multiple    22016
graph_conv3 (GraphConvLayer)    multiple    22016
graph_conv4 (GraphConvLayer)    multiple    22016
logits (Dense)                  multiple    65
=================================================================
Total params: 481,985
Trainable params: 467,841
Non-trainable params: 14,144
However, when I begin training the model, I get the following error:
ValueError: logits and labels must have the same shape, received ((None, 118, 1) vs (None, 1, 1)).
However, when I change to from_logits=False in model.compile, the model runs, but either it overfits or the loss and accuracy values are bad.
I have changed the BinaryCrossentropy loss function to other loss functions such as SparseCategoricalCrossentropy and CategoricalCrossentropy, but the error remains. I have also tried adding a Flatten() layer to my architecture, but so far the same error shows up.
I would be thankful if anyone could help me with this issue! Thanks!
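For reference, the aggregate step in GraphConvLayer above relies on tf.math.unsorted_segment_sum; a minimal standalone sketch of its semantics (toy values, not from the dataset above):
import tensorflow as tf
# Four edge messages being summed into two target nodes.
messages = tf.constant([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])
node_indices = tf.constant([0, 1, 0, 1])
# Node 0 receives messages 0 and 2; node 1 receives messages 1 and 3.
print(tf.math.unsorted_segment_sum(messages, node_indices, num_segments=2))
# -> [[4. 4.], [6. 6.]]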
I'm trying to apply the "Attention Is All You Need" paper to time series, with a few tweaks, but I'm getting this error:
OperatorNotAllowedInGraphError: iterating over tf.Tensor is not allowed: AutoGraph did convert this function.
Code:
import tensorflow as tf
from tensorflow import keras
class Attention(tf.keras.layers.Layer):
def __init__(self, dk, dv, num_heads, filter_size):
super().__init__()
self.dk = dk
self.dv = dv
self.num_heads = num_heads
self.conv_q = tf.keras.layers.Conv1D(dk * num_heads, filter_size, padding='causal')
self.conv_k = tf.keras.layers.Conv1D(dk * num_heads, filter_size, padding='causal')
self.dense_v = tf.keras.layers.Dense(dv * num_heads)
self.dense1 = tf.keras.layers.Dense(dv, activation='relu')
self.dense2 = tf.keras.layers.Dense(dv)
def split_heads(self, x, batch_size, dim):
x = tf.reshape(x, (batch_size, -1, self.num_heads, dim))
return tf.transpose(x, perm=[0, 2, 1, 3])
def call(self, inputs):
batch_size, time_steps, _ = tf.shape(inputs)
q = self.conv_q(inputs)
k = self.conv_k(inputs)
v = self.dense_v(inputs)
q = self.split_heads(q, batch_size, self.dk)
k = self.split_heads(k, batch_size, self.dk)
v = self.split_heads(v, batch_size, self.dv)
mask = 1 - tf.linalg.band_part(tf.ones((batch_size, self.num_heads, time_steps, time_steps)), -1, 0)
dk = tf.cast(self.dk, tf.float32)
score = tf.nn.softmax(tf.matmul(q, k, transpose_b=True)/tf.math.sqrt(dk) + mask * -1e9)
outputs = tf.matmul(score, v)
outputs = tf.transpose(outputs, perm=[0, 2, 1, 3])
outputs = tf.reshape(outputs, (batch_size, time_steps, -1))
outputs = self.dense1(outputs)
outputs = self.dense2(outputs)
return outputs
class Transformer(tf.keras.models.Model):
"""
Time Series Transformer Model
"""
def __init__(self, dk, dv, num_heads, filter_size):
super().__init__()
self.attention = Attention(dk, dv, num_heads, filter_size)
self.dense_sigma = tf.keras.layers.Dense(1)
def call(self, inputs):
outputs = self.attention(inputs)
sigma = self.dense_sigma(outputs)
return sigma
Mymodel= Transformer(3,3,4,3)
Mymodel.compile(loss="mean_squared_error",
optimizer=keras.optimizers.Adam(learning_rate=1e-4),)
Mymodel.fit(X_train,Y_train,epochs=10,batch_size=32)
# X_train & Y_train are NumPy arrays with shape (batch_size, timesteps, number_of_features)
Full Error :
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
984 except Exception as e: # pylint:disable=broad-except
985 if hasattr(e, "ag_error_metadata"):
--> 986 raise e.ag_error_metadata.to_exception(e)
987 else:
988 raise
OperatorNotAllowedInGraphError: in user code:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:855 train_function *
return step_function(self, iterator)
<ipython-input-20-56419dd4aeb6>:61 call *
outputs = self.attention(inputs)
<ipython-input-3-a936077354d3>:24 call *
batch_size, time_steps, _ = tf.shape(inputs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:520 __iter__
self._disallow_iteration()
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:513 _disallow_iteration
self._disallow_when_autograph_enabled("iterating over `tf.Tensor`")
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:491 _disallow_when_autograph_enabled
" indicate you are trying to use an unsupported feature.".format(task))
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.
It seems that in graph mode, unpacking a tensor this way requires iterating over it, which is not allowed. You can use this instead:
batch_size = tf.shape(inputs)[0]
time_steps = tf.shape(inputs)[1]
My first recommendation was to use .shape, but I modified my answer after seeing this hint in the TensorFlow docs:
tf.shape(x) and x.shape should be identical in eager mode. Within tf.function, not all dimensions may be known until execution time. Hence when defining custom layers and models for graph mode, prefer the dynamic tf.shape(x) over the static x.shape.
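A minimal sketch of that difference (nothing assumed beyond TensorFlow itself):
import tensorflow as tf

@tf.function(input_signature=[tf.TensorSpec(shape=[None, None, 8])])
def f(x):
    print('static:', x.shape)          # trace time: (None, None, 8)
    tf.print('dynamic:', tf.shape(x))  # run time: e.g. [2 5 8]
    return x

f(tf.zeros([2, 5, 8]))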
When I define the Hierarchical Attention Network, an error pops up that says "AttributeError: can't set attribute". Please help.
This is the Attention.py file:
import keras
from keras.engine.topology import Layer, Input
from keras import backend as K
from keras import initializers
#Hierarchical Attention Layer Implementation
'''
Implemented by Arkadipta De (MIT Licensed)
'''
class Hierarchical_Attention(Layer):
def __init__(self, attention_dim):
self.init = initializers.get('normal')
self.supports_masking = True
self.attention_dim = attention_dim
super(Hierarchical_Attention, self).__init__()
def build(self, input_shape):
assert len(input_shape) == 3
self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
self.b = K.variable(self.init((self.attention_dim, )))
self.u = K.variable(self.init((self.attention_dim, 1)))
self.trainable_weights = [self.W, self.b, self.u]
super(Hierarchical_Attention, self).build(input_shape)
def compute_mask(self, inputs, mask=None):
return mask
def call(self, x, mask=None):
# size of x :[batch_size, sel_len, attention_dim]
# size of u :[batch_size, attention_dim]
# uit = tanh(xW+b)
uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
ait = K.dot(uit, self.u)
ait = K.squeeze(ait, -1)
ait = K.exp(ait)
if mask is not None:
# Cast the mask to floatX to avoid float64 upcasting in theano
ait *= K.cast(mask, K.floatx())
ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
ait = K.expand_dims(ait)
weighted_input = x * ait
output = K.sum(weighted_input, axis=1)
return output
def compute_output_shape(self, input_shape):
return (input_shape[0], input_shape[-1])
This is the main file where I'm building the model.
import re
import os
import numpy as np
import pandas as pd
import keras
from keras.engine.topology import Layer, Input
import Attention
from sklearn.model_selection import train_test_split
from keras.models import Model, Input
from keras.layers import Dropout, Dense, LSTM, GRU, Bidirectional, concatenate, Multiply, Subtract
from keras.utils import to_categorical
from keras import backend as K
from keras import initializers
Max_Title_Length = 0
Max_Content_Length = 0
for i in range(0, len(X)):
Max_Title_Length = max(Max_Title_Length, len(X['title'][i]))
Max_Content_Length = max(Max_Content_Length, len(X['text'][i]))
vector_size = 100
input_title = Input(shape = (Max_Title_Length,vector_size,), name = 'input_title')
input_content = Input(shape = (Max_Content_Length,vector_size,), name = 'input_content')
def Classifier(input_title, input_content):
#x = Bidirectional(GRU(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_title)
x = Bidirectional(GRU(100, return_sequences=True))(input_title)
x_attention = Attention.Hierarchical_Attention(100)(x)
#y = Bidirectional(LSTM(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_content)
y = Bidirectional(GRU(100, return_sequences=True))(input_content)
y_attention = Attention.Hierarchical_Attention(100)(y)
z = concatenate([x_attention,y_attention])
z = Dense(units = 512, activation = 'relu')(z)
z = Dropout(0.2)(z)
z = Dense(units = 256, activation = 'relu')(z)
z = Dropout(0.2)(z)
z = Dense(units = 128, activation = 'relu')(z)
z = Dropout(0.2)(z)
z = Dense(units = 50, activation = 'relu')(z)
z = Dropout(0.2)(z)
z = Dense(units = 10, activation = 'relu')(z)
z = Dropout(0.2)(z)
output = Dense(units = 2, activation = 'softmax')(z)
model = Model(inputs = [input_title, input_content], outputs = output)
model.summary()
return model
def compile_and_train(model, num_epochs):
model.compile(optimizer= 'adam', loss= 'categorical_crossentropy', metrics=['acc'])
history = model.fit([train_x_title,train_x_content], train_label, batch_size=32, epochs=num_epochs)
return history
Classifier_Model = Classifier(input_title,input_content)
This code is giving me an error which says:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __setattr__(self, name, value)
2761 try:
-> 2762 super(tracking.AutoTrackable, self).__setattr__(name, value)
2763 except AttributeError:
AttributeError: can't set attribute
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
6 frames
<ipython-input-43-32804502e0b0> in <module>()
32 return history
33
---> 34 Classifier_Model = Classifier(input_title,input_content)
<ipython-input-43-32804502e0b0> in Classifier(input_title, input_content)
7 #x = Bidirectional(GRU(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_title)
8 x = Bidirectional(GRU(200, return_sequences=True))(input_title)
----> 9 x_attention = Attention.Hierarchical_Attention(100)(x)
10 #y = Bidirectional(LSTM(units = 100, return_sequences = True, kernel_initializer = keras.initializers.lecun_normal(seed = None), unit_forget_bias = True))(input_content)
11 y = Bidirectional(GRU(100, return_sequences=True))(input_content)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
924 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
925 return self._functional_construction_call(inputs, args, kwargs,
--> 926 input_list)
927
928 # Maintains info about the `Layer.call` stack.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1096 # Build layer if applicable (if the `build` method has been
1097 # overridden).
-> 1098 self._maybe_build(inputs)
1099 cast_inputs = self._maybe_cast_inputs(inputs, input_list)
1100
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in _maybe_build(self, inputs)
2641 # operations.
2642 with tf_utils.maybe_init_scope(self):
-> 2643 self.build(input_shapes) # pylint:disable=not-callable
2644 # We must set also ensure that the layer is marked as built, and the build
2645 # shape is stored since user defined build functions may not be calling
/content/Attention.py in build(self, input_shape)
23 self.b = K.variable(self.init((self.attention_dim, )))
24 self.u = K.variable(self.init((self.attention_dim, 1)))
---> 25 self.trainable_weights = [self.W, self.b, self.u]
26 super(Hierarchical_Attention, self).build(input_shape)
27
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __setattr__(self, name, value)
2765 ('Can\'t set the attribute "{}", likely because it conflicts with '
2766 'an existing read-only #property of the object. Please choose a '
-> 2767 'different name.').format(name))
2768 return
2769
AttributeError: Can't set the attribute "trainable_weights", likely because it conflicts with an existing read-only #property of the object. Please choose a different name.
I'm a noob in Neural Networks. Please help.
I ran into the same problem when I was trying to execute the code on Google Colab.
I found some answers on Stack Overflow saying it's an ongoing issue with TF on Colab.
link here
It remains unsolved for me, but I believe you can try to set self._trainable_weights instead of self.trainable_weights
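Alternatively, a sketch of build using the Layer.add_weight API, which avoids assigning to the read-only trainable_weights property altogether (same shapes and initializer as in the question):
def build(self, input_shape):
    assert len(input_shape) == 3
    # add_weight creates and tracks the variables as trainable, so there is
    # no need to assign to trainable_weights.
    self.W = self.add_weight(name='W',
                             shape=(input_shape[-1], self.attention_dim),
                             initializer='normal', trainable=True)
    self.b = self.add_weight(name='b',
                             shape=(self.attention_dim,),
                             initializer='normal', trainable=True)
    self.u = self.add_weight(name='u',
                             shape=(self.attention_dim, 1),
                             initializer='normal', trainable=True)
    super(Hierarchical_Attention, self).build(input_shape)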
I'm trying to follow this tutorial: https://colab.research.google.com/github/tensorflow/examples/blob/master/community/en/transformer_chatbot.ipynb. However, when I tried to save the model in order to load it again without training, I got the error mentioned here: NotImplementedError: Layers with arguments in `__init__` must override `get_config`
I understood from the answer that I need to make the encoder and decoder classes and customize them (instead of leaving them as functions like the colab tutorial), so I went back to the TensorFlow documentation of this model here: https://www.tensorflow.org/tutorials/text/transformer#encoder_layer and tried to edit it. I made the encoder layer as:
class EncoderLayer(tf.keras.layers.Layer):
def __init__(self, d_model, num_heads, rate=0.1, **kwargs):
#super(EncoderLayer, self).__init__()
super().__init__(**kwargs)
self.mha = MultiHeadAttention(d_model, num_heads)
self.ffn = point_wise_feed_forward_network(d_model, dff)
self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
self.dropout1 = tf.keras.layers.Dropout(rate)
self.dropout2 = tf.keras.layers.Dropout(rate)
def get_config(self):
config = super().get_config().copy()
config.update({
#'vocab_size': self.vocab_size,
#'num_layers': self.num_layers,
#'units': self.units,
'd_model': self.d_model,
'num_heads': self.num_heads,
'dropout': self.dropout,
})
return config
def call(self, x, training, mask):
attn_output, _ = self.mha(x, x, x, mask) # (batch_size, input_seq_len, d_model)
attn_output = self.dropout1(attn_output, training=training)
out1 = self.layernorm1(x + attn_output) # (batch_size, input_seq_len, d_model)
ffn_output = self.ffn(out1) # (batch_size, input_seq_len, d_model)
ffn_output = self.dropout2(ffn_output, training=training)
out2 = self.layernorm2(out1 + ffn_output) # (batch_size, input_seq_len, d_model)
return out2
and the same for the decoder layer class. Then I used the same Encoder as in the TF documentation:
class Encoder(tf.keras.layers.Layer):
def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
maximum_position_encoding, rate=0.1):
super(Encoder, self).__init__()
self.d_model = d_model
self.num_layers = num_layers
self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
self.pos_encoding = positional_encoding(maximum_position_encoding,
self.d_model)
self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate)
for _ in range(num_layers)]
self.dropout = tf.keras.layers.Dropout(rate)
def call(self, x, training, mask):
seq_len = tf.shape(x)[1]
# adding embedding and position encoding.
x = self.embedding(x) # (batch_size, input_seq_len, d_model)
x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
x += self.pos_encoding[:, :seq_len, :]
x = self.dropout(x, training=training)
for i in range(self.num_layers):
x = self.enc_layers[i](x, training, mask)
return x # (batch_size, input_seq_len, d_model)
and the function that builds the model as:
def transformer(vocab_size,
num_layers,
units,
d_model,
num_heads,
dropout,
name="transformer"):
inputs = tf.keras.Input(shape=(None,), name="inputs")
dec_inputs = tf.keras.Input(shape=(None,), name="dec_inputs")
enc_padding_mask = tf.keras.layers.Lambda(
create_padding_mask, output_shape=(1, 1, None),
name='enc_padding_mask')(inputs)
# mask the future tokens for decoder inputs at the 1st attention block
look_ahead_mask = tf.keras.layers.Lambda(
create_look_ahead_mask,
output_shape=(1, None, None),
name='look_ahead_mask')(dec_inputs)
# mask the encoder outputs for the 2nd attention block
dec_padding_mask = tf.keras.layers.Lambda(
create_padding_mask, output_shape=(1, 1, None),
name='dec_padding_mask')(inputs)
enc_outputs = Encoder(
num_layers=num_layers, d_model=d_model, num_heads=num_heads,
input_vocab_size=vocab_size,
)(inputs=[inputs, enc_padding_mask])
dec_outputs = Decoder(
num_layers=num_layers, d_model=d_model, num_heads=num_heads,
target_vocab_size=vocab_size,
)(inputs=[dec_inputs, enc_outputs, look_ahead_mask, dec_padding_mask])
outputs = tf.keras.layers.Dense(units=vocab_size, name="outputs")(dec_outputs)
return tf.keras.Model(inputs=[inputs, dec_inputs], outputs=outputs, name=name)
and calling the model:
# The model itself with its parameters:
# Hyper-parameters
NUM_LAYERS = 3
D_MODEL = 256
#D_MODEL=tf.cast(D_MODEL, tf.float32)
NUM_HEADS = 8
UNITS = 512
DROPOUT = 0.1
model = transformer(
vocab_size=VOCAB_SIZE,
num_layers=NUM_LAYERS,
units=UNITS,
d_model=D_MODEL,
num_heads=NUM_HEADS,
dropout=DROPOUT)
However, I got this error:
TypeError: __init__() missing 2 required positional arguments: 'dff' and 'maximum_position_encoding'
I am really confused, and I don't understand what dff and maximum_position_encoding mean in the documentation. When I removed them from the encoder and decoder classes, I got another error, since the positional_encoding function takes the maximum position as input and dff is also used inside the class. I am not sure what I should do, or whether I am following the right steps at all.
If you get this error while calling transformer then your problem is with creating the model, not saving it.
Other than that, I see several issues with your get_config:
You defined dropout instead of rate.
The attributes you address (self.d_model etc.) are not defined or assigned in __init__.
It doesn't exist for your Encoder class.
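For illustration, a sketch of EncoderLayer that stores its constructor arguments (keeping the tutorial's MultiHeadAttention and point_wise_feed_forward_network, and accepting dff explicitly since the layer uses it):
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Store every constructor argument so get_config can report it.
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.rate = rate
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def get_config(self):
        config = super().get_config()
        config.update({
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dff': self.dff,
            'rate': self.rate,
        })
        return config
The Encoder class would need the analogous treatment for its own arguments (num_layers, d_model, num_heads, dff, input_vocab_size, maximum_position_encoding, rate).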
I'm trying to build a variational autoencoder in Keras following the example in the Keras repository. Here's my setup:
from keras.layers import Dense, Input, concatenate, LSTM, Lambda, Flatten, Reshape
from keras import backend as K
from keras.models import Model
from keras.losses import mse
import numpy as np
class VAE:
def __init__(self, n_verts=15, n_dims=3, n_layers=3, n_units=128, latent_dim=2):
self.n_verts = n_verts
self.n_dims = n_dims
self.n_layers = n_layers
self.n_units = n_units
self.latent_dim = latent_dim
self.encoder = self.build_encoder()
self.decoder = self.build_decoder()
inputs = Input((self.n_verts, self.n_dims))
outputs = self.decoder(self.encoder(inputs)[2])
self.model = Model(inputs, outputs, name='vae')
self.model.compile(optimizer='adam', loss=self.get_loss)
def build_encoder(self):
i = Input(shape=(self.n_verts, self.n_dims), name='encoder_input')
h = i
h = Flatten()(h)
h = Dense(self.n_units, activation='relu')(h)
for idx in range(1, self.n_layers, 1):
h = Dense(self.n_units // (2*idx), activation='relu')(h)
self.z_mean = Dense(self.latent_dim, name='z_mean')(h)
self.z_log_var = Dense(self.latent_dim, name='z_log_var')(h)
# use reparameterization trick to factor stochastic node out of gradient flow
self.z = Lambda(self.sample, output_shape=(self.latent_dim,), name='z')([self.z_mean, self.z_log_var])
return Model(i, [self.z_mean, self.z_log_var, self.z], name='encoder')
def sample(self, args):
'''
Reparameterization trick by sampling from an isotropic unit Gaussian.
#arg (tensor): mean and log of variance of Q(z|X)
#returns z (tensor): sampled latent vector
'''
z_mean, z_log_var = args
batch = K.shape(z_mean)[0]
dim = K.int_shape(z_mean)[1]
# by default, random_normal has mean = 0 and std = 1.0
epsilon = K.random_normal(shape=(batch, dim))
return z_mean + K.exp(0.5 * z_log_var) * epsilon
def build_decoder(self):
i = Input(shape=(self.latent_dim,), name='z_sampling')
h = i
for idx in range(1, self.n_layers, 1):
h = Dense(self.n_units//(2*(self.n_layers-idx)), activation='relu')(h)
h = Dense(self.n_units, activation='relu')(h)
h = Dense(self.n_verts * self.n_dims, activation='sigmoid')(h)
o = Reshape((self.n_verts, self.n_dims))(h)
return Model(i, o, name='decoder')
def get_loss(self, inputs, outputs):
reconstruction_loss = mse(inputs, outputs)
reconstruction_loss *= self.n_verts * self.n_dims
return reconstruction_loss # this works fine
kl_loss = 1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = K.mean(reconstruction_loss + kl_loss) # todo: make this balance parameterizable
return vae_loss # this doesn't
def train(self, X, predict='frame', n_epochs=10000):
for idx in range(n_epochs):
i = np.random.randint(0, X.shape[1]-1) # sample idx
frame = np.expand_dims( X[:,i:i+1,:].squeeze(), axis=0) # shape = 1 sample, v verts, d dims
next_frame = np.expand_dims( X[:,i+1:i+2,:].squeeze(), axis=0)
if predict == 'frame': loss = self.model.train_on_batch(frame, frame)
elif predict == 'next_frame': loss = self.model.train_on_batch(frame, next_frame)
if idx % 1000 == 0:
print(' * training idx', idx, 'loss', loss)
X_train = np.random.rand(15, 100, 3)
vae = VAE(n_verts=15, latent_dim=2, n_layers=3, n_units=128)
vae.encoder.summary()
vae.train(X_train, n_epochs=10000, predict='frame')
This works, but if you look at the get_loss function you'll see it's returning a little prematurely. If I comment out return reconstruction_loss so that the loss function returns vae_loss, I get an error:
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-7-57d76ed539a4> in <module>
78 vae = VAE(n_verts=15, latent_dim=2, n_layers=3, n_units=128)
79 vae.encoder.summary()
---> 80 vae.train(X_train, n_epochs=10000, predict='frame')
<ipython-input-7-57d76ed539a4> in train(self, X, predict, n_epochs)
70 frame = np.expand_dims( X[:,i:i+1,:].squeeze(), axis=0) # shape = 1 sample, v verts, d dims
71 next_frame = np.expand_dims( X[:,i+1:i+2,:].squeeze(), axis=0)
---> 72 if predict == 'frame': loss = self.model.train_on_batch(frame, frame)
73 elif predict == 'next_frame': loss = self.model.train_on_batch(frame, next_frame)
74 if idx % 1000 == 0:
~/anaconda/envs/3.5/lib/python3.5/site-packages/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight)
1215 ins = x + y + sample_weights
1216 self._make_train_function()
-> 1217 outputs = self.train_function(ins)
1218 return unpack_singleton(outputs)
1219
~/anaconda/envs/3.5/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
2713 return self._legacy_call(inputs)
2714
-> 2715 return self._call(inputs)
2716 else:
2717 if py_any(is_tensor(x) for x in inputs):
~/anaconda/envs/3.5/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py in _call(self, inputs)
2673 fetched = self._callable_fn(*array_vals, run_metadata=self.run_metadata)
2674 else:
-> 2675 fetched = self._callable_fn(*array_vals)
2676 return fetched[:len(self.outputs)]
2677
~/anaconda/envs/3.5/lib/python3.5/site-packages/tensorflow/python/client/session.py in __call__(self, *args, **kwargs)
1437 ret = tf_session.TF_SessionRunCallable(
1438 self._session._session, self._handle, args, status,
-> 1439 run_metadata_ptr)
1440 if run_metadata:
1441 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
~/anaconda/envs/3.5/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
526 None, None,
527 compat.as_text(c_api.TF_Message(self.status.status)),
--> 528 c_api.TF_GetCode(self.status.status))
529 # Delete the underlying status object from memory otherwise it stays alive
530 # as there is a reference to status from this from the traceback due to
InvalidArgumentError: You must feed a value for placeholder tensor 'encoder_input_6' with dtype float and shape [?,15,3] [[{{node encoder_input_6}}]]
Does anyone know how to resolve this error? Any suggestions would be hugely appreciated!
Ah, I got this worked out once my variables were scoped properly:
from keras.layers import Dense, Input, concatenate, LSTM, Lambda, Flatten, Reshape
from keras import backend as K
from keras.models import Model
from keras.losses import mse
import numpy as np
class VAE:
def __init__(self, n_verts=15, n_dims=3, n_layers=3, n_units=128, latent_dim=2):
self.input_shape = (n_verts*n_dims,)
self.n_layers = n_layers
self.n_units = n_units
self.latent_dim = latent_dim
# build the encoder and decoder
inputs = Input(shape=self.input_shape, name='encoder_input')
self.encoder = self.get_encoder(inputs)
self.decoder = self.get_decoder()
# build the VAE
outputs = self.decoder(self.encoder(inputs)[2])
self.model = Model(inputs, outputs, name='vae_mlp')
# add loss and compile
self.model.add_loss(self.get_loss(inputs, outputs))
self.model.compile(optimizer='adam')
def get_encoder(self, inputs):
h = inputs
h = Dense(self.n_units, activation='relu')(h)
for idx in range(1, self.n_layers, 1):
h = Dense(self.n_units // (2*idx), activation='relu')(h)
self.z_mean = Dense(self.latent_dim, name='z_mean')(h)
self.z_log_var = Dense(self.latent_dim, name='z_log_var')(h)
z = Lambda(self.sampling, output_shape=(self.latent_dim,), name='z')([self.z_mean, self.z_log_var])
encoder = Model(inputs, [self.z_mean, self.z_log_var, z], name='encoder')
return encoder
def sampling(self, args):
self.z_mean, self.z_log_var = args
batch = K.shape(self.z_mean)[0]
dim = K.int_shape(self.z_mean)[1]
# by default, random_normal has mean = 0 and std = 1.0
epsilon = K.random_normal(shape=(batch, dim))
return self.z_mean + K.exp(0.5 * self.z_log_var) * epsilon
def get_decoder(self):
latent_inputs = Input(shape=(self.latent_dim,), name='z_sampling')
h = latent_inputs
for idx in range(1, self.n_layers, 1):
h = Dense(self.n_units//(2*(self.n_layers-idx)), activation='relu')(h)
h = Dense(self.n_units, activation='relu')(h)
outputs = Dense(self.input_shape[0], activation='sigmoid')(h)
decoder = Model(latent_inputs, outputs, name='decoder')
return decoder
def get_loss(self, inputs, outputs):
reconstruction_loss = mse(inputs, outputs)
reconstruction_loss *= self.input_shape[0]
kl_loss = 1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = K.mean(reconstruction_loss + kl_loss)
return vae_loss
# train
x_train = np.random.rand(10000, 45)
vae = VAE(n_verts=15, latent_dim=2, n_layers=3, n_units=128)
vae.model.fit(x_train[:-1000,:],
epochs=100,
batch_size=128,
validation_data=(x_train[-1000:,:], None))
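Once trained, new samples can be drawn from the prior and decoded; a minimal sketch using the vae instance above:
# Sample latent vectors from the unit Gaussian prior and decode them.
z = np.random.normal(size=(5, vae.latent_dim))
generated = vae.decoder.predict(z)  # shape (5, 45), i.e. n_verts * n_dims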