I am trying to implement a custom version of the Leaky PReLU activation function in TensorFlow, where the knee of the ReLU is smoothed. I got the equation from this paper.
Here is the code:
from keras import backend as K
import tensorflow as tf
def SMU_LeakyPRElu(x, alpha=2.5, u=1.0):
    return ((1 + alpha) * x) + ((1 - alpha) * x) * (tf.math.erf(u * (1 - alpha) * x))
from keras.layers import Layer

class SMU_LeakyPRElu(Layer):
    def __init__(self, alpha=2.5, u=1.0, trainable=False, **kwargs):
        super(SMU_LeakyPRElu, self).__init__(**kwargs)
        self.supports_masking = True
        self.alpha = alpha
        self.u = u
        self.trainable = trainable

    def build(self, input_shape):
        self.alpha_factor = K.variable(self.alpha,
                                       dtype=K.floatx(),
                                       name='alpha_factor')
        self.u_factor = K.variable(self.u,
                                   dtype=K.floatx(),
                                   name='u_factor')
        if self.trainable:
            self._trainable_weights.append(self.alpha_factor)
            self._trainable_weights.append(self.u_factor)
        super(SMU_LeakyPRElu, self).build(input_shape)

    def call(self, inputs, mask=None):
        return SMU_LeakyPRElu(inputs, self.alpha_factor, self.u_factor)

    def get_config(self):
        config = {'alpha': self.get_weights()[0] if self.trainable else self.alpha,
                  'u': self.get_weights()[1] if self.trainable else self.u,
                  'trainable': self.trainable}
        base_config = super(SMU_LeakyPRElu, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        return input_shape
x = tf.random.normal((1,10,4))
print(x)
input_shape = (1,10,4)
input_layer = tf.keras.layers.Input(shape=input_shape[1:], name="input_layer")
layer_1 = tf.keras.layers.Conv1D(2, 1,padding = 'valid', input_shape=input_shape[:1])(input_layer)
layer_2 = SMU_LeakyPRElu(alpha=2.5,u=1.0,trainable=True)(layer_1)
model = tf.keras.models.Model(input_layer, layer_2, name="model")
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), loss="categorical_crossentropy", run_eagerly=True)
print(model.summary())
result = model.predict(x)
print(result)
print(result.shape)
I implemented this code using an example from this post at Data Science SE.
Error:
tf.Tensor(
[[[ 1.0467066 -1.1833347 1.5384735 2.078511 ]
[-1.6025988 -0.30846047 0.8019808 0.3113866 ]
[ 0.58313304 -0.90643036 -0.3926888 -0.6210553 ]
[ 0.16505387 -0.5930619 0.6983522 -0.12211661]
[ 0.06077941 -0.11117186 -1.2540722 -0.32234746]
[ 0.41838828 0.7090619 0.30999053 0.10459523]
[ 0.35603598 -0.2695868 -0.17901018 -0.09100233]
[ 1.2746769 0.8311447 0.02825974 -0.48021472]
[-1.536545 -0.24765234 -0.36437735 -1.1891246 ]
[ 0.7531206 -0.56109476 -0.65761757 0.19102335]]], shape=(1, 10, 4), dtype=float32)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-50-c9d490dfd533> in <module>
5 input_layer = tf.keras.layers.Input(shape=input_shape[1:], name="input_layer")
6 layer_1 = tf.keras.layers.Conv1D(2, 1,padding = 'valid', input_shape=input_shape[:1])(input_layer)
----> 7 layer_2 = SMU_LeakyPRElu(alpha=2.5,u=1.0,trainable=True)(layer_1)
8
9 model = tf.keras.models.Model(input_layer, layer_2, name="model")
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/type_spec.py in type_spec_from_value(value)
888 3, "Failed to convert %r to tensor: %s" % (type(value).__name__, e))
889
--> 890 raise TypeError(f"Could not build a TypeSpec for {value} of "
891 f"unsupported type {type(value)}.")
892
TypeError: Could not build a TypeSpec for <__main__.SMU_LeakyPRElu object at 0x7fde698f7850> of unsupported type <class '__main__.SMU_LeakyPRElu'>.
I don't understand this error. How should I implement this function as a custom activation function with trainable parameters alpha and u?
The problem is that you have given your activation function and the custom layer you created the same name, so the class definition shadows the function. I refactored your code for you.
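To see why, here is a minimal, self-contained sketch of the underlying Python behaviour (hypothetical names, unrelated to the code above):

def f():
    return 42

class f:          # rebinds the name; the function above is no longer reachable
    pass

print(f())        # prints something like <__main__.f object at 0x...>, not 42

In your code, call invokes the name SMU_LeakyPRElu, which at that point refers to the Layer class, so it returns a brand-new layer object instead of a tensor, and Keras then fails to build a TypeSpec for it.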
Code:
import tensorflow as tf
from typing import Optional
from tensorflow.keras import Model
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Layer
from tensorflow.keras.optimizers import Adam
class SMULeakyPReLU(Layer):
    """``SMULeakyPReLU``."""

    def __init__(self,
                 alpha: float = 2.5,
                 u: float = 1.,
                 trainable: bool = False,
                 **kwargs):
        super().__init__(**kwargs)
        self.alpha = alpha
        self.u = u
        self.trainable = trainable

    def build(self, input_shape: tf.TensorShape):
        super().build(input_shape)
        self.alpha_factor = tf.Variable(
            self.alpha,
            dtype=tf.float32,
            trainable=self.trainable,
            name="alpha_factor")
        self.u_factor = tf.Variable(
            self.u,
            dtype=tf.float32,
            trainable=self.trainable,
            name="u_factor")

    def call(self,
             inputs: tf.Tensor,
             mask: Optional[tf.Tensor] = None
             ) -> tf.Tensor:
        # f(x) = (1 + alpha) * x + (1 - alpha) * x * erf(u * (1 - alpha) * x)
        fst = (1. + self.alpha_factor) * inputs
        snd = (1. - self.alpha_factor) * inputs
        trd = tf.math.erf(self.u_factor * (1. - self.alpha_factor) * inputs)
        return fst + snd * trd

    def get_config(self):
        config = {
            "alpha": self.get_weights()[0] if self.trainable else self.alpha,
            "u": self.get_weights()[1] if self.trainable else self.u,
            "trainable": self.trainable
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
Test
# fake data
x = tf.random.normal((1, 10, 4))
# create network
input_layer = Input(shape=x.shape[1:], name="input_layer")
layer_1 = Conv1D(2, 1, padding="valid")(input_layer)
layer_2 = SMULeakyPReLU(alpha=2.5, u=1.0, trainable=True)(layer_1)
# create model
model = Model(input_layer, layer_2, name="model")
# compile model and summary
model.compile(
    optimizer=Adam(learning_rate=5e-4),
    loss="categorical_crossentropy",
    run_eagerly=True)
print(model.summary())
# forward pass
result = model.predict(x)
print(result)
print(result.shape)
# Model: "model"
# _________________________________________________________________
# Layer (type) Output Shape Param #
# =================================================================
# input_layer (InputLayer) [(None, 10, 4)] 0
#
# conv1d_1 (Conv1D) (None, 10, 2) 10
#
# smu_leaky_p_re_lu_1 (SMULea (None, 10, 2) 2
# kyPReLU)
#
# =================================================================
# Total params: 12
# Trainable params: 12
# Non-trainable params: 0
# _________________________________________________________________
# None
# 1/1 [==============================] - 0s 13ms/step
# [[[-1.6503611e+01 -3.5051659e+01]
# [ 4.0098205e-02 1.5923592e+00]
# [-1.4898951e+00 7.5487376e-05]
# [ 3.1900513e+01 2.8786476e+01]
# [ 1.9207695e+01 3.6511238e+01]
# [-6.8302655e-01 -4.7705490e-02]
# [ 9.6008554e-03 7.5611029e+00]
# [ 4.7136435e-01 2.5528276e+00]
# [ 2.6859209e-01 3.3496175e+00]
# [ 1.4372441e+01 3.4978668e+01]]]
# (1, 10, 2)
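If you want to double-check that alpha and u actually receive gradients, here is a quick sanity check (a sketch; it reuses model and x from above with an arbitrary scalar loss):

smu = model.layers[-1]  # the SMULeakyPReLU layer
with tf.GradientTape() as tape:
    out = model(x)
    loss = tf.reduce_mean(out)  # arbitrary loss, just for the check
grads = tape.gradient(loss, [smu.alpha_factor, smu.u_factor])
print(grads)  # two tensors; a None entry would mean that parameter is not being trained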
Related
I'm trying to build a custom Transformer model within TensorFlow but I'm stuck. I always receive this error:
OperatorNotAllowedInGraphError: Iterating over a symbolic `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.
Of course I did some research and found many solutions, but I'm failing to apply these to my own model since I'm not even sure where exactly the error occurs. I tried to remove all for loops, but the error still remained. I hope someone can help me and point me in the right direction.
I'm using:
python: 3.9
tensorflow: 2.9.1
keras: 2.9.0
import tensorflow as tf
import keras.backend as K  # for custom loss function
import tensorflow_probability as tfp
from tensorflow.python.keras.layers import Dense, Dropout, GlobalAveragePooling1D, MultiHeadAttention, Input, Conv1D
from keras.layers import LayerNormalization
import numpy as np

class MultiAttention(tf.keras.Model):
    def __init__(self, head_size, num_heads, dropout_attention, dropout1, dropout2, ff_dim):
        super(MultiAttention, self).__init__()
        # define all layers in init
        self.LN1 = LayerNormalization(epsilon=1e-6)
        self.MHA = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout_attention)
        self.Dropout1 = Dropout(dropout1)
        self.LN2 = LayerNormalization(epsilon=1e-6)
        self.CONV1 = Conv1D(filters=ff_dim, kernel_size=1, activation="relu")
        self.Dropout2 = Dropout(dropout2)
        self.CONV2 = None

    def build(self, input_shape):
        self.CONV2 = Conv1D(filters=input_shape.shape[-1], kernel_size=1)

    def call(self, inputs, training=False):
        # Normalization and Attention
        x = self.LN1(epsilon=1e-6)(inputs)
        x = self.MHA(x, x)
        x = self.Dropout1(x)
        x_att = x + inputs
        # Feed Forward Part
        x = self.LN2(x_att)
        x = self.Conv1(x)
        x = self.Dropout2(x)
        x = self.Conv1(x)
        return x + x_att

class Transformer(tf.keras.Model):  # tf.keras.Model inherits model.fit/predict/evaluate. tf.keras.Layer can't do this
    def __init__(self, head_size, num_heads, dropout_attention, dropout1, dropout2, ff_dim,
                 num_transformer_blocks, mlp_units, mlp_dropout):
        super(Transformer, self).__init__()
        # define all layers in init
        self.head_size = head_size
        self.num_heads = num_heads
        self.dropout_attention = dropout_attention
        self.dropout1 = dropout1
        self.dropout2 = dropout2
        self.ff_dim = ff_dim
        self.num_transformer_blocks = num_transformer_blocks
        self.mlp_units = mlp_units
        self.GAP = GlobalAveragePooling1D(data_format="channels_first")
        self.Dense1 = Dense(mlp_units, activation="relu")
        self.Dropout = Dropout(mlp_dropout)
        self.Dense2 = Dense(1, activation="softmax")
        self.MA = None

    def build(self, input_shape):
        self.MA = MultiAttention(self.head_size, self.num_heads, self.dropout_attention,
                                 self.dropout1, self.dropout2, self.ff_dim)
        # for dim in range(self.mlp_units):
        #     self.Dense1_list.append(Dense(dim, actiavtion="relu"))

    def call(self, input_shape, training=False):
        x = Input(input_shape)
        for _ in range(self.num_transformer_blocks):
            x = self.MA(x)
        x = self.GAP(x)
        for unit in self.Dense1_list:
            x = unit(x)
        x = self.Dropout(x)
        x = self.Dense1(x)
        x = self.Dropout(x)
        return self.Dense2(x)

class CustomLoss(tf.keras.losses.Loss):
    def __init__(self):
        super().__init__()

    def call(self, y_true, y_pred):
        alpha = K.std(y_pred) / K.std(y_true)
        beta = K.sum(y_pred) / K.sum(y_true)  # no need to calc mean
        r = tfp.stats.correlation(y_true, y_pred, sample_axis=None, event_axis=None)
        return K.sqrt(K.square(1 - r) + K.square(1 - alpha) + K.square(1 - beta))

def create_model(head_size, num_heads, dropout_attention, dropout1, dropout2, ff_dim,
                 num_transformer_blocks, mlp_units, mlp_dropout, lr):
    model = Transformer(head_size, num_heads, dropout_attention, dropout1, dropout2, ff_dim,
                        num_transformer_blocks, mlp_units, mlp_dropout)
    model.compile(
        loss=CustomLoss(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    )
    return model

model = create_model(256, 4, 0.10, 0.12, 0.15, 2, 4, 3, 0.10, 0.001)

model.fit(np.zeros((2, 257, 11)),
          np.zeros((2, 1)),
          epochs=1,
          batch_size=512,
          )
I am trying to make a custom layer by subclassing TensorFlow's layers.Layer class.
I am trying to make an IIR filter, which uses values from the input layer to compute an output sequence, something like this:
y[i] = a0 * x[i] + a1 * x[i - 1] + b1 * y[i - 1]
where x is the input and y is the output. I define the class this way:
class IIR(keras.layers.Layer):
    def __init__(self, input_dim):
        super(IIR, self).__init__()
        self.input_dim = 60
        self.b0 = tf.Variable(tf.constant([uniform(-1, 1)]))
        self.b1 = tf.Variable(tf.constant([uniform(-1, 1)]))
        self.b2 = tf.Variable(tf.constant([uniform(-1, 1)]))
        self.a1 = tf.Variable(tf.constant([uniform(-1, 1)]))
        self.a2 = tf.Variable(tf.constant([uniform(-1, 1)]))

    def call(self, inputs):
        order = 3
        init_dim = [0, 1, 2]
        output_sequence = tf.constant(np.zeros((self.input_dim)), dtype=tf.float32)
        outt = np.zeros(self.input_dim)
        outt[0] = inputs[0]
        outt[1] = inputs[1]
        outt[2] = inputs[2]
        for i in range(2, self.input_dim):
            outt[i] = self.b0*inputs[i] + self.b1*inputs[i-1] + self.b2*inputs[i-2] - self.a1*outt[i-1] - self.a2*outt[i-2]
        output_sequence = tf.constant(outt)
        return output_sequence
but I keep getting the error
ValueError: Exception encountered when calling layer "iir_13" (type IIR).
in user code:
File "<ipython-input-37-0717fc982e73>", line 17, in call *
outt[0] = inputs[:][0]
ValueError: setting an array element with a sequence.
Call arguments received:
• inputs=tf.Tensor(shape=(None, 60), dtype=float32)
and so on. The shape of the input is (None, 60) (I'm setting 60 just for testing purposes), and I am assuming None will be replaced by the batch size during training? How can I access the values of the input? What's the actual shape of the input? Is this the right approach?
EDIT: I am trying to implement this in a model, something like this:
model = keras.Sequential()
model.add(keras.layers.Input(shape=60))
model.add(IIR(input_dim=60))
model.add(keras.layers.Dense(8, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy')
Not sure what exactly you want to do, but I would recommend using Tensorflow operations only. Here is an example:
import tensorflow as tf

class IIR(tf.keras.layers.Layer):
    def __init__(self, input_dim):
        super(IIR, self).__init__()
        self.input_dim = input_dim
        self.b0 = tf.Variable(tf.random.uniform((1,), minval=-1, maxval=1))
        self.b1 = tf.Variable(tf.random.uniform((1,), minval=-1, maxval=1))
        self.b2 = tf.Variable(tf.random.uniform((1,), minval=-1, maxval=1))
        self.a1 = tf.Variable(tf.random.uniform((1,), minval=-1, maxval=1))
        self.a2 = tf.Variable(tf.random.uniform((1,), minval=-1, maxval=1))

    def call(self, inputs):
        # one TensorArray slot per timestep; each slot holds a batch-sized vector
        output_sequence = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True, clear_after_read=False)
        # seed the first two outputs with the inputs (initial conditions);
        # the loop then fills index 2 onwards, so no index is written twice
        output_sequence = output_sequence.write(0, inputs[:, 0])
        output_sequence = output_sequence.write(1, inputs[:, 1])
        for i in range(2, self.input_dim):
            output_sequence = output_sequence.write(i, self.b0*inputs[:, i] + self.b1*inputs[:, i-1]
                                                    + self.b2*inputs[:, i-2] - self.a1*output_sequence.read(i-1)
                                                    - self.a2*output_sequence.read(i-2))
        result = output_sequence.stack()  # shape (input_dim, batch_size)
        return tf.transpose(result)       # transpose (not reshape) back to (batch_size, input_dim)
iir = IIR(input_dim=60)
tf.print(iir(tf.random.normal((2, 60))).shape)
iir = IIR(input_dim=60)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=60))
model.add(IIR(input_dim=60))
model.add(tf.keras.layers.Dense(8, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
print(model.summary())
TensorShape([2, 60])
Model: "sequential_21"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
iir_80 (IIR)                 (None, 60)                5
dense_20 (Dense)             (None, 8)                 488
dense_21 (Dense)             (None, 1)                 9
=================================================================
Total params: 502
Trainable params: 502
Non-trainable params: 0
_________________________________________________________________
None
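A similar sanity check (a sketch reusing the IIR layer above) confirms that gradients flow back through the TensorArray reads to all five coefficients:

iir = IIR(input_dim=60)
x = tf.random.normal((2, 60))
with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(iir(x)))
grads = tape.gradient(loss, iir.trainable_variables)
print([g is not None for g in grads])  # expect [True, True, True, True, True]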
I am learning about Model subclass definitions in TensorFlow.
A pretty straightforward definition looks something like this:
class MyNetwork1(tf.keras.Model):
    def __init__(self, num_classes=10):
        super().__init__()
        self.num_classes = num_classes
        self.input_layer = tf.keras.layers.Flatten()
        self.hidden_1 = tf.keras.layers.Dense(128, activation='relu')
        self.hidden_2 = tf.keras.layers.Dense(64, activation='relu')
        self.output_layer = tf.keras.layers.Dense(self.num_classes, activation='softmax')

    def call(self, input_tensor):
        x = self.input_layer(input_tensor)
        x = self.hidden_1(x)
        x = self.hidden_2(x)
        x = self.output_layer(x)
        return x
After building the model,
Model1 = MyNetwork1()
Model1.build((None, 28, 28, 1))
It will look like
Model: "my_network1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten (Flatten) multiple 0
_________________________________________________________________
dense (Dense) multiple 100480
_________________________________________________________________
dense_1 (Dense) multiple 8256
_________________________________________________________________
dense_2 (Dense) multiple 650
=================================================================
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
Since this method does not let me customize the number of neurons and the activation type per layer, I tried to edit it a little bit.
I have tried the following definition
class MyNetwork2(tf.keras.Model):
    def __init__(self, num_classes=2, hidden_dimensions=[100],
                 hidden_activations=['relu']):
        super().__init__()
        self.inputlayer = tf.keras.layers.Flatten()
        i = 0
        self.hidden_layers = []
        for d, a in zip(hidden_dimensions, hidden_activations):
            i += 1
            setattr(self, 'hidden_' + str(i),
                    tf.keras.layers.Dense(d, activation=a))
            self.hidden_layers.append('self.hidden_' + str(i) + '(x)')
        self.outputlayer = tf.keras.layers.Dense(num_classes, activation='softmax')
        self.num_layers = len(hidden_dimensions) + 2

    def call(self, inputtensor):
        x = self.inputlayer(inputtensor)
        for h in self.hidden_layers:
            # print(h)
            x = eval(h, {}, x)
        x = self.outputlayer(x)
        return x
In this code, I tried to do the same as in the previous definition.
Model2 = MyNetwork2(num_classes=10, hidden_dimensions=[128, 64],
                    hidden_activations=['relu', 'relu'])
Model2.build((None, 28, 28, 1))
However, I faced the following error:
TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got 'self'
How can I fix this error to achieve my goal?
Seems like a very complicated way to do things. If you use a dictionary for a variable number of layers instead of eval, everything works fine.
class MyNetwork2(tf.keras.Model):
    def __init__(self, num_classes=2, hidden_dimensions=[100],
                 hidden_activations=['relu']):
        super(MyNetwork2, self).__init__()
        self.inputlayer = tf.keras.layers.Flatten()
        self.hidden_layers = dict()
        for i, (d, a) in enumerate(zip(hidden_dimensions, hidden_activations)):
            self.hidden_layers['hidden_' + str(i)] = tf.keras.layers.Dense(d, activation=a)
        self.outputlayer = tf.keras.layers.Dense(num_classes, activation='softmax')
        self.num_layers = len(hidden_dimensions) + 2
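As a side note, a plain Python list works just as well as the dict here, because Keras automatically tracks layers stored in list or dict attributes. A minimal sketch (MyNetwork3 is a hypothetical name; the running example below sticks with the dict version):

class MyNetwork3(tf.keras.Model):
    def __init__(self, num_classes=2, hidden_dimensions=[100],
                 hidden_activations=['relu']):
        super().__init__()
        self.inputlayer = tf.keras.layers.Flatten()
        self.hidden_layers = [tf.keras.layers.Dense(d, activation=a)
                              for d, a in zip(hidden_dimensions, hidden_activations)]
        self.outputlayer = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputtensor, training=None, **kwargs):
        x = self.inputlayer(inputtensor)
        for layer in self.hidden_layers:
            x = layer(x)
        return self.outputlayer(x)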
Running example:
import tensorflow as tf
import numpy as np

class MyNetwork2(tf.keras.Model):
    def __init__(self, num_classes=2, hidden_dimensions=[100],
                 hidden_activations=['relu']):
        super(MyNetwork2, self).__init__()
        self.inputlayer = tf.keras.layers.Flatten()
        self.hidden_layers = dict()
        for i, (d, a) in enumerate(zip(hidden_dimensions, hidden_activations)):
            self.hidden_layers['hidden_' + str(i)] = tf.keras.layers.Dense(d, activation=a)
        self.outputlayer = tf.keras.layers.Dense(num_classes, activation='softmax')
        self.num_layers = len(hidden_dimensions) + 2

    def call(self, inputtensor, training=None, **kwargs):
        x = self.inputlayer(inputtensor)
        for k, v in self.hidden_layers.items():
            x = v(x)
        x = self.outputlayer(x)
        return x

Model2 = MyNetwork2(num_classes=10, hidden_dimensions=[128, 64],
                    hidden_activations=['relu', 'relu'])
Model2.build((None, 28, 28, 1))
Model2(np.random.uniform(0, 1, (1, 28, 28, 1)).astype(np.float32))
<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[0.14969216, 0.10196744, 0.0874036 , 0.08350615, 0.18459582,
0.07227989, 0.08263624, 0.08537506, 0.10291573, 0.04962786]],
dtype=float32)>
The hidden layers as a dictionary:
Model2.hidden_layers
{'hidden_0': <tensorflow.python.keras.layers.core.Dense at 0x1891b5c13a0>,
'hidden_1': <tensorflow.python.keras.layers.core.Dense at 0x1891b5c1d00>}
I have built the following encoder-decoder architecture, and the encoder and decoder both work fine separately:
from tensorflow.keras.layers import LSTM, Input, Reshape, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
WORD_TO_INDEX = {"foo": 0, "bar": 1}
MAX_QUERY_WORD_COUNT = 10
QUERY_ENCODING_SIZE = 15
# ENCODER
query_encoder_input = Input(shape=(None, len(WORD_TO_INDEX)), name="query_encoder_input")
query_encoder_output = LSTM(QUERY_ENCODING_SIZE, name="query_encoder_lstm")(query_encoder_input)
query_encoder = Model(inputs=query_encoder_input, outputs=query_encoder_output)
# DECODER
query_decoder_input = Input(shape=(QUERY_ENCODING_SIZE,), name="query_decoder_input")
query_decoder_reshape = Reshape((1, QUERY_ENCODING_SIZE), name="query_decoder_reshape")(query_decoder_input)
query_decoder_lstm = LSTM(QUERY_ENCODING_SIZE, name="query_decoder_lstm", return_sequences=True, return_state=True)
recurrent_input, state_h, state_c = query_decoder_lstm(query_decoder_reshape)
states = [state_h, state_c]
query_decoder_outputs = []
for _ in range(MAX_QUERY_WORD_COUNT):
    recurrent_input, state_h, state_c = query_decoder_lstm(recurrent_input, initial_state=states)
    query_decoder_outputs.append(recurrent_input)
    states = [state_h, state_c]
query_decoder_output = Lambda(lambda x: K.concatenate(x, axis=1), name="query_decoder_concat")(query_decoder_outputs)
query_decoder = Model(inputs=query_decoder_input, outputs=query_decoder_output)
But when I try to join them together to create an autoencoder, I get an odd error and I don't know why.
# AUTOENCODER
# apply the reshape layer to the output of the encoder
query_autoencoder_output = query_decoder.layers[1](query_encoder_output)
# rebuild the autoencoder by applying each layer of the decoder to the output of the encoder
for decoder_layer in query_decoder.layers[2:]:
    # this fails and I don't know why
    query_autoencoder_output = decoder_layer(query_autoencoder_output)
# the code never gets here
query_autoencoder = Model(inputs=query_encoder_input, outputs=query_autoencoder_output)
This throws the error:
ValueError: Shape must be rank 3 but is rank 2 for '{{node
query_decoder_concat/concat_1}} = ConcatV2[N=3, T=DT_FLOAT,
Tidx=DT_INT32](query_decoder_lstm/PartitionedCall_11:1,
query_decoder_lstm/PartitionedCall_11:2,
query_decoder_lstm/PartitionedCall_11:3,
query_decoder_concat/concat_1/axis)' with input shapes: [?,1,15],
[?,15], [?,15], [].
This is the template I used for my decoder. (See the "What if I don't want to use teacher forcing for training?" section.)
I relied on these StackOverflow questions (especially the last one) to figure out how to combine the models together.
What does this error mean and how can I fix it?
You can essentially treat a model as a layer. With an autoencoder, it'll be as straightforward as something like this:
autoencoder = Sequential([encoder, decoder])
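For instance, a minimal sketch (assuming encoder and decoder are compatible Keras models and x_train is your training data):

from tensorflow.keras import Sequential

autoencoder = Sequential([encoder, decoder])
autoencoder.compile(optimizer="adam", loss="mse")
autoencoder.fit(x_train, x_train, epochs=10)  # the target is the input itself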
If you want some extra flexibility you can subclass tf.keras.Model:
class AutoEncoder(tf.keras.Model):
    def __init__(self, encoder, decoder):
        super(AutoEncoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def call(self, inputs, training=None, **kwargs):
        x = self.encoder(inputs)
        x = self.decoder(x)
        return x

ae = AutoEncoder(encoder, decoder)
ae.fit(...
Full reproducible example:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from tensorflow import keras
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)
import numpy as np

# keep only the "automobile" class from CIFAR-10
(xtrain, ytrain), (xtest, ytest) = keras.datasets.cifar10.load_data()

train_ix = np.where(ytrain.ravel() == 1)
test_ix = np.where(ytest.ravel() == 1)

cars_train = xtrain[train_ix]
cars_test = xtest[test_ix]

cars = np.vstack([cars_train, cars_test]).astype(np.float32) / 255

X = tf.data.Dataset.from_tensor_slices(cars).batch(8)

class Encoder(keras.Model):
    def __init__(self):
        super(Encoder, self).__init__()
        self.flat = keras.layers.Flatten(input_shape=(32, 32, 3))
        self.dense1 = keras.layers.Dense(128)
        self.dense2 = keras.layers.Dense(32)

    def call(self, inputs, training=None, **kwargs):
        x = self.flat(inputs)
        x = keras.activations.selu(self.dense1(x))
        x = keras.activations.selu(self.dense2(x))
        return x

class Decoder(keras.Model):
    def __init__(self):
        super(Decoder, self).__init__()
        self.dense1 = keras.layers.Dense(128, input_shape=[32])
        self.dense2 = keras.layers.Dense(32 * 32 * 3)
        self.reshape = keras.layers.Reshape([32, 32, 3])

    def call(self, inputs, training=None, **kwargs):
        x = keras.activations.selu(self.dense1(inputs))
        x = keras.activations.sigmoid(self.dense2(x))
        x = self.reshape(x)
        return x

class AutoEncoder(keras.Model):
    def __init__(self, encoder, decoder):
        super(AutoEncoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def call(self, inputs, training=None, **kwargs):
        x = self.encoder(inputs)
        x = self.decoder(x)
        return x

ae = AutoEncoder(Encoder(), Decoder())

loss_object = keras.losses.BinaryCrossentropy()
reconstruction_loss = keras.metrics.Mean(name='reconstruction_loss')
optimizer = keras.optimizers.Adam()

@tf.function
def reconstruct(inputs):
    with tf.GradientTape() as tape:
        out = ae(inputs)
        loss = loss_object(inputs, out)
    gradients = tape.gradient(loss, ae.trainable_variables)
    optimizer.apply_gradients(zip(gradients, ae.trainable_variables))
    reconstruction_loss(loss)

if __name__ == '__main__':
    template = 'Epoch {:2} Reconstruction Loss {:.4f}'
    for epoch in range(50):
        reconstruction_loss.reset_states()
        for input_batches in X:
            reconstruct(input_batches)
        print(template.format(epoch + 1, reconstruction_loss.result()))
Output:
Epoch 35 Reconstruction Loss 0.5794
Epoch 36 Reconstruction Loss 0.5793
Epoch 37 Reconstruction Loss 0.5792
Epoch 38 Reconstruction Loss 0.5791
Epoch 39 Reconstruction Loss 0.5790
Epoch 40 Reconstruction Loss 0.5789
Based off of M Z's answer, but without using Sequential, you can do this:
query_autoencoder = Model(inputs=query_encoder_input, outputs=query_decoder(query_encoder_output))
query_autoencoder.summary()
The summary also breaks down into more layers than M Z's answer did.
For anyone using Javascript and Tensorflow JS, here is an example of how to make deeper networks.
I thought I would include this since most Javascript examples of autoencoders show only one encoding and decoding layer.
function buildModel (layers)
{
    const model = tf.sequential();

    layers.forEach(layer =>
    {
        model.add(layer);
    });

    return model;
}
Then you can make a deeper network with more encoding and decoding layers with:
// [ encoder ] [ decoder ]
// Model layers: 20 -> 10 -> 2 -> 10 -> 20
// Encoding Layers
const encoder = [
    tf.layers.dense({ inputShape: [20], units: 10, activation: 'relu' }),
    tf.layers.dense({ units: 2, activation: 'relu' })
];

// Decoding Layers
const decoder = [
    tf.layers.dense({ units: 10, activation: 'relu' }),
    tf.layers.dense({ units: 20, activation: 'relu' })
];
// Build model
const model = buildModel([...encoder, ...decoder]);
// ... Then compile and Train ...
After training, predictions can be made with:
const predictionModel = buildModel([...encoder]);
predictionModel.predict(data);
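Note that buildModel reuses the same layer objects, so the encoder-only predictionModel shares the weights that were trained in the full model; nothing needs to be copied over.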
I've spent a lot of time with TensorFlow v0.x and v1.x, and now I'm trying the TensorFlow v2 Keras Model API. model.summary() looks easy and convenient, but it lacks detail.
Here's a toy example. Let's say I define custom layers and models as below (in a functional API style and a subclassing style).
I want to see the primitive layers inside the custom layers, but .summary() only shows shallow information (only the direct child layers).
Toy custom layers (layers are just toy definitions):
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import (Dense, Conv2D, BatchNormalization)
class LayerA(tf.keras.layers.Layer):
    def __init__(self, num_outputs, **kwargs):
        super(LayerA, self).__init__(**kwargs)
        self.num_outputs = num_outputs
        self.dense1 = Dense(64)
        self.dense2 = Dense(128)
        self.dense3 = Dense(num_outputs)
        self.bn = BatchNormalization(trainable=True)

    def call(self, inputs, training=True):
        x = self.dense1(inputs)
        x = self.bn(x, training=training)
        x = self.dense1(inputs)
        x = self.bn(x, training=training)
        x = self.dense1(inputs)
        x = self.bn(x, training=training)
        return x

class LayerB(tf.keras.layers.Layer):
    def __init__(self, num_outputs, **kwargs):
        super(LayerB, self).__init__(**kwargs)
        self.num_outputs = num_outputs
        self.dense = Dense(64)
        self.bn = BatchNormalization(trainable=True)

    def call(self, inputs, training=True):
        x = self.dense(inputs)
        x = self.bn(x, training=training)
        return x
Model definition with functional API:
inputs = tf.keras.Input(shape=(28), name='input')
x = LayerA(7, name='layer_a')(inputs)
x = LayerB(13, name='layer_b')(x)
x = tf.reduce_max(x, 1)
model_func = keras.Model(inputs=inputs, outputs=x, name='model')
model_func.summary()
# Results:
# Layer (type)                 Output Shape              Param #
# =================================================================
# input (InputLayer)           [(None, 28)]              0
# _________________________________________________________________
# layer_a (LayerA)             (None, 64)                2112
# layer_b (LayerB)             (None, 64)                4416
# tf_op_layer_Max_1 (TensorFlo [(None,)]                 0
# =================================================================
# Total params: 6,528
# Trainable params: 6,272
# Non-trainable params: 256
Model definition subclassing:
class ModelA(tf.keras.Model):
    def __init__(self):
        super(ModelA, self).__init__()
        self.block_1 = LayerA(7, name='layer_a')
        self.block_2 = LayerB(13, name='layer_b')

    def call(self, inputs):
        x = self.block_1(inputs)
        x = self.block_2(x)
        x = tf.reduce_max(x, 1)
        return x
model_subclass = ModelA()
y = model_subclass(inputs)
model_subclass.summary()
### Result:
# Layer (type)                 Output Shape              Param #
# =================================================================
# layer_a (LayerA)             (None, 64)                2112
# layer_b (LayerB)             (None, 64)                4416
# =================================================================
# Total params: 6,528
# Trainable params: 6,016
# Non-trainable params: 512
How can I print all the activation shapes of the Conv and Dense layers in the model? For example,
layer_a/dense_1 (None, ...)
layer_a/dense_2 (None, ...)
layer_b/dense_1 (None, ...)
layer_b/maybe-even-deeper-layer/conv2d_1 (None, ...)
... etc ...
In TensorFlow v0.x or v1.x, I would do something like:
for n in tf.get_default_graph().as_graph_def().node:
    print(n.name, n.shape)
Is there a way to print more details when I have a Keras model?
The summary will not do this automatically, so you have to adapt. You can, for instance, create a recursive summary:
def full_summary(layer):
    # check if this layer itself contains layers
    if hasattr(layer, 'layers'):
        print('summary for ' + layer.name)
        layer.summary()
        print('\n\n')
        for l in layer.layers:
            full_summary(l)
Use it as:
full_summary(my_model)
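As a side note (this only applies when the nested blocks are themselves Model instances rather than plain custom Layers): recent Keras versions can also expand nested models directly, e.g.

model_func.summary(expand_nested=True)  # expands nested sub-models in the summary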