I have a simple model with one custom layer which works fine in the normal case.
When I switched to eager execution via tf.enable_eager_execution(), I got stuck on a weird error.
Here is the code so far:
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Input
from tensorflow.keras.losses import kullback_leibler_divergence
tf.enable_eager_execution()
class ClusteringLayer(Layer):
    def __init__(self, output_dim, input_dim=None, alpha=1.0, **kwargs):
        self.output_dim = output_dim
        self.input_dim = input_dim
        self.alpha = alpha
        super(ClusteringLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name='kernel', shape=(self.output_dim, input_shape[1]), initializer='uniform', trainable=True)
        super(ClusteringLayer, self).build(input_shape)

    def call(self, x, mask=None):
        q = 1.0/(1.0 + K.sqrt(K.sum(K.square(K.expand_dims(x, 1) - self.W), axis=2))**2 /self.alpha)
        q = q**((self.alpha+1.0)/2.0)
        q = K.transpose(K.transpose(q)/K.sum(q, axis=1))
        return q

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)

def clustering_loss(y_true, y_pred):
    a = K.square(y_pred) / K.sum(y_pred, axis=0)
    p = K.transpose(K.transpose(a) / K.sum(a, axis=1))
    loss = kullback_leibler_divergence(p, y_pred)
    return loss
input1 = Input(shape=(10,), name="input")
out = ClusteringLayer(output_dim = 5, name='clustering')(input1)
model = Model(inputs=input1, outputs=out)
model.compile(optimizer=tf.train.AdamOptimizer(1e-3), loss={'clustering' : clustering_loss})
X = np.random.random((20, 10)).astype(np.float32)
Y = np.random.random((20, 5)).astype(np.float32)
model.fit(x={'input' : X}, y={'clustering' : Y}, batch_size=1, epochs=10)
The error message is related to the "fit" function:
AssertionError: Could not compute output DeferredTensor('None', shape=(5,), dtype=float32)
When I tried to check the output of my custom layer, I was surprised to find that this layer is generating two outputs. The first one is ambiguous and undesired.
Code:
input1 = Input(shape=(10,), name="input")
layer = ClusteringLayer(output_dim = 5, name='clustering')
out = layer(input1)
print(out)
Output:
[<DeferredTensor 'None' shape=(?,) dtype=float32>, <DeferredTensor 'None' shape=(5,) dtype=float32>]
Even when I replaced my custom layer with the minimal custom layer from the Keras documentation, I got the same error:
AssertionError: Could not compute output DeferredTensor('None', shape=(5,), dtype=float32)
I asked the question on GitHub since it seems more like a bug.
They recommended a workaround until the internal problem is fixed.
I'm quoting from the GitHub issue:
As a workaround, you could wrap the output shape returned by
compute_output_shape in a TensorShape. For example:
TensorShape((input_shape[0], self.output_dim)). Let me know if this
works.
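Applying that suggestion to the layer above only changes compute_output_shape; the rest of the class stays the same:
def compute_output_shape(self, input_shape):
    # wrap the shape tuple in a tf.TensorShape, as suggested in the GitHub thread
    return tf.TensorShape((input_shape[0], self.output_dim))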
Related
I have a network written with the TensorFlow Keras functional API.
I'd like to use the gradient of one layer w.r.t. the previous layer as input for another layer.
I tried GradientTape and tf.gradients and neither of them worked. I get the following error:
ValueError: tf.function-decorated function tried to create variables on non-first call.
There is no actual input data at this point; I only have an Input layer.
Is it possible to do this in TensorFlow?
My code:
def Geo_branch(self, geo_inp):
    Fully_Connected1 = layers.TimeDistributed(layers.Dense(128, activation='tanh'))(geo_inp)
    Fully_Connected2 = layers.TimeDistributed(layers.Dense(64, activation='tanh'))(Fully_Connected1)
    return Fully_Connected2

@tf.function
def geo_extension(self, geo_branch):
    Fully_Connected = layers.TimeDistributed(layers.Dense(100, activation='tanh'))(geo_branch)
    geo_ext = layers.LSTM(6,
                          activation="tanh",
                          recurrent_activation="sigmoid",
                          unroll=False,
                          use_bias=True,
                          name='Translation'
                          )(Fully_Connected)
    grads = tf.gradients(geo_ext, geo_branch)
    return geo_ext, grads

inp_geo = layers.Input(shape=(self.time_size, 6), name='geo_input')
Geo_branch = Geo_branch(inp_geo)
geo_ext, grads = geo_extension(Geo_branch)
Any solution is appreciated. It doesn't have to be GradientTape, if there is any other way to compute these gradients.
I would just inherit from TensorFlow's Layer class and create your own custom layer. Also, it would probably be beneficial to put everything under one call so as to minimize the likelihood of disconnections in the graph.
Example:
import tensorflow as tf
from typing import List
from typing import Optional
from typing import Tuple
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Layer
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import TimeDistributed
class CustomGeoLayer(Layer):
    """``CustomGeoLayer``."""
    def __init__(self, num_units: List[int], name: Optional[str] = None):
        super().__init__(name=name)
        self.num_units = num_units
        self.dense_0 = TimeDistributed(Dense(num_units[0], activation="tanh"))
        self.dense_1 = TimeDistributed(Dense(num_units[1], activation="tanh"))
        self.dense_2 = TimeDistributed(Dense(num_units[2], activation="tanh"))
        self.rnn = LSTM(units=num_units[3], activation="tanh",
                        recurrent_activation="sigmoid",
                        unroll=False, use_bias=True,
                        name="Translation")

    @tf.function
    def call(self,
             input_tensor: tf.Tensor,
             training: bool = True) -> Tuple[tf.Tensor, tf.Tensor]:
        x = self.dense_0(input_tensor)
        x = self.dense_1(x)
        r = self.dense_2(x)
        x = self.rnn(r, training=training)
        return x, tf.gradients(x, r)[0]
# create model
x_in = Input(shape=(10, 6))
x_out = CustomGeoLayer([128, 64, 100, 6])(x_in)
model = Model(x_in, x_out)
# fake input data
arr = tf.random.normal((3, 10, 6))
# forward pass
out, g = model(arr)
print(out.shape)
# (3, 6)
print(g.shape)
# (3, 10, 100)
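If you prefer GradientTape over tf.gradients, only the call method needs to change; a sketch along these lines (not tested against the rest of your pipeline) should give the same two outputs:
@tf.function
def call(self, input_tensor, training=True):
    x = self.dense_0(input_tensor)
    x = self.dense_1(x)
    r = self.dense_2(x)
    with tf.GradientTape() as tape:
        tape.watch(r)  # r is an intermediate tensor, so it must be watched explicitly
        out = self.rnn(r, training=training)
    # gradient of the LSTM output with respect to the layer feeding it
    return out, tape.gradient(out, r)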
I am more or less new to the field of neural networks and python, just a couple of months of work.
I am interested in this example developed in MATLAB: https://it.mathworks.com/help/images/image-processing-operator-approximation-using-deep-learning.html
However, I would like to try to implement it using Keras.
I have three questions regarding the two custom layers this net uses, whose code is found here:
https://github.com/catsymptote/Salsa_cryptanalysis/blob/master/matlab/workspace/adaptiveNormalizationMu.m
https://github.com/catsymptote/Salsa_cryptanalysis/blob/master/matlab/workspace/adaptiveNormalizationLambda.m
1. I have not really understood what these layers actually do.
2. Is my tentative implementation of adaptiveNormalizationMu correct in Keras? From what I understood, this layer just multiplies the output of the BN layer by an adaptive scale parameter, mu. I wrote the code following the example reported here: https://www.tutorialspoint.com/keras/keras_customized_layer.htm
3. I am struggling with the input_shape and output_shape variables of the code I wrote following the tutorial. Considering batch size BS, images with dimensions dim1 and dim2, and 1 channel, I would like the input to have dimension (BS, dim1, dim2, 1) and the output to be the same, since it is a mere scaling. How can I stay consistent with the MATLAB code in the MathWorks example, where the only input argument is numberOfFilters? I don't know where to introduce this parameter in the code I am trying to write. I would prefer not to fix the input dimension, so that I can reuse this layer at different depths of the network while still choosing the "depth" correctly (like the number of filters for a standard Conv2D layer). A sketch of what I mean is given after my second attempt below.
Thank you so much for the help
F.
###
from keras import backend as K
from keras.layers import Layer
class MyAdaptiveNormalizationMu(Layer):
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyAdaptiveNormalizationMu, self).__init__(**kwargs)

    def build(self, input_shape):
        self.mu = self.add_weight(name = 'mu',
                                  shape = (input_shape[1], self.output_dim),
                                  initializer = 'random_normal', trainable = True)
        super(MyAdaptiveNormalizationMu, self).build(input_shape)

    def call(self, input_data):
        return input_data * self.mu

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)
from keras.models import Sequential
batch_size = 16
dim1 = 8
dim2 = 8
channels = 1
input_shape = (batch_size, dim1, dim2, channels)
output_shape = input_shape
model = Sequential()
model.add(MyAdaptiveNormalizationMu(output_dim=?, input_shape=?))
EDIT: I provide a second attempt, which seems to compile. It should do what I think adaptiveNormalizationLambda and adaptiveNormalizationMu do: multiply the input by a learnable weight matrix. However, I am still unsure whether the layer is doing what it is supposed to, and whether I have correctly understood the purpose of those layers.
from keras.layers import Layer, Input
from keras.models import Model
import numpy as np
class Multiply_Weights(Layer):
    def __init__(self, **kwargs):
        super(Multiply_Weights, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[1], input_shape[2]),
                                      initializer='RandomNormal',
                                      trainable=True)
        super(Multiply_Weights, self).build(input_shape)

    def call(self, x, **kwargs):
        # Implicit broadcasting occurs here.
        # Shape x: (BATCH_SIZE, N, M)
        # Shape kernel: (N, M)
        # Shape output: (BATCH_SIZE, N, M)
        return x * self.kernel

    def compute_output_shape(self, input_shape):
        return input_shape
N = 3
M = 4
BATCH_SIZE = 1
a = Input(shape=(N, M))
layer = Multiply_Weights()(a)
model = Model(inputs=a,
outputs=layer)
a = np.ones(shape=(BATCH_SIZE, N, M))
pred = model.predict(a)
print(pred)
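To address the numberOfFilters question, here is a sketch of what I think a shape-agnostic version could look like, where the only constructor argument is the number of filters and one mu is learned per channel (the names are mine, and I am still not sure this matches what the MATLAB layer does):
from keras.layers import Layer

class ChannelwiseScale(Layer):
    def __init__(self, number_of_filters, **kwargs):
        super(ChannelwiseScale, self).__init__(**kwargs)
        self.number_of_filters = number_of_filters

    def build(self, input_shape):
        # one learnable mu per channel; broadcasting scales a (BS, dim1, dim2, channels) input
        self.mu = self.add_weight(name='mu',
                                  shape=(self.number_of_filters,),
                                  initializer='random_normal',
                                  trainable=True)
        super(ChannelwiseScale, self).build(input_shape)

    def call(self, inputs):
        return inputs * self.mu

    def compute_output_shape(self, input_shape):
        return input_shape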
I've created the following Keras custom model:
import tensorflow as tf
from tensorflow.keras.layers import Layer
class MyModel(tf.keras.Model):
    def __init__(self, num_classes):
        super(MyModel, self).__init__()
        self.dense_layer = tf.keras.layers.Dense(num_classes, activation='softmax')
        self.lambda_layer = tf.keras.layers.Lambda(lambda x: tf.math.argmax(x, axis=-1))

    def call(self, inputs):
        x = self.dense_layer(inputs)
        x = self.lambda_layer(x)
        return x

    # A convenient way to get model summary
    # and plot in subclassed api
    def build_graph(self, raw_shape):
        x = tf.keras.layers.Input(shape=(raw_shape))
        return tf.keras.Model(inputs=[x],
                              outputs=self.call(x))
The task is multi-class classification.
Model consists of a dense layer with softmax activation and a lambda layer as a post-processing unit that converts the dense output vector to a single value (predicted class).
The train targets are a one-hot encoded matrix like so:
[
[0,0,0,0,1]
[0,0,1,0,0]
[0,0,0,1,0]
[0,0,0,0,1]
]
It would be nice if I could define a categorical_crossentropy loss over the dense layer and ignore the lambda layer while still maintaining the functionality and outputting a single value when I call model.predict(x).
Please note: my workspace environment doesn't allow me to use a custom training loop, as suggested in @alonetogether's excellent answer.
You can try using a custom training loop, which is pretty straightforward IMO:
import tensorflow as tf
from tensorflow.keras.layers import Layer
class MyModel(tf.keras.Model):
    def __init__(self, num_classes):
        super(MyModel, self).__init__()
        self.dense_layer = tf.keras.layers.Dense(num_classes, activation='softmax')
        self.lambda_layer = tf.keras.layers.Lambda(lambda x: tf.math.argmax(x, axis=-1))

    def call(self, inputs):
        x = self.dense_layer(inputs)
        x = self.lambda_layer(x)
        return x

    # A convenient way to get model summary
    # and plot in subclassed api
    def build_graph(self, raw_shape):
        x = tf.keras.layers.Input(shape=(raw_shape))
        return tf.keras.Model(inputs=[x],
                              outputs=self.call(x))

n_classes = 5
model = MyModel(n_classes)
labels = tf.keras.utils.to_categorical(tf.random.uniform((50, 1), maxval=5, dtype=tf.int32))
train_dataset = tf.data.Dataset.from_tensor_slices((tf.random.normal((50, 1)), labels)).batch(2)
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.CategoricalCrossentropy()
epochs = 2
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            # the loss is computed on the dense (softmax) layer only,
            # bypassing the argmax lambda layer
            logits = model.layers[0](x_batch_train)
            loss_value = loss_fn(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
And prediction:
print(model.predict(tf.random.normal((1, 1))))
[3]
I think there is a Model.predict_classes function that would replace the need for that lambda layer. But if it doesn't work:
There doesn't seem to be a way to do that without using one of these hacks:
- Two inputs (one is the ground truth values Y)
- Two outputs
- Two models
I'm quite convinced there is no other workaround for this.
So, I believe the "two models" version is the best for your case, where you seem to "need" a model with a single input, a single output, and fit.
Then I'd do this:
inputs = tf.keras.layers.Input(input_shape_without_batch_size)
loss_outputs = tf.keras.layers.Dense(num_classes,activation='softmax')(inputs)
final_outputs = tf.keras.layers.Lambda(lambda x: tf.math.argmax(x, axis=-1))(loss_outputs)
training_model = tf.keras.models.Model(inputs, loss_outputs)
final_model = tf.keras.models.Model(inputs, final_outputs)
training_model.compile(.....)
training_model.fit(....)
results = final_model.predict(...)
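A quick end-to-end sketch of that two-models idea with made-up filler data (shapes and values below are only illustrative); because both models share the same Dense layer, fitting training_model also updates final_model:
import tensorflow as tf

num_classes = 5
inputs = tf.keras.layers.Input((1,))
loss_outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(inputs)
final_outputs = tf.keras.layers.Lambda(lambda x: tf.math.argmax(x, axis=-1))(loss_outputs)

training_model = tf.keras.models.Model(inputs, loss_outputs)
final_model = tf.keras.models.Model(inputs, final_outputs)

x = tf.random.normal((50, 1))
y = tf.keras.utils.to_categorical(tf.random.uniform((50, 1), maxval=num_classes, dtype=tf.int32))

# train against the softmax output only
training_model.compile(optimizer='adam', loss='categorical_crossentropy')
training_model.fit(x, y, epochs=2, verbose=0)

# predict a single class index through the lambda layer
print(final_model.predict(tf.random.normal((1, 1))))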
Is there any layer in Keras which calculates the derivative with respect to the input? For example, if x is the input and the first layer is f(x), then the next layer's output should be f'(x). There are multiple questions here about this topic, but all of them involve computing the derivative outside the model. In essence, I want to create a neural network whose loss function involves both the Jacobian and the Hessian with respect to the inputs.
I've tried the following
import numpy as np
import tensorflow as tf
import keras
import keras.backend as K
from keras.layers import Dense

def create_model():
    x = keras.Input(shape=(10,))
    layer = Dense(1, activation="sigmoid")
    output = layer(x)
    jac = K.gradients(output, x)
    model = keras.Model(inputs=x, outputs=jac)
    return model

model = create_model()
X = np.random.uniform(size=(3, 10))
This gives the error: tf.gradients is not supported when eager execution is enabled. Use tf.GradientTape instead.
So I tried using that
def create_model2():
    with tf.GradientTape() as tape:
        x = keras.Input(shape=(10,))
        layer = Dense(1, activation="sigmoid")
        output = layer(x)
    jac = tape.gradient(output, x)
    model = keras.Model(inputs=x, outputs=jac)
    return model

model = create_model2()
X = np.random.uniform(size=(3, 10))
model = create_model2()
X = np.random.uniform(size = (3, 10))
but this tells me 'KerasTensor' object has no attribute '_id'
Both these methods work fine outside the model. My end goal is to use the Jacobian and Hessian in the loss function, so alternative approaches would also be appreciated
Not sure what exactly you want to do, but maybe try a custom Keras layer with tf.gradients:
import tensorflow as tf
tf.random.set_seed(111)
class GradientLayer(tf.keras.layers.Layer):
    def __init__(self):
        super(GradientLayer, self).__init__()
        self.dense = tf.keras.layers.Dense(1, activation = "sigmoid")

    @tf.function
    def call(self, inputs):
        outputs = self.dense(inputs)
        return tf.gradients(outputs, inputs)

def create_model2():
    gradient_layer = GradientLayer()
    inputs = tf.keras.layers.Input(shape = (10,))
    outputs = gradient_layer(inputs)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model
model = create_model2()
X = tf.random.uniform((3, 10))
print(model(X))
tf.Tensor(
[[-0.07935508 -0.12471244 -0.0702782 -0.06729251 0.14465885 -0.0818079
-0.08996294 0.07622238 0.11422144 -0.08126545]
[-0.08666676 -0.13620329 -0.07675356 -0.07349276 0.15798753 -0.08934557
-0.09825202 0.08324542 0.12474566 -0.08875315]
[-0.08661086 -0.13611545 -0.07670406 -0.07344536 0.15788564 -0.08928795
-0.09818865 0.08319173 0.12466521 -0.08869591]], shape=(3, 10), dtype=float32)
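Since the model's output here is the input-gradient itself, one way to bring it into a loss (a sketch with a made-up target for the gradient, assuming an MSE penalty on it is what you want) is simply to compile a loss on that output:
X = tf.random.uniform((32, 10))
# made-up target for the input-gradient of each sample, purely for illustration
target_jacobian = tf.zeros((32, 10))

model.compile(optimizer='adam', loss='mse')
model.fit(X, target_jacobian, epochs=2, verbose=0)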
I'm using TensorFlow 2 and I need to build a multi-input multi-output model. My data is time-series data, which does not have a consistent shape for its time dimension. I've tried many ways but none worked due to the inconsistent shape.
There are three arrays, and one of them is used twice. They have the format (number of files, None, 5), with the None dimension being the inconsistent one.
Here is some test code that reproduces my issue, using a generator in this case, but feel free to change to whatever method. Could someone help me with this input pipeline?
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
dummy_1 = [[[1.1,2,3,4,5],[2,3,4,5,6],[3,4,5,6,7]],
[[1.2,2,3,4,5],[2,3,4,5,6.8]],
[[1.3,2,3,4,5],[2,3,4,5,6],[3,4,5,6,7],[4,5,6,7,8.9]]]
dummy_2 = [[[1.1,2,3,4,5],[2,3,4,5,6]],
[[1.1,2,3,4,5],[2,3,4,5,6]],[3,4,5,6,7],
[[1.3,2,3,4,5],[2,3,4,5,6]]]
dummy_3 = [[[1.5,2,3,4,5],[2,3,4,5,6]],
[[1.6,2,3,4,5],[2,3,4,5,6]],[3,4,5,6,7],
[[1.7,2,3,4,5],[2,3,4,5,6]]]
def gen():
    for i in range(len(dummy_1)):
        yield(dummy_1[i], dummy_2[i], dummy_2[i], dummy_3[i])

def custom_loss(y_true, y_pred):
    return tf.reduce_mean(tf.abs(y_pred - y_true))

class network():
    def __init__(self):
        input_1 = keras.Input(shape=(None, 5))
        input_2 = keras.Input(shape=(None, 5))
        output_1 = layers.Conv1DTranspose(16, 3, padding='same', activation='relu')(input_1)
        output_2 = layers.Conv1DTranspose(16, 3, padding='same', activation='relu')(input_2)

        self.model = keras.Model(inputs=[input_1, input_2],
                                 outputs=[output_1, output_2])

        # compile model
        self.model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.001),
                           loss={"mel_loss": custom_loss, "mag_loss": custom_loss})

    def train(self):
        self.dataset = tf.data.Dataset.from_generator(gen,
                                                      (tf.float32, tf.float32, tf.float32, tf.float32))
        self.dataset.batch(32).repeat()
        self.model.fit(self.dataset, epochs=3)
        #self.model.fit([dummy_1, dummy_2],
        #               [dummy_2, dummy_3],
        #               epochs=3)
net = network()
net.train()
This is not currently possible in TF2; see https://github.com/tensorflow/tensorflow/issues/45112