I am trying to concatenate the hidden units. For example, I have 3 units, h1,h2,h3 then I want the new layer to have [h1;h1],[h1;h2],[h1;h3],[h2;h1]....
So, I have tried:
class MyLayer(Layer):
def __init__(self,W_regularizer=None,W_constraint=None, **kwargs):
self.init = initializers.get('glorot_uniform')
self.W_regularizer = regularizers.get(W_regularizer)
self.W_constraint = constraints.get(W_constraint)
super(MyLayer, self).__init__(**kwargs)
def build(self, input_shape):
assert len(input_shape) == 3
# Create a trainable weight variable for this layer.
self.W = self.add_weight((input_shape[-1],input_shape[-1]),
initializer=self.init,
name='{}_W'.format(self.name),
regularizer=self.W_regularizer,
constraint=self.W_constraint,
trainable=True)
super(MyLayer, self).build(input_shape)
def call(self, x,input_shape):
conc=K.concatenate([x[:, :-1, :], x[:, 1:, :]],axis=1)# help needed here
uit = K.dot(conc, self.W)# W has input_shape[-1],input_shape[-1]
return uit
def compute_output_shape(self, input_shape):
return input_shape[0], input_shape[1],input_shape[-1]
I am not sure what should I return for the second argument of my output shape.
from keras.layers import Input, Lambda, LSTM
from keras.models import Model
import keras.backend as K
from keras.layers import Lambda
lstm=LSTM(64, return_sequences=True)(input)
something=MyLayer()(lstm)
You could implement the concatenation operation that you described by leveraging itertools.product in order to compute the cartesian product of the temporal dimension's indices. The call method could be coded as follows:
def call(self, x):
prod = product(range(nb_timesteps), repeat=2)
conc_prod = []
for i, j in prod:
c = K.concatenate([x[:, i, :], x[:, j, :]], axis=-1) # Shape=(batch_size, 2*nb_features)
c_expanded = c[:, None, :] # Shape=(batch_size, 1, 2*nb_features)
conc_prod.append(c_expanded)
conc = K.concatenate(conc_prod, axis=1) # Shape=(batch_size, nb_timesteps**2, 2*nb_features)
uit = K.dot(conc, self.W) # W has shape 2*input_shape[-1], input_shape[-1]
return uit # Shape=(batch_size, nb_timesteps**2, nb_features)
In the example that you provided, nb_timesteps would be 3. Note also that the shape of the weights should be (2*input_shape[-1], input_shape[-1]) for the dot product to be valid.
Disclaimer: I am not sure what you want to achieve.
Related
I am more or less new to the field of neural networks and python, just a couple of months of work.
I am interested in this case developed in matlab https://it.mathworks.com/help/images/image-processing-operator-approximation-using-deep-learning.html
However, I would like to try to implement this using Keras.
I have three questions regarding the two custom layers this net uses, whose codes are found here:
https://github.com/catsymptote/Salsa_cryptanalysis/blob/master/matlab/workspace/adaptiveNormalizationMu.m
https://github.com/catsymptote/Salsa_cryptanalysis/blob/master/matlab/workspace/adaptiveNormalizationLambda.m
I have not really/deeply understood what these layers actually do
Is my temptative implementation of adaptiveNormalizationMu correct on Keras? Based on what I
understood, this layer just multiplies the output of the BN layer for an adaptive scale
parameter, mu. I wrote the code following the example reported here
https://www.tutorialspoint.com/keras/keras_customized_layer.htm
I am struggling with the variables input_shape and output_shape of the code I wrote following the tutorial.
Considering batch size BS, images with dimensions dim1 and dim2, 1 channel, I would love the input to have dimension (BS, dim1, dim2, 1), and output to have the same, since it is a mere scaling. How to be coherent with the code written in matlab in the mathworks example, where the only input argument is numberOfFilters? I don't know where to introduce this parameter in the code I am trying to write. I would love not to fix the input dimension, so that I can re-use this layer at different depths of the network, but correctly choose the "depht" (like the number of filters for a standard conv2D layer)
Thank you so much for the help
F.
###
from keras import backend as K
from keras.layers import Layer
class MyAdaptiveNormalizationMu(Layer):
def __init__(self, output_dim, **kwargs):
self.output_dim = output_dim
super(MyAdaptiveNormalizationMu, self).__init__(**kwargs)
def build(self, input_shape):
self.mu = self.add_weight(name = 'mu',
shape = (input_shape[1], self.output_dim),
initializer = 'random_normal', trainable = True)
super(MyAdaptiveNormalizationMu, self).build(input_shape)
def call(self, input_data):
return input_data * self.mu
def compute_output_shape(self, input_shape): return (input_shape[0], self.output_dim)
from keras.models import Sequential
batch_size = 16
dim1 = 8
dim2 = 8
channels = 1
input_shape = (batch_size, dim1, dim2, channels)
output_shape = input_shape
model = Sequential()
model.add(MyAdaptiveNormalizationMu(output_dim=?, input_shape=?))
EDIT: I provide a second realization attempt, which seems to compile. It should do what I think adaptiveNormalizationLambda and adaptiveNormalizationMu do: multiply the input for a learnable weight matrix. However, i am still unsure if the layer is doing what it is supposed to, and if I got correctly the sense of those layers.
from keras.layers import Layer, Input
from keras.models import Model
import numpy as np
class Multiply_Weights(Layer):
def __init__(self, **kwargs):
super(Multiply_Weights, self).__init__(**kwargs)
def build(self, input_shape):
# Create a trainable weight variable for this layer.
self.kernel = self.add_weight(name='kernel',
shape=(input_shape[1], input_shape[2]),
initializer='RandomNormal',
trainable=True)
super(Multiply_Weights, self).build(input_shape)
def call(self, x, **kwargs):
# Implicit broadcasting occurs here.
# Shape x: (BATCH_SIZE, N, M)
# Shape kernel: (N, M)
# Shape output: (BATCH_SIZE, N, M)
return x * self.kernel
def compute_output_shape(self, input_shape):
return input_shape
N = 3
M = 4
BATCH_SIZE = 1
a = Input(shape=(N, M))
layer = Multiply_Weights()(a)
model = Model(inputs=a,
outputs=layer)
a = np.ones(shape=(BATCH_SIZE, N, M))
pred = model.predict(a)
print(pred)
I have trained an RNN model with pytorch. I need to use the model for prediction in an environment where I'm unable to install pytorch because of some strange dependency issue with glibc. However, I can install numpy and scipy and other libraries. So, I want to use the trained model, with the network definition, without pytorch.
I have the weights of the model as I save the model with its state dict and weights in the standard way, but I can also save it using just json/pickle files or similar.
I also have the network definition, which depends on pytorch in a number of ways. This is my RNN network definition.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
torch.manual_seed(1)
random.seed(1)
device = torch.device('cpu')
class RNN(nn.Module):
def __init__(self, input_size, hidden_size, output_size,num_layers, matching_in_out=False, batch_size=1):
super(RNN, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
self.num_layers = num_layers
self.batch_size = batch_size
self.matching_in_out = matching_in_out #length of input vector matches the length of output vector
self.lstm = nn.LSTM(input_size, hidden_size,num_layers)
self.hidden2out = nn.Linear(hidden_size, output_size)
self.hidden = self.init_hidden()
def forward(self, feature_list):
feature_list=torch.tensor(feature_list)
if self.matching_in_out:
lstm_out, _ = self.lstm( feature_list.view(len( feature_list), 1, -1))
output_space = self.hidden2out(lstm_out.view(len( feature_list), -1))
output_scores = torch.sigmoid(output_space) #we'll need to check if we need this sigmoid
return output_scores #output_scores
else:
for i in range(len(feature_list)):
cur_ft_tensor=feature_list[i]#.view([1,1,self.input_size])
cur_ft_tensor=cur_ft_tensor.view([1,1,self.input_size])
lstm_out, self.hidden = self.lstm(cur_ft_tensor, self.hidden)
outs=self.hidden2out(lstm_out)
return outs
def init_hidden(self):
#return torch.rand(self.num_layers, self.batch_size, self.hidden_size)
return (torch.rand(self.num_layers, self.batch_size, self.hidden_size).to(device),
torch.rand(self.num_layers, self.batch_size, self.hidden_size).to(device))
I am aware of this question, but I'm willing to go as low level as possible. I can work with numpy array instead of tensors, and reshape instead of view, and I don't need a device setting.
Based on the class definition above, what I can see here is that I only need the following components from torch to get an output from the forward function:
nn.LSTM
nn.Linear
torch.sigmoid
I think I can easily implement the sigmoid function using numpy. However, can I have some implementation for the nn.LSTM and nn.Linear using something not involving pytorch? Also, how will I use the weights from the state dict into the new class?
So, the question is, how can I "translate" this RNN definition into a class that doesn't need pytorch, and how to use the state dict weights for it?
Alternatively, is there a "light" version of pytorch, that I can use just to run the model and yield a result?
EDIT
I think it might be useful to include the numpy/scipy equivalent for both nn.LSTM and nn.linear. It would help us compare the numpy output to torch output for the same code, and give us some modular code/functions to use. Specifically, a numpy equivalent for the following would be great:
rnn = nn.LSTM(10, 20, 2)
input = torch.randn(5, 3, 10)
h0 = torch.randn(2, 3, 20)
c0 = torch.randn(2, 3, 20)
output, (hn, cn) = rnn(input, (h0, c0))
and also for linear:
m = nn.Linear(20, 30)
input = torch.randn(128, 20)
output = m(input)
You should try to export the model using torch.onnx. The page gives you an example that you can start with.
An alternative is to use TorchScript, but that requires torch libraries.
Both of these can be run without python. You can load torchscript in a C++ application https://pytorch.org/tutorials/advanced/cpp_export.html
ONNX is much more portable and you can use in languages such as C#, Java, or Javascript
https://onnxruntime.ai/ (even on the browser)
A running example
Just modifying a little your example to go over the errors I found
Notice that via tracing any if/elif/else, for, while will be unrolled
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
torch.manual_seed(1)
random.seed(1)
device = torch.device('cpu')
class RNN(nn.Module):
def __init__(self, input_size, hidden_size, output_size,num_layers, matching_in_out=False, batch_size=1):
super(RNN, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
self.num_layers = num_layers
self.batch_size = batch_size
self.matching_in_out = matching_in_out #length of input vector matches the length of output vector
self.lstm = nn.LSTM(input_size, hidden_size,num_layers)
self.hidden2out = nn.Linear(hidden_size, output_size)
def forward(self, x, h0, c0):
lstm_out, (hidden_a, hidden_b) = self.lstm(x, (h0, c0))
outs=self.hidden2out(lstm_out)
return outs, (hidden_a, hidden_b)
def init_hidden(self):
#return torch.rand(self.num_layers, self.batch_size, self.hidden_size)
return (torch.rand(self.num_layers, self.batch_size, self.hidden_size).to(device).detach(),
torch.rand(self.num_layers, self.batch_size, self.hidden_size).to(device).detach())
# convert the arguments passed during onnx.export call
class MWrapper(nn.Module):
def __init__(self, model):
super(MWrapper, self).__init__()
self.model = model;
def forward(self, kwargs):
return self.model(**kwargs)
Run an example
rnn = RNN(10, 10, 10, 3)
X = torch.randn(3,1,10)
h0,c0 = rnn.init_hidden()
print(rnn(X, h0, c0)[0])
Use the same input to trace the model and export an onnx file
torch.onnx.export(MWrapper(rnn), {'x':X,'h0':h0,'c0':c0}, 'rnn.onnx',
dynamic_axes={'x':{1:'N'},
'c0':{1: 'N'},
'h0':{1: 'N'}
},
input_names=['x', 'h0', 'c0'],
output_names=['y', 'hn', 'cn']
)
Notice that you can use symbolic values for the dimensions of some axes of some inputs. Unspecified dimensions will be fixed with the values from the traced inputs. By default LSTM uses dimension 1 as batch.
Next we load the ONNX model and pass the same inputs
import onnxruntime
ort_model = onnxruntime.InferenceSession('rnn.onnx')
print(ort_model.run(['y'], {'x':X.numpy(), 'c0':c0.numpy(), 'h0':h0.numpy()}))
Basically implementing it in numpy and copying weights from your pytorch model can do the trick. For your usecase you will only need to do a forward pass so we just need to implement that only
#Set Parameters for a small LSTM network
input_size = 2 # size of one 'event', or sample, in our batch of data
hidden_dim = 3 # 3 cells in the LSTM layer
output_size = 1 # desired model output
num_layers=3
torch_lstm = RNN( input_size,
hidden_dim ,
output_size,
num_layers,
matching_in_out=True
)
state = torch_lstm.state_dict() # state will capture the weights of your model
Now for LSTM in numpy these functions will be used:
got the below code from this link: https://towardsdatascience.com/the-lstm-reference-card-6163ca98ae87
### NOT MY CODE
import numpy as np
from scipy.special import expit as sigmoid
def forget_gate(x, h, Weights_hf, Bias_hf, Weights_xf, Bias_xf, prev_cell_state):
forget_hidden = np.dot(Weights_hf, h) + Bias_hf
forget_eventx = np.dot(Weights_xf, x) + Bias_xf
return np.multiply( sigmoid(forget_hidden + forget_eventx), prev_cell_state )
def input_gate(x, h, Weights_hi, Bias_hi, Weights_xi, Bias_xi, Weights_hl, Bias_hl, Weights_xl, Bias_xl):
ignore_hidden = np.dot(Weights_hi, h) + Bias_hi
ignore_eventx = np.dot(Weights_xi, x) + Bias_xi
learn_hidden = np.dot(Weights_hl, h) + Bias_hl
learn_eventx = np.dot(Weights_xl, x) + Bias_xl
return np.multiply( sigmoid(ignore_eventx + ignore_hidden), np.tanh(learn_eventx + learn_hidden) )
def cell_state(forget_gate_output, input_gate_output):
return forget_gate_output + input_gate_output
def output_gate(x, h, Weights_ho, Bias_ho, Weights_xo, Bias_xo, cell_state):
out_hidden = np.dot(Weights_ho, h) + Bias_ho
out_eventx = np.dot(Weights_xo, x) + Bias_xo
return np.multiply( sigmoid(out_eventx + out_hidden), np.tanh(cell_state) )
We would need the sigmoid function as well so
def sigmoid(x):
return 1/(1 + np.exp(-x))
Because pytorch stores weights in stacked manner so we need to break it up for that we would need the below function
def get_slices(hidden_dim):
slices=[]
breaker=(hidden_dim*4)
slices=[[i,i+3] for i in range(0, breaker, breaker//4)]
return slices
Now we have the functions ready for lstm, now we create an lstm class to copy the weights from pytorch class and get the output from it.
class numpy_lstm:
def __init__( self, layer_num=0, hidden_dim=1, matching_in_out=False):
self.matching_in_out=matching_in_out
self.layer_num=layer_num
self.hidden_dim=hidden_dim
def init_weights_from_pytorch(self, state):
slices=get_slices(self.hidden_dim)
print (slices)
#Event (x) Weights and Biases for all gates
lstm_weight_ih='lstm.weight_ih_l'+str(self.layer_num)
self.Weights_xi = state[lstm_weight_ih][slices[0][0]:slices[0][1]].numpy() # shape [h, x]
self.Weights_xf = state[lstm_weight_ih][slices[1][0]:slices[1][1]].numpy() # shape [h, x]
self.Weights_xl = state[lstm_weight_ih][slices[2][0]:slices[2][1]].numpy() # shape [h, x]
self.Weights_xo = state[lstm_weight_ih][slices[3][0]:slices[3][1]].numpy() # shape [h, x]
lstm_bias_ih='lstm.bias_ih_l'+str(self.layer_num)
self.Bias_xi = state[lstm_bias_ih][slices[0][0]:slices[0][1]].numpy() #shape is [h, 1]
self.Bias_xf = state[lstm_bias_ih][slices[1][0]:slices[1][1]].numpy() #shape is [h, 1]
self.Bias_xl = state[lstm_bias_ih][slices[2][0]:slices[2][1]].numpy() #shape is [h, 1]
self.Bias_xo = state[lstm_bias_ih][slices[3][0]:slices[3][1]].numpy() #shape is [h, 1]
lstm_weight_hh='lstm.weight_hh_l'+str(self.layer_num)
#Hidden state (h) Weights and Biases for all gates
self.Weights_hi = state[lstm_weight_hh][slices[0][0]:slices[0][1]].numpy() #shape is [h, h]
self.Weights_hf = state[lstm_weight_hh][slices[1][0]:slices[1][1]].numpy() #shape is [h, h]
self.Weights_hl = state[lstm_weight_hh][slices[2][0]:slices[2][1]].numpy() #shape is [h, h]
self.Weights_ho = state[lstm_weight_hh][slices[3][0]:slices[3][1]].numpy() #shape is [h, h]
lstm_bias_hh='lstm.bias_hh_l'+str(self.layer_num)
self.Bias_hi = state[lstm_bias_hh][slices[0][0]:slices[0][1]].numpy() #shape is [h, 1]
self.Bias_hf = state[lstm_bias_hh][slices[1][0]:slices[1][1]].numpy() #shape is [h, 1]
self.Bias_hl = state[lstm_bias_hh][slices[2][0]:slices[2][1]].numpy() #shape is [h, 1]
self.Bias_ho = state[lstm_bias_hh][slices[3][0]:slices[3][1]].numpy() #shape is [h, 1]
def forward_lstm_pass(self,input_data):
h = np.zeros(self.hidden_dim)
c = np.zeros(self.hidden_dim)
output_list=[]
for eventx in input_data:
f = forget_gate(eventx, h, self.Weights_hf, self.Bias_hf, self.Weights_xf, self.Bias_xf, c)
i = input_gate(eventx, h, self.Weights_hi, self.Bias_hi, self.Weights_xi, self.Bias_xi,
self.Weights_hl, self.Bias_hl, self.Weights_xl, self.Bias_xl)
c = cell_state(f,i)
h = output_gate(eventx, h, self.Weights_ho, self.Bias_ho, self.Weights_xo, self.Bias_xo, c)
if self.matching_in_out: # doesnt make sense but it was as it was in main code :(
output_list.append(h)
if self.matching_in_out:
return output_list
else:
return h
Similarly for fully connected layer,
class fully_connected_layer:
def __init__(self,state, dict_name='fc', ):
self.fc_Weight = state[dict_name+'.weight'][0].numpy()
self.fc_Bias = state[dict_name+'.bias'][0].numpy() #shape is [,output_size]
def forward(self,lstm_output, is_sigmoid=True):
res=np.dot(self.fc_Weight, lstm_output)+self.fc_Bias
print (res)
if is_sigmoid:
return sigmoid(res)
else:
return res
Now we would need one class to call all of them together and generalise them with respect to multiple layers
You can modify the below class if you need more Fully connected layers or want to set false condition for sigmoid etc.
class RNN_model_Numpy:
def __init__(self, state, input_size, hidden_dim, output_size, num_layers, matching_in_out=True):
self.lstm_layers=[]
for i in range(0, num_layers):
lstm_layer_obj=numpy_lstm(layer_num=i, hidden_dim=hidden_dim, matching_in_out=True)
lstm_layer_obj.init_weights_from_pytorch(state)
self.lstm_layers.append(lstm_layer_obj)
self.hidden2out=fully_connected_layer(state, dict_name='hidden2out')
def forward(self, feature_list):
for x in self.lstm_layers:
lstm_output=x.forward_lstm_pass(feature_list)
feature_list=lstm_output
return self.hidden2out.forward(feature_list, is_sigmoid=False)
Sanity check on a numpy variable:
data = np.array(
[[1,1],
[2,2],
[3,3]])
check=RNN_model_Numpy(state, input_size, hidden_dim, output_size, num_layers)
check.forward(data)
EXPLANATION:
Since we just need forward pass, we would need certain functions that are required in LSTM, for that we have the forget gate, input gate, cell gate and output gate. They are just some operations that are done on the input that you give.
For get_slices function, this is used to break down the weight matrix that we get from pytorch state dictionary (state dictionary) is the dictionary which contains the weights of all the layers that we have in our network.
For LSTM particularly have it in this order ignore, forget, learn, output. So for that we would need to break it up for different LSTM cells.
For numpy_lstm class, we have init_weights_from_pytorch function which must be called, what it will do is that it will extract the weights from state dictionary which we got earlier from pytorch model object and then populate the numpy array weights with the pytorch weights. You can first train your model and then save the state dictionary through pickle and then use it.
The fully connected layer class just implements the hidden2out neural network.
Finally our rnn_model_numpy class is there to ensure that if you have multiple layers then it is able to send the output of one layer of lstm to other layer of lstm.
Lastly there is a small sanity check on data variable.
IMPORTANT NOTE: PLEASE NOTE THAT YOU MIGHT GET DIMENSION ERROR AS PYTORCH WAY OF HANDLING INPUT IS COMPLETELY DIFFERENT SO PLEASE ENSURE THAT YOU INPUT NUMPY IS OF SIMILAR SHAPE AS DATA VARIABLE.
Important references:
https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html
https://christinakouridi.blog/2019/06/19/backpropagation-lstm/
In a default Conv2D layer with kernel_size=3 the weights of a slice of one of the filters could be named like this:
A B C
D E F
G H I
With kernel_size=5 like this:
A B C D E
F G H I J
K L M N O
P Q R S T
U V W X Y
Now I'd like to build (and train/test) a model based on conv layers with kernels like that:
A A B C C
A A B C C
D D E F F
G G H I I
G G H I I
How could the implementation of such a custom layer look like?
Maybe like this?
class CustomConv2D(Layer):
def __init__(self, filters, **kwargs):
self.filters = filters
self.kernel_size = (3, 3)
super(CustomConv2D, self).__init__(**kwargs)
def build(self, input_shape):
# only have a 3x3 kernel
shape = self.kernel_size + (input_shape[-1], self.filters)
self.kernel = self.add_weight(name='kernel', shape=shape,
initializer='glorot_uniform')
super(CustomConv2D, self).build(input_shape)
def call(self, x):
# duplicate rows 0 and 2
dup_rows = K.stack([self.kernel[0]]*2 + [self.kernel[1]] + [self.kernel[2]]*2, axis=0)
# duplicate cols 0 and 2
dup_cols = K.stack([dup_rows[:,0]]*2 + [dup_rows[:,1]] + [dup_rows[:,2]]*2, axis=1)
# having a 5x5 kernel now
return K.conv2d(x, dup_cols)
def compute_output_shape(self, input_shape):
return input_shape[:-1] + (self.filters,)
The trick is to simply store only 9 weights per filter in a 3x3 kernel (hard coded, you may want to generalize it) and to duplicate the first and last rows and columns to make it a 5x5 kernel the way you want it. Then this kernel is passed to K.conv2d() just like in the original Conv2d implementation.
I tested it and it seems to be working. You may want to add other parameters like padding, bias, etc.
I think this does a basic version of what you intend:
from keras import backend as K
class Conv2DTiledKernel(Layer):
def __init__(self, filters, kernel_size, multiplies, **kwargs):
self.filters = filters
self.kernel_size = kernel_size
self.multiplies = multiplies
super(Conv2DTiledKernel, self).__init__(**kwargs)
def build(self, input_shape):
shape = list(self.kernel_size) + [input_shape[-1], self.filters]
self.kernel = self.add_weight(name='kernel', shape=shape,
initializer='glorot_uniform')
super(Conv2DTiledKernel, self).build(input_shape)
def call(self, x):
mult = list(self.multiplies) + [1, 1]
kernel_tiled = K.tile(self.kernel, mult)
return K.conv2d(x, kernel_tiled)
def compute_output_shape(self, input_shape):
return input_shape[:-1] + (self.filters,)
fitlers is the number of output channels, kernel_size the size of each kernel channel and multiplies the tiling factors. You would use it something like this:
from keras.models import Model
from keras.layers import Input, Layer
img = Input(shape=(64, 64, 3))
output = Conv2DTiledKernel(10, [1, 5], [5, 1])(img)
model = Model(inputs=img, outputs=output)
This is a pretty basic version, though. You could later add options for bias, regularizer, initalization, padding, strides, dilation, etc. You can look at the source code to see how convolutional layers are implemented in Keras. It would have been ideal if you could just subclass one of the classes so you get all the additional options for free, but I'm not sure it can be done in a practical way as the code currently stands.
I have a simple model with one custom layer which works fine in the normal case.
When I switched to eager execution via tf.enable_eager_execution(), I got stuck on a weird error.
Here is the code so far:
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Input
from tensorflow.keras.losses import kullback_leibler_divergence
tf.enable_eager_execution()
class ClusteringLayer(Layer):
def __init__(self, output_dim, input_dim=None, alpha=1.0, **kwargs):
self.output_dim = output_dim
self.input_dim = input_dim
self.alpha = alpha
super(ClusteringLayer, self).__init__(**kwargs)
def build(self, input_shape):
self.W = self.add_weight(name='kernel', shape=(self.output_dim, input_shape[1]), initializer='uniform', trainable=True)
super(ClusteringLayer, self).build(input_shape)
def call(self, x, mask=None):
q = 1.0/(1.0 + K.sqrt(K.sum(K.square(K.expand_dims(x, 1) - self.W), axis=2))**2 /self.alpha)
q = q**((self.alpha+1.0)/2.0)
q = K.transpose(K.transpose(q)/K.sum(q, axis=1))
return q
def compute_output_shape(self, input_shape):
return (input_shape[0], self.output_dim)
def clustering_loss(y_true, y_pred):
a = K.square(y_pred) / K.sum(y_pred, axis=0)
p = K.transpose(K.transpose(a) / K.sum(a, axis=1))
loss = kullback_leibler_divergence(p, y_pred)
return loss
input1 = Input(shape=(10,), name="input")
out = ClusteringLayer(output_dim = 5, name='clustering')(input1)
model = Model(inputs=input1, outputs=out)
model.compile(optimizer=tf.train.AdamOptimizer(1e-3), loss={'clustering' : clustering_loss})
X = np.random.random((20, 10)).astype(np.float32)
Y = np.random.random((20, 5)).astype(np.float32)
model.fit(x={'input' : X}, y={'clustering' : Y}, batch_size=1, epochs=10)
The error message is related to the "fit" function:
AssertionError: Could not compute output DeferredTensor('None', shape=(5,), dtype=float32)
When I tried to check the output of my custom layer, I was surprised to find that this layer is generating two outputs. The first one is ambiguous and undesired.
Code:
input1 = Input(shape=(10,), name="input")
layer = ClusteringLayer(output_dim = 5, name='clustering')
out = layer(input1)
print(out)
Output:
[<DeferredTensor 'None' shape=(?,) dtype=float32>, <DeferredTensor 'None' shape=(5,) dtype=float32>]
Even when I changed my custom layer with the simplistic custom layer from the Keras documentation, I got the same error:
AssertionError: Could not compute output DeferredTensor('None', shape=(5,), dtype=float32)
I asked the question in GitHub since it seems more like a bug.
They have recommended using a workaround until they fix the internal problem.
I m quoting from here :github
As a workaround, you could wrap the output shape returned by
compute_output_shape in a TensorShape. For example:
TensorShape((input_shape[0], self.output_dim)). Let me know if this
works.
I am creating a custom layer with weights that need to be multiplied by element-wise before activation. I can get it to work when the output and input is the same shape. The problem occurs when I have a first order array as input with a second order array as output. tensorflow.multiply supports broadcasting, but when I try to use it in Layer.call(x, self.kernel)
to multiply x by the self.kernel Variable it complains that they are different shapes saying:
ValueError: Dimensions must be equal, but are 4 and 3 for 'my_layer_1/Mul' (op: 'Mul') with input shapes: [?,4], [4,3].
here is my code:
from keras import backend as K
from keras.engine.topology import Layer
import tensorflow as tf
from keras.models import Sequential
import numpy as np
class MyLayer(Layer):
def __init__(self, output_dims, **kwargs):
self.output_dims = output_dims
super(MyLayer, self).__init__(**kwargs)
def build(self, input_shape):
# Create a trainable weight variable for this layer.
self.kernel = self.add_weight(name='kernel',
shape=self.output_dims,
initializer='ones',
trainable=True)
super(MyLayer, self).build(input_shape) # Be sure to call this somewhere!
def call(self, x):
#multiply wont work here?
return K.tf.multiply(x, self.kernel)
def compute_output_shape(self, input_shape):
return (self.output_dims)
mInput = np.array([[1,2,3,4]])
inShape = (4,)
net = Sequential()
outShape = (4,3)
l1 = MyLayer(outShape, input_shape= inShape)
net.add(l1)
net.compile(loss='mean_absolute_error', optimizer='adam', metrics=['accuracy'])
p = net.predict(x=mInput, batch_size=1)
print(p)
Edit:
Given input shape (4,) and output shape (4,3) the weight matrix should be the same shape as the output and initialized with ones. So in the above code the input is [1,2,3,4], the weight matrix should be [[1,1,1,1],[1,1,1,1],[1,1,1,1]] and the output should look like [[1,2,3,4],[1,2,3,4],[1,2,3,4]]
Before multiplying, you need to repeat the elements to increase the shape.
You can use K.repeat_elements for that. (import keras.backend as K)
class MyLayer(Layer):
#there are some difficulties for different types of shapes
#let's use a 'repeat_count' instead, increasing only one dimension
def __init__(self, repeat_count,**kwargs):
self.repeat_count = repeat_count
super(MyLayer, self).__init__(**kwargs)
def build(self, input_shape):
#first, let's get the output_shape
output_shape = self.compute_output_shape(input_shape)
weight_shape = (1,) + output_shape[1:] #replace the batch size by 1
self.kernel = self.add_weight(name='kernel',
shape=weight_shape,
initializer='ones',
trainable=True)
super(MyLayer, self).build(input_shape) # Be sure to call this somewhere!
#here, we need to repeat the elements before multiplying
def call(self, x):
if self.repeat_count > 1:
#we add the extra dimension:
x = K.expand_dims(x, axis=1)
#we replicate the elements
x = K.repeat_elements(x, rep=self.repeat_count, axis=1)
#multiply
return x * self.kernel
#make sure we comput the ouptut shape according to what we did in "call"
def compute_output_shape(self, input_shape):
if self.repeat_count > 1:
return (input_shape[0],self.repeat_count) + input_shape[1:]
else:
return input_shape
here is another solution that is based on the answer by Daniel Möller, but uses tf.multiply like the original code.
class MyLayer(Layer):
def __init__(self, output_dim, **kwargs):
self.output_dim = output_dim
super(MyLayer, self).__init__(**kwargs)
def build(self, input_shape):
# Create a trainable weight variable for this layer.
output_shape = self.compute_output_shape(input_shape)
self.kernel = self.add_weight(name='kernel',
shape=(1,) + output_shape[1:],
initializer='ones',
trainable=True)
super(MyLayer, self).build(input_shape) # Be sure to call this somewhere!
def call(self, x):
return K.tf.multiply(x, self.kernel)
def compute_output_shape(self, input_shape):
return (input_shape[0],self.output_dim)+input_shape[1:]