I would like to restrict a Dense layer so that only some of its weights are trainable. My idea is to make a custom layer and pass in a boolean numpy array of the same shape as the weight matrix, marking which weights should be trainable (all the other weights should stay 0). But I'm not sure how to do this in TensorFlow. Essentially, I need to know how to make a TensorFlow tensor in which some entries are constant zeros and the other entries are variables. My code example is below.
In the code below, the line
self.M = self.M[trainable_arr]
obviously doesn't work, and I need the correct alternative to it. Any good ideas? There is no simple pattern to which weights I want to be trainable and which I don't.
class sparse_layer(tf.keras.layers.Layer):
    def __init__(self, kernel_regularizer=None, latent_d=10, name=None, trainable_arr=None):
        super(sparse_layer, self).__init__(name=name)
        # self.w = self.add_weight(shape=(input_dim, input_dim), initializer="random_normal", trainable=True)
        self.latent_d = latent_d            # store constructor args so build() can use them
        self.trainable_arr = trainable_arr
        self.kernel_regularizer1 = tf.keras.regularizers.get(kernel_regularizer)

    def build(self, input_shape):
        # self.M = self.add_weight(shape=(int(self.latent_d), int(input_shape[-1])),
        #                          initializer="random_normal", trainable=True,
        #                          regularizer=self.kernel_regularizer1)
        n = self.trainable_arr.sum().sum()
        w = tf.Variable(tf.zeros(n))
        # self.M = tf.constant(tf.zeros((int(self.latent_d), int(input_shape[-1]))))
        self.M = tf.Variable(tf.zeros((int(self.latent_d), int(input_shape[-1]))))
        self.M = self.M[self.trainable_arr]   # <-- the line that doesn't work
        # for i in self.M.shape[0]:
        #     for j in self.M.shape[1]:
        # self.M = tf.multiply(self.M, trainable)

    def call(self, inputs):
        dx = tf.transpose(inputs)
        return tf.transpose(tf.matmul(self.M, dx))
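A minimal sketch of one possible approach, assuming trainable_arr is a boolean numpy array with the same shape as the kernel: keep a full trainable kernel and multiply it element-wise by a constant 0/1 mask in call. The masked entries then contribute nothing to the output and receive zero gradient, so they effectively stay at 0.

import numpy as np
import tensorflow as tf

class MaskedDense(tf.keras.layers.Layer):
    """Sketch: a dense layer whose kernel is element-wise masked by a constant 0/1 array."""

    def __init__(self, latent_d, trainable_arr, name=None):
        super().__init__(name=name)
        self.latent_d = latent_d
        # 1.0 where the weight is free to train, 0.0 where it must stay zero.
        self.mask = tf.constant(trainable_arr.astype("float32"))

    def build(self, input_shape):
        self.M = self.add_weight(
            shape=(self.latent_d, int(input_shape[-1])),
            initializer="random_normal",
            trainable=True,
        )

    def call(self, inputs):
        masked = self.M * self.mask  # zeroed entries contribute nothing and get zero gradient
        return tf.matmul(inputs, masked, transpose_b=True)


# Usage with made-up shapes:
mask = np.random.rand(10, 20) > 0.5          # boolean array: which weights are trainable
layer = MaskedDense(latent_d=10, trainable_arr=mask)
out = layer(tf.random.normal((4, 20)))       # -> shape (4, 10)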
Related
So, I'm trying to create a custom layer in TensorFlow 2.4.1, using a function for a neuron I defined.
# NOTE: this is not the actual neuron I want to use,
# it's just a simple example.
def neuron(x, W, b):
    return W @ x + b
Where the W and b it gets would be of shape (1, x.shape[0]) and (1, 1) respectively. This means this is like a single neuron in a dense layer. So, I want to create a dense layer by stacking however many of these individual neurons I want.
class Layer(tf.keras.layers.Layer):
    def __init__(self, n_units=5):
        super(Layer, self).__init__()  # handles standard arguments

        self.n_units = n_units  # Number of neurons to be in the layer

    def build(self, input_shape):
        # Create weights and biases for all neurons individually
        for i in range(self.n_units):
            # Create weights and bias for ith neuron
            ...

    def call(self, inputs):
        # Compute outputs for all neurons
        ...
        # Concatenate outputs to create layer output
        ...
        return output
How can I create a layer as a stack of individual neurons (in a way that it can also train)? I have roughly outlined the idea for the layer in the above code, but the answer doesn't need to follow that as a blueprint.
Finally: yes, I'm aware that you don't need to go about creating a dense layer in such a roundabout way (you just need one weight and bias matrix), but in my actual use case this is necessary. Thanks!
So, as the person who asked this question, I have found a way to do it by dynamically creating variables and operations.
First, let's re-define the neuron to use tensorflow operations:
def neuron(x, W, b):
    return tf.add(tf.matmul(W, x), b)
Then, let's create the layer (this uses the blueprint laid out in the question):
class Layer(tf.keras.layers.Layer):
    def __init__(self, n_units=5):
        super(Layer, self).__init__()

        self.n_units = n_units

    def build(self, input_shape):
        for i in range(self.n_units):
            exec(f'self.kernel_{i} = self.add_weight("kernel_{i}", shape=[1, int(input_shape[0])])')
            exec(f'self.bias_{i} = self.add_weight("bias_{i}", shape=[1, 1])')

    def call(self, inputs):
        for i in range(self.n_units):
            exec(f'out_{i} = neuron(inputs, self.kernel_{i}, self.bias_{i})')

        return eval(f'tf.concat([{", ".join([ f"out_{i}" for i in range(self.n_units) ])}], axis=0)')
As you can see, we're using exec and eval to dynamically create variables and perform operations.
That's it! We can perform a few checks to see if TensorFlow could use this:
# Check to see if it outputs the correct thing
layer = Layer(5) # With 5 neurons, it should return a (5, 6)
print(layer(tf.zeros([10, 6])))
# Check to see if it has the right trainable parameters
print(layer.trainable_variables)
# Check to see if TensorFlow can find the gradients
layer = Layer(5)
x = tf.ones([10, 6])
with tf.GradientTape() as tape:
    z = layer(x)
print(f"Parameter: {layer.trainable_variables[2]}")
print(f"Gradient: {tape.gradient(z, layer.trainable_variables[2])}")
This solution works, but it's not very elegant... I wonder if there's a better way to do it, some magical TF method that can map the neuron to create a layer; I'm too inexperienced to know for now. So please answer if you have a (better) approach, and I'll be happy to accept it :)
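One possible alternative that avoids exec/eval is to keep the per-neuron weights in plain Python lists; the variables are still created with add_weight, so they still appear in trainable_variables. A sketch along those lines, reusing the neuron function and shapes from above:

class ListLayer(tf.keras.layers.Layer):
    def __init__(self, n_units=5):
        super(ListLayer, self).__init__()
        self.n_units = n_units

    def build(self, input_shape):
        # Per-neuron kernels and biases kept in ordinary lists.
        self.kernels = [self.add_weight(f"kernel_{i}", shape=[1, int(input_shape[0])])
                        for i in range(self.n_units)]
        self.biases = [self.add_weight(f"bias_{i}", shape=[1, 1])
                       for i in range(self.n_units)]

    def call(self, inputs):
        outs = [neuron(inputs, W, b) for W, b in zip(self.kernels, self.biases)]
        return tf.concat(outs, axis=0)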
In TF 1.x, it was possible to build layers with custom variables. Here's an example:
import numpy as np
import tensorflow as tf
def make_custom_getter(custom_variables):
    def custom_getter(getter, name, **kwargs):
        if name in custom_variables:
            variable = custom_variables[name]
        else:
            variable = getter(name, **kwargs)
        return variable
    return custom_getter

# Make a custom getter for the dense layer variables.
# Note: custom variables can result from arbitrary computation;
# for the sake of this example, we make them just constant tensors.
custom_variables = {
    "model/dense/kernel": tf.constant(
        np.random.rand(784, 64), name="custom_kernel", dtype=tf.float32),
    "model/dense/bias": tf.constant(
        np.random.rand(64), name="custom_bias", dtype=tf.float32),
}
custom_getter = make_custom_getter(custom_variables)

# Compute hiddens using a dense layer with custom variables.
x = tf.random.normal(shape=(1, 784), name="inputs")
with tf.variable_scope("model", custom_getter=custom_getter):
    Layer = tf.layers.Dense(64)
    hiddens = Layer(x)

print(Layer.variables)
The printed variables of the constructed dense layer will be custom tensors we specified in the custom_variables dict:
[<tf.Tensor 'custom_kernel:0' shape=(784, 64) dtype=float32>, <tf.Tensor 'custom_bias:0' shape=(64,) dtype=float32>]
This allows us to create layers/models that use the tensors provided in custom_variables directly as their weights, so that we can further differentiate the output of the layers/models with respect to any tensors that custom_variables may depend on (particularly useful for implementing functionality such as modulating sub-nets, parameter generation, meta-learning, etc.).
Variable scopes used to make it easy to nest all of the graph-building inside scopes with custom getters and build models on top of the provided tensors as their parameters. Since sessions and variable scopes are no longer advisable in TF 2.0 (and all of that low-level stuff is moved to tf.compat.v1), what would be the best practice to implement the above using Keras and TF 2.0?
(Related issue on GitHub.)
Answer based on the comment below
Given you have:
kernel = createTheKernelVarBasedOnWhatYouWant() #shape (784, 64)
bias = createTheBiasVarBasedOnWhatYouWant() #shape (64,)
Make a simple function copying the code from Dense:
def custom_dense(x):
    inputs, kernel, bias = x
    outputs = K.dot(inputs, kernel)
    outputs = K.bias_add(outputs, bias, data_format='channels_last')
    return outputs
Use the function in a Lambda layer:
layer = Lambda(custom_dense)
hiddens = layer([x, kernel, bias])
Warning: kernel and bias must either be produced from a Keras layer, or come from a kernel = Input(tensor=the_kernel_var) and bias = Input(tensor=bias_var).
If the warning above is bad for you, you can always use kernel and bias "from outside", like:
def custom_dense(inputs):
    outputs = K.dot(inputs, kernel)  # where kernel is not part of the arguments anymore
    outputs = K.bias_add(outputs, bias, data_format='channels_last')
    return outputs

layer = Lambda(custom_dense)
hiddens = layer(x)
This last option makes it a bit more complicated to save/load models.
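For reference, a minimal self-contained sketch of that last option, with the kernel and bias taken "from outside" as constants of the (784, 64) shapes used elsewhere in this thread:

import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Lambda
from tensorflow.keras.models import Model

# Custom tensors used as the dense layer's parameters (constants here for simplicity).
kernel = tf.constant(np.random.rand(784, 64), dtype=tf.float32)
bias = tf.constant(np.random.rand(64), dtype=tf.float32)

def custom_dense(inputs):
    outputs = K.dot(inputs, kernel)  # kernel and bias come from the enclosing scope
    return K.bias_add(outputs, bias, data_format='channels_last')

x = Input(shape=(784,))
hiddens = Lambda(custom_dense)(x)
model = Model(x, hiddens)

print(model(np.random.rand(1, 784).astype('float32')).shape)  # (1, 64)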
Old answer
You should probably use a Keras Dense layer and set its weights in a standard way:
layer = tf.keras.layers.Dense(64, name='the_layer')
layer.build((None, 784))  # build the layer first so its weights exist
layer.set_weights([np.random.rand(784, 64), np.random.rand(64)])
If you need these weights to be non-trainable, set this before compiling the Keras model:
model.get_layer('the_layer').trainable=False
If you want direct access to the variables as tensors, they are:
kernel = layer.kernel
bias = layer.bias
There are plenty of other options, but that depends on your exact intention, which is not clear in your question.
Below is a general-purpose solution that works with arbitrary Keras models in TF2.
First, we need to define an auxiliary function canonical_variable_name and a context manager custom_make_variable with the following signatures (see implementation in meta-blocks library).
def canonical_variable_name(variable_name: str, outer_scope: str):
    """Returns the canonical variable name: `outer_scope/.../name`."""
    # ...

@contextlib.contextmanager
def custom_make_variable(
    canonical_custom_variables: Dict[str, tf.Tensor], outer_scope: str
):
    """A context manager that overrides `make_variable` with a custom function.

    When building layers, Keras uses the `make_variable` function to create weights
    (kernels and biases for each layer). This function wraps `make_variable` with
    a closure that infers the canonical name of the variable being created (of the
    form `outer_scope/.../var_name`) and looks it up in the `custom_variables` dict
    that maps canonical names to tensors. The function adheres to the following logic:

    * If there is a match, it does a few checks (shape, dtype, etc.) and returns
      the found tensor instead of creating a new variable.
    * If there is a match but the checks fail, it throws an exception.
    * If there is no matching entry in `custom_variables`, it calls the original
      `make_variable` utility function and returns a newly created variable.
    """
    # ...
Using these functions, we can create arbitrary Keras models with custom tensors used as variables:
import numpy as np
import tensorflow as tf

canonical_custom_variables = {
    "model/dense/kernel": tf.constant(
        np.random.rand(784, 64), name="custom_kernel", dtype=tf.float32),
    "model/dense/bias": tf.constant(
        np.random.rand(64), name="custom_bias", dtype=tf.float32),
}

# Compute hiddens using a dense layer with custom variables.
x = tf.random.normal(shape=(1, 784), name="inputs")
with custom_make_variable(canonical_custom_variables, outer_scope="model"):
    Layer = tf.keras.layers.Dense(64)
    hiddens = Layer(x)

print(Layer.variables)
Not entirely sure I understand your question correctly, but it seems to me that it should be possible to do what you want with a combination of custom layers and the Keras functional API.
Custom layers allow you to build any layer you want in a way that is compatible with Keras, e.g.:
class MyDenseLayer(tf.keras.layers.Layer):
    def __init__(self, num_outputs, activation=None):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs
        # optional activation, so calls like MyDenseLayer(64, activation='relu') work
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        self.kernel = self.add_weight("kernel",
                                      shape=[int(input_shape[-1]),
                                             self.num_outputs],
                                      initializer='normal')
        self.bias = self.add_weight("bias",
                                    shape=[self.num_outputs,],
                                    initializer='normal')

    def call(self, inputs):
        return self.activation(tf.matmul(inputs, self.kernel) + self.bias)
and the functional API allows you to access the outputs of said layers and re-use them:
inputs = keras.Input(shape=(784,), name='img')
x1 = MyDenseLayer(64, activation='relu')(inputs)
x2 = MyDenseLayer(64, activation='relu')(x1)
outputs = MyDenseLayer(10, activation='softmax')(x2)
model = keras.Model(inputs=inputs, outputs=outputs, name='mnist_model')
Here x1 and x2 can be connected to other subnets.
I am working with the keras-capsnet implementation of Capsule Networks, and am trying to apply the same layer to 30 images per sample.
The weights are initialized within the __init__ and build methods of the class, shown below. I have successfully shared the weights between the primary routing layers, which just use tf.layers.conv2d, where I can assign them the same name and set reuse=True.
Does anyone know how to initialize weights in a Keras custom layer so that they may be reused? I am much more familiar with the tensorflow API than with the Keras one!
def __init__(self, num_capsule, dim_capsule, routings=3,
             kernel_initializer='glorot_uniform',
             **kwargs):
    super(CapsuleLayer, self).__init__(**kwargs)
    self.num_capsule = num_capsule
    self.dim_capsule = dim_capsule
    self.routings = routings
    self.kernel_initializer = initializers.get(kernel_initializer)

def build(self, input_shape):
    assert len(input_shape) >= 3, "The input Tensor should have shape=[None, input_num_capsule, input_dim_capsule]"
    self.input_num_capsule = input_shape[1]
    self.input_dim_capsule = input_shape[2]

    # Weights are initialized here each time the layer is called
    self.W = self.add_weight(shape=[self.num_capsule, self.input_num_capsule,
                                    self.dim_capsule, self.input_dim_capsule],
                             initializer=self.kernel_initializer,
                             name='W')

    self.built = True
The answer was simple: set up the layer without calling it on any input, and then use that single built layer instance to call the data individually.
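As a minimal illustration (with made-up shapes, not the actual capsule code): weight sharing in Keras follows from object identity, so building one layer instance and applying that same instance to each of the 30 inputs (for example via TimeDistributed) shares a single set of weights:

import tensorflow as tf

# Hypothetical shapes: 30 "images" per sample, each flattened to 128 features.
shared = tf.keras.layers.Dense(16)   # one instance = one set of weights

inputs = tf.keras.Input(shape=(30, 128))
outputs = tf.keras.layers.TimeDistributed(shared)(inputs)
# Equivalently, call the same instance on each slice:
# outputs = tf.stack([shared(inputs[:, i]) for i in range(30)], axis=1)

model = tf.keras.Model(inputs, outputs)
print(len(model.trainable_variables))   # 2 -- a single kernel and bias, shared by all 30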
Hi, I want to add an element-wise multiplication layer to duplicate the input to multiple channels, as illustrated in this figure (so the input of size M x N and the multiplication filter of size M x N are the same shape).
I want to give the filter a custom initialization value, and I also want it to receive gradients during training. However, I can't find an element-wise filter layer in PyTorch. Can I make one, or is it just impossible in PyTorch?
In PyTorch you can always implement your own layers by making them subclasses of nn.Module. You can also have trainable parameters in your layer by using nn.Parameter.
A possible implementation of such a layer might look like this:
import torch
from torch import nn

class TrainableEltwiseLayer(nn.Module):
    def __init__(self, n, h, w):
        super(TrainableEltwiseLayer, self).__init__()
        self.weights = nn.Parameter(torch.Tensor(1, n, h, w))  # define the trainable parameter

    def forward(self, x):
        # assuming x is of size b-n-h-w
        return x * self.weights  # element-wise multiplication
You still need to worry about initializing the weights: look into nn.init for ways to do this. Usually, one initializes the weights of the whole net prior to training and prior to loading any stored model (so a partially trained model can override the random init). Something like:
model = mymodel(*args, **kwargs)  # instantiate a model
for m in model.modules():
    if isinstance(m, nn.Conv2d):
        nn.init.normal_(m.weight.data)  # init for conv layers (nn.Conv2d stores its kernel as .weight)
    if isinstance(m, TrainableEltwiseLayer):
        nn.init.constant_(m.weights.data, 1)  # init your weights here...
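A quick illustrative check, with made-up shapes, that gradients do flow into the element-wise weights:

layer = TrainableEltwiseLayer(n=3, h=8, w=8)
nn.init.constant_(layer.weights.data, 1)   # start as a pass-through filter

x = torch.randn(4, 3, 8, 8)                # a batch of 4 inputs
loss = layer(x).sum()
loss.backward()
print(layer.weights.grad.shape)            # torch.Size([1, 3, 8, 8])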
I'm trying to create a model which has words as inputs. Most of those words are in the glove word vector set (~50000). However, some of the frequent words are not (~1000). The question is, how do I concatenate the following two embedding layers to create one giant Embedding lookup table?
trained_em = Embedding(50000, 50,
                       weights=[np.array([word2glove[w] for w in words_in_glove])],
                       trainable=False)
untrained_em = Embedding(1000, 50)
As far as I understand these are simply two lookup tables with same number of dimensions. So I'm hoping that there is a way to stack these two lookup tables.
Edit 1:
I just realised that this is probably going to be more than stacking Embedding layers, because the input sequence would be a number from 0-50999, whereas untrained_em above only expects a number from 0-999. So perhaps a different solution is required.
Edit 2:
This is what I would expect to do in a numpy array representing the Embedding:
np.random.seed(42) # Set seed for reproducibility
pretrained = np.random.randn(15,3)
untrained = np.random.randn(5,3)
final_embedding = np.vstack([pretrained, untrained])
word_idx = [2, 5, 19]
np.take(final_embedding, word_idx, axis=0)
I believe the last bit can be done with something to do with keras.backend.gather but not sure how to put it all together.
It turns out that I needed to implement a custom layer, which I did by tweaking the original Embedding class.
The two most important parts shown in the class below are self.embeddings = K.concatenate([fixed_weight, variable_weight], axis=0) and out = K.gather(self.embeddings, inputs). The first is hopefully self explanatory while the second picks out the relevant input rows from the embeddings table.
However, in the particular application that I'm working on, it turns out to work better with a plain Embedding layer instead of the modified layer. Perhaps the learning rate is too high. I will report back on this after I have experimented more.
from keras.engine.topology import Layer
import keras.backend as K
from keras import initializers
import numpy as np

class Embedding2(Layer):

    def __init__(self, input_dim, output_dim, fixed_weights, embeddings_initializer='uniform',
                 input_length=None, **kwargs):
        kwargs['dtype'] = 'int32'
        if 'input_shape' not in kwargs:
            if input_length:
                kwargs['input_shape'] = (input_length,)
            else:
                kwargs['input_shape'] = (None,)
        super(Embedding2, self).__init__(**kwargs)

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.embeddings_initializer = embeddings_initializer
        self.fixed_weights = fixed_weights
        self.num_trainable = input_dim - len(fixed_weights)
        self.input_length = input_length

    def build(self, input_shape, name='embeddings'):
        initializer = initializers.get(self.embeddings_initializer)
        shape1 = (self.num_trainable, self.output_dim)
        variable_weight = K.variable(initializer(shape1), dtype=K.floatx(), name=name+'_var')

        fixed_weight = K.variable(self.fixed_weights, name=name+'_fixed')

        self._trainable_weights.append(variable_weight)
        self._non_trainable_weights.append(fixed_weight)

        self.embeddings = K.concatenate([fixed_weight, variable_weight], axis=0)

        self.built = True

    def call(self, inputs):
        if K.dtype(inputs) != 'int32':
            inputs = K.cast(inputs, 'int32')
        out = K.gather(self.embeddings, inputs)
        return out

    def compute_output_shape(self, input_shape):
        if not self.input_length:
            input_length = input_shape[1]
        else:
            input_length = self.input_length
        return (input_shape[0], input_length, self.output_dim)
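A hypothetical usage sketch, reusing word2glove and words_in_glove from the question: the fixed GloVe rows occupy indices 0-49999 and the 1000 trainable rows are appended after them, so inputs are indexed from 0 to 50999.

fixed = np.array([word2glove[w] for w in words_in_glove])   # shape (50000, 50)
emb_layer = Embedding2(input_dim=51000, output_dim=50, fixed_weights=fixed)
# Indices 0-49999 look up the fixed GloVe rows; 50000-50999 look up the trainable rows.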
So, my suggestion is to use only one Embedding layer (taking into consideration your indexing problem), and transfer the weights from the old layer to the new one.
So, what you're going to do in this suggestion is...
Create your new model with 51000 words:
inp = Input((1,))
emb = Embedding(51000,50)(inp)
out = the rest of the model.....
model = Model(inp,out)
Now take the embedding layer and give it the weights you had:
weights = np.array([word2glove[w] for w in words_in_glove])
newWeights = model.layers[1].get_weights()[0]
newWeights[:50000,:] = weights
model.layers[1].set_weights([newWeights])
This will give you a new embedding, larger than the previous one, with a great part of its weights already trained, and the remaining randomly initialized.
Unfortunately, you will have to let everything be trained.