Related
I’m trying to implement a network in which the weights of the layers are calculated as a result of a tensor operation. This is the code I have for a single layer and is repeated for all conv and fc layers:
class NOWANet(nn.Module):
def __init__(self, V, Wstacked):
super(NOWANet, self).__init__()
# first conv layer
# define layer
self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
# define tensors that will be used to calculate weights and biases
self.conv1_W_V = nn.Parameter(V[0], requires_grad=True)
self.conv1_B_V = nn.Parameter(V[1], requires_grad=True)
self.conv1_W_stack = nn.Parameter(Wstacked[0], requires_grad=False)
self.conv1_B_stack = nn.Parameter(Wstacked[1], requires_grad=False)
# set layer weights and bias
self.conv1.weight = nn.Parameter(torch.tensordot(self.conv1_W_V,
self.conv1_W_stack,
dims=1, out=None),
requires_grad=True)
self.conv1.bias = nn.Parameter(torch.tensordot(self.conv1_B_V,
self.conv1_B_stack,
dims=1, out=None),
requires_grad=True)
The idea is to perform a tensordot between the V and W_stack tensors, which then should be used as weights and bias (one tensordot each). The catch is that I only want to optimize V so that W_stack remains the intact. The way it is currently written initializes the weights correctly, but backprop optimizes over the actual weights of the layer, there is no change to the V vector.
Any ideas or suggestions on how to do it?
I am training an binary classifier using the Inception V3 model and I would like to feed some of the non-image features of my dataset into the network.
I have previously trained a logistic regression model with these features which performed well and I would like to see if I can improve my cnn by combining these models.
It looks like inception has a fully connected layer (logits layer) just before the softmax and I believe I should concatenate some nodes onto that layer to feed in my features. I have never done this, however.
The logits layer is build here - a snippet of the inception code
# Final pooling and prediction
with tf.variable_scope('logits'):
shape = net.get_shape()
net = ops.avg_pool(net, shape[1:3], padding='VALID', scope='pool')
# 1 x 1 x 2048
net = ops.dropout(net, dropout_keep_prob, scope='dropout')
net = ops.flatten(net, scope='flatten')
# 2048
logits = ops.fc(net, num_classes, activation=None, scope='logits',
restore=restore_logits)
# 1000
end_points['logits'] = logits
if FLAGS.mode == '0_softmax':
end_points['predictions'] = tf.nn.softmax(logits, name='predictions')
The function to make the fully connected layer:
#scopes.add_arg_scope
def fc(inputs,
num_units_out,
activation=tf.nn.relu,
stddev=0.01,
bias=0.0,
weight_decay=0,
batch_norm_params=None,
is_training=True,
trainable=True,
restore=True,
scope=None,
reuse=None):
"""Adds a fully connected layer followed by an optional batch_norm layer.
FC creates a variable called 'weights', representing the fully connected
weight matrix, that is multiplied by the input. If `batch_norm` is None, a
second variable called 'biases' is added to the result of the initial
vector-matrix multiplication.
Args:
inputs: a [B x N] tensor where B is the batch size and N is the number of
input units in the layer.
num_units_out: the number of output units in the layer.
activation: activation function.
stddev: the standard deviation for the weights.
bias: the initial value of the biases.
weight_decay: the weight decay.
batch_norm_params: parameters for the batch_norm. If is None don't use it.
is_training: whether or not the model is in training mode.
trainable: whether or not the variables should be trainable or not.
restore: whether or not the variables should be marked for restore.
scope: Optional scope for variable_scope.
reuse: whether or not the layer and its variables should be reused. To be
able to reuse the layer scope must be given.
Returns:
the tensor variable representing the result of the series of operations.
"""
with tf.variable_scope(scope, 'FC', [inputs], reuse=reuse):
num_units_in = inputs.get_shape()[1]
weights_shape = [num_units_in, num_units_out]
weights_initializer = tf.truncated_normal_initializer(stddev=stddev)
l2_regularizer = None
if weight_decay and weight_decay > 0:
l2_regularizer = losses.l2_regularizer(weight_decay)
weights = variables.variable('weights',
shape=weights_shape,
initializer=weights_initializer,
regularizer=l2_regularizer,
trainable=trainable,
restore=restore)
if batch_norm_params is not None:
outputs = tf.matmul(inputs, weights)
with scopes.arg_scope([batch_norm], is_training=is_training,
trainable=trainable, restore=restore):
outputs = batch_norm(outputs, **batch_norm_params)
else:
bias_shape = [num_units_out,]
bias_initializer = tf.constant_initializer(bias)
biases = variables.variable('biases',
shape=bias_shape,
initializer=bias_initializer,
trainable=trainable,
restore=restore)
outputs = tf.nn.xw_plus_b(inputs, weights, biases)
if activation:
outputs = activation(outputs)
return outputs
My model has 10 non-image features, so I suppose I will use num_units_out + 10? I am not sure how what to do with the inputs. I assume I will add the feature data directly into this layer, by adding it to the input already coming from the previous layers. So in essence I will have two input layers.
Add your features just before the FC layer:
net = ops.flatten(net, scope='flatten')
# assuming both tensors have a shape like <batch>x<features>
net = tf.concat([net, my_other_features], axis=-1)
This will combine the existing FC part with a Input->FC->Sigmoid part into a single layer. Another way of saying it is that the final logistic layer (FC->Sigmoid) will get a feature vector input that consists of both your features and the features calculated by the CNN from the image.
The problem is the following. I have a categorical prediction task of vocabulary size 25K. On one of them (input vocab 10K, output dim i.e. embedding 50), I want to introduce a trainable weight matrix for a matrix multiplication between the input embedding (shape 1,50) and the weights (shape(50,128)) (no bias) and the resulting vector score is an input for a prediction task along with other features.
The crux is, I think that the trainable weight matrix varies for each input, if I simply add it in. I want this weight matrix to be common across all inputs.
I should clarify - by input here I mean training examples. So all examples would learn some example specific embedding and be multiplied by a shared weight matrix.
After every so many epochs, I intend to do a batch update to learn these common weights (or use other target variables to do multiple output prediction)
LSTM? Is that something I should look into here?
With the exception of an Embedding layer, layers apply to all examples in the batch.
Take as an example a very simple network:
inp = Input(shape=(4,))
h1 = Dense(2, activation='relu', use_bias=False)(inp)
out = Dense(1)(h1)
model = Model(inp, out)
This a simple network with 1 input layer, 1 hidden layer and an output layer. If we take the hidden layer as an example; this layer has a weights matrix of shape (4, 2,). At each iteration the input data which is a matrix of shape (batch_size, 4) is multiplied by the hidden layer weights (feed forward phase). Thus h1 activation is dependent on all samples. The loss is also computed on a per batch_size basis. The output layer has a shape (batch_size, 1). Given that in the forward phase all the batch samples affected the values of the weights, the same is true for backdrop and gradient updates.
When one is dealing with text, often the problem is specified as predicting a specific label from a sequence of words. This is modelled as a shape of (batch_size, sequence_length, word_index). Lets take a very basic example:
from tensorflow import keras
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
sequence_length = 80
emb_vec_size = 100
vocab_size = 10_000
def make_model():
inp = Input(shape=(sequence_length, 1))
emb = Embedding(vocab_size, emb_vec_size)(inp)
emb = Reshape((sequence_length, emb_vec_size))(emb)
h1 = Dense(64)(emb)
recurrent = LSTM(32)(h1)
output = Dense(1)(recurrent)
model = Model(inp, output)
model.compile('adam', 'mse')
return model
model = make_model()
model.summary()
You can copy and paste this into colab and see the summary.
What this example is doing is:
Transform a sequence of word indices into a sequence of word embedding vectors.
Applying a Dense layer called h1 to all the batches (and all the elements in the sequence); this layer reduces the dimensions of the embedding vector. It is not a typical element of a network to process text (in isolation). But this seemed to match your question.
Using a recurrent layer to reduce the sequence into a single vector per example.
Predicting a single label from the "sentence" vector.
If I get the problem correctly you can reuse layers or even models inside another model.
Example with a Dense layer. Let's say you have 10 Inputs
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
# defining 10 inputs in a List with (X,) shape
inputs = [Input(shape = (X,),name='input_{}'.format(k)) for k in
range(10)]
# defining a common Dense layer
D = Dense(64, name='one_layer_to_rule_them_all')
nets = [D(inp) for inp in inputs]
model = Model(inputs = inputs, outputs = nets)
model.compile(optimizer='adam', loss='categorical_crossentropy')
This code is not going to work if the inputs have different shapes. The first call to D defines its properties. In this example, outputs are set directly to nets. But of course you can concatenate, stack, or whatever you want.
Now if you have some trainable model you can use it instead of the D:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
# defining 10 inputs in a List with (X,) shape
inputs = [Input(shape = (X,),name='input_{}'.format(k)) for k in
range(10)]
# defining a shared model with the same weights for all inputs
nets = [special_model(inp) for inp in inputs]
model = Model(inputs = inputs, outputs = nets)
model.compile(optimizer='adam', loss='categorical_crossentropy')
The weights of this model are shared among all inputs.
My model is a resnet-152 i wanna cutting it into two submodels and the problem is with the second one i can't figure out how to build a model from an intermediate layer to the output
I tried this code from this response and it doesn't work for me here is my code:
def getLayerIndexByName(model, layername):
for idx, layer in enumerate(model.layers):
if layer.name == layername:
return idx
idx = getLayerIndexByName(resnet, 'res3a_branch2a')
input_shape = resnet.layers[idx].get_input_shape_at(0) # which is here in my case (None, 55, 55, 256)
layer_input = Input(shape=input_shape[1:]) # as keras will add the batch shape
# create the new nodes for each layer in the path
x = layer_input
for layer in resnet.layers[idx:]:
x = layer(x)
# create the model
new_model = Model(layer_input, x)
And i am getting this error:
ValueError: Input 0 is incompatible with layer res3a_branch1: expected axis -1 of input shape to have value 256 but got shape (None, 28, 28, 512).
I also tried this function:
def split(model, start, end):
confs = model.get_config()
kept_layers = set()
for i, l in enumerate(confs['layers']):
if i == 0:
confs['layers'][0]['config']['batch_input_shape'] = model.layers[start].input_shape
if i != start:
confs['layers'][0]['name'] += str(random.randint(0, 100000000)) # rename the input layer to avoid conflicts on merge
confs['layers'][0]['config']['name'] = confs['layers'][0]['name']
elif i < start or i > end:
continue
kept_layers.add(l['name'])
# filter layers
layers = [l for l in confs['layers'] if l['name'] in kept_layers]
layers[1]['inbound_nodes'][0][0][0] = layers[0]['name']
# set conf
confs['layers'] = layers
confs['input_layers'][0][0] = layers[0]['name']
confs['output_layers'][0][0] = layers[-1]['name']
# create new model
submodel = Model.from_config(confs)
for l in submodel.layers:
orig_l = model.get_layer(l.name)
if orig_l is not None:
l.set_weights(orig_l.get_weights())
return submodel
and i am getting this error:
ValueError: Unknown layer: Scale
as my resnet152 contains a Scale layer.
Here is a working version:
import resnet # pip install resnet
from keras.models import Model
from keras.layers import Input
def getLayerIndexByName(model, layername):
for idx, layer in enumerate(model.layers):
if layer.name == layername:
return idx
resnet = resnet.ResNet152(weights='imagenet')
idx = getLayerIndexByName(resnet, 'res3a_branch2a')
model1 = Model(inputs=resnet.input, outputs=resnet.get_layer('res3a_branch2a').output)
input_shape = resnet.layers[idx].get_input_shape_at(0) # get the input shape of desired layer
print(input_shape[1:])
layer_input = Input(shape=input_shape[1:]) # a new input tensor to be able to feed the desired layer
# create the new nodes for each layer in the path
x = layer_input
for layer in resnet.layers[idx:]:
x = layer(x)
# create the model
model2 = Model(layer_input, x)
model2.summary()
Here is the error:
ValueError: Input 0 is incompatible with layer res3a_branch1: expected axis -1 of input shape to have value 256 but got shape (None, 28, 28, 512)
As I mentioned in the comments section since the ResNet model does not have a linear architecture (i.e. it has skip connections and a layer may be connected to multiple layers), you can't simply go through the layers of the model one after another in a loop and apply a layer on the output of previous layer in the loop (i.e. unlike the models with a linear architecture for which this method works).
So you need to find the connectivity of the layers and traverse that connectivity map to be able to construct a sub-model of the original model. Currently, this solution comes to my mind:
Specify the last layer of your sub-model.
Start from that layer and find all the connected layers to it.
Get the output of those connected layers.
Apply the last layer on the collected output.
Obviously step #3 implies a recursion: to get the output of connected layers (i.e. X), we first need to find their connected layers (i.e. Y), get their outputs (i.e. outputs of Y) and then apply them on those outputs (i.e. apply X on outputs of Y). Further, to find the connected layer you need to know a bit about the internals of Keras which has been covered in this answer. So we come up with this solution:
from keras.applications.resnet50 import ResNet50
from keras import models
from keras import layers
resnet = ResNet50()
# this is the split point, i.e. the starting layer in our sub-model
starting_layer_name = 'activation_46'
# create a new input layer for our sub-model we want to construct
new_input = layers.Input(batch_shape=resnet.get_layer(starting_layer_name).get_input_shape_at(0))
layer_outputs = {}
def get_output_of_layer(layer):
# if we have already applied this layer on its input(s) tensors,
# just return its already computed output
if layer.name in layer_outputs:
return layer_outputs[layer.name]
# if this is the starting layer, then apply it on the input tensor
if layer.name == starting_layer_name:
out = layer(new_input)
layer_outputs[layer.name] = out
return out
# find all the connected layers which this layer
# consumes their output
prev_layers = []
for node in layer._inbound_nodes:
prev_layers.extend(node.inbound_layers)
# get the output of connected layers
pl_outs = []
for pl in prev_layers:
pl_outs.extend([get_output_of_layer(pl)])
# apply this layer on the collected outputs
out = layer(pl_outs[0] if len(pl_outs) == 1 else pl_outs)
layer_outputs[layer.name] = out
return out
# note that we start from the last layer of our desired sub-model.
# this layer could be any layer of the original model as long as it is
# reachable from the starting layer
new_output = get_output_of_layer(resnet.layers[-1])
# create the sub-model
model = models.Model(new_input, new_output)
Important notes:
This solution assumes that each layer in the original model has been used only once, i.e. it does not work for Siamese networks where a layer may be shared and therefore might be applied more than once on different input tensors.
If you want to have a proper split of a model into multiple sub-models, then it makes sense to use only those layers for split point (e.g. indicated by starting_layer_name in the above code) which are NOT in a branch (e.g. in ResNet the activation layers after merge layers are a good option, but the res3a_branch2a you have selected is not a good option since it's in a branch). To get a better view of the original architecture of the model, you can always plot its diagram using plot_model() utility function:
from keras.applications.resnet50 import ResNet50
from keras.utils import plot_model
resnet = ResNet50()
plot_model(model, to_file='resnet_model.png')
Since new nodes are created after constructing a sub-model, don't try to construct another sub-model which has overlap (i.e. if it does not have overlap, it's OK!) with the previous sub-model in the same run of the code above; otherwise, you may encounter errors.
In the case, when there is a layer with index middle, that is connected only to the previous layer (# middle-1) and all layers after are not connected directly to layers before it, we can use the fact that every model is saved as a list of layers and create two partial models this way:
model1 = keras.models.Model(inputs=model.input, outputs=model.layers[middle - 1].output)
input = keras.Input(shape=model.layers[middle-1].output_shape[1:])
# layers is a dict in the form {name : output}
layers = {}
layers[model.layers[middle-1].name] = input
for layer in model.layers[middle:]:
if type(layer.input) == list:
x = []
for layer_input in layer.input:
x.append(layers[layer_input.name.split('/')[0]])
else:
x = layers[layer.input.name.split('/')[0]]
y = layer(x)
layers[layer.name] = y
model2 = keras.Model(inputs = [input], outputs = [y])
Then it is easy to check that model2.predict(model1.predict(x)) gives same results as model.predict(x)
I had a similar problem with slicing an Inception CNN for transfer learning, to set only the layers after a certain point to trainable.
def get_layers_above(cutoff_layer,model):
def get_next_level(layer,model):
def wrap_list(val):
if type(val) is list:
return val
return [val]
r=[]
for output_t in wrap_list(layer.output):
r+=[x for x in model.layers if output_t.name in [y.name for y in wrap_list(x.input)]]
return r
visited=set()
to_visit=set([cutoff_layer])
while to_visit:
layer=to_visit.pop()
to_visit.update(get_next_level(layer,model))
visited.add(layer)
return list(visited)
I went with an iterative instead of a recursive solution because breadth-first traverse with sets seems like a safer solution for a network with many converging branches.
it should be used like this (InceptionV3 for example)
model = tf.keras.applications.InceptionV3(include_top=False,weights='imagenet',input_shape=(299,299,3))
layers=get_layers_above(model.get_layer('mixed9'),model)
print([l.name for l in layers])
output
['batch_normalization_89',
'conv2d_93',
'activation_86',
'activation_91',
'mixed10',
'activation_88',
'batch_normalization_85',
'activation_93',
'batch_normalization_90',
'conv2d_87',
'conv2d_86',
'batch_normalization_86',
'activation_85',
'conv2d_91',
'batch_normalization_91',
'batch_normalization_87',
'activation_90',
'mixed9',
'batch_normalization_92',
'batch_normalization_88',
'activation_87',
'concatenate_1',
'activation_89',
'conv2d_88',
'conv2d_92',
'average_pooling2d_8',
'activation_92',
'mixed9_1',
'conv2d_89',
'conv2d_85',
'conv2d_90',
'batch_normalization_93']
Imagine a fully-connected neural network with its last two layers of the following structure:
[Dense]
units = 612
activation = softplus
[Dense]
units = 1
activation = sigmoid
The output value of the net is 1, but I'd like to know what the input x to the sigmoidal function was (must be some high number, since sigm(x) is 1 here).
Folllowing indraforyou's answer I managed to retrieve the output and weights of Keras layers:
outputs = [layer.output for layer in model.layers[-2:]]
functors = [K.function( [model.input]+[K.learning_phase()], [out] ) for out in outputs]
test_input = np.array(...)
layer_outs = [func([test_input, 0.]) for func in functors]
print layer_outs[-1][0] # -> array([[ 1.]])
dense_0_out = layer_outs[-2][0] # shape (612, 1)
dense_1_weights = model.layers[-1].weights[0].get_value() # shape (1, 612)
dense_1_bias = model.layers[-1].weights[1].get_value()
x = np.dot(dense_0_out, dense_1_weights) + dense_1_bias
print x # -> -11.7
How can x be a negative number? In that case the last layers output should be a number closer to 0.0 than 1.0. Are dense_0_out or dense_1_weights the wrong outputs or weights?
Since you're using get_value(), I'll assume that you're using Theano backend. To get the value of the node before the sigmoid activation, you can traverse the computation graph.
The graph can be traversed starting from outputs (the result of some computation) down to its inputs using the owner field.
In your case, what you want is the input x of the sigmoid activation op. The output of the sigmoid op is model.output. Putting these together, the variable x is model.output.owner.inputs[0].
If you print out this value, you'll see Elemwise{add,no_inplace}.0, which is an element-wise addition op. It can be verified from the source code of Dense.call():
def call(self, inputs):
output = K.dot(inputs, self.kernel)
if self.use_bias:
output = K.bias_add(output, self.bias)
if self.activation is not None:
output = self.activation(output)
return output
The input to the activation function is the output of K.bias_add().
With a small modification of your code, you can get the value of the node before activation:
x = model.output.owner.inputs[0]
func = K.function([model.input] + [K.learning_phase()], [x])
print func([test_input, 0.])
For anyone using TensorFlow backend: use x = model.output.op.inputs[0] instead.
I can see a simple way just changing a little the model structure. (See at the end how to use the existing model and change only the ending).
The advantages of this method are:
You don't have to guess if you're doing the right calculations
You don't need to care about the dropout layers and how to implement a dropout calculation
This is a pure Keras solution (applies to any backend, either Theano or Tensorflow).
There are two possible solutions below:
Option 1 - Create a new model from start with the proposed structure
Option 2 - Reuse an existing model changing only its ending
Model structure
You could just have the last dense separated in two layers at the end:
[Dense]
units = 612
activation = softplus
[Dense]
units = 1
#no activation
[Activation]
activation = sigmoid
Then you simply get the output of the last dense layer.
I'd say you should create two models, one for training, the other for checking this value.
Option 1 - Building the models from the beginning:
from keras.models import Model
#build the initial part of the model the same way you would
#add the Dense layer without an activation:
#if using the functional Model API
denseOut = Dense(1)(outputFromThePreviousLayer)
sigmoidOut = Activation('sigmoid')(denseOut)
#if using the sequential model - will need the functional API
model.add(Dense(1))
sigmoidOut = Activation('sigmoid')(model.output)
Create two models from that, one for training, one for checking the output of dense:
#if using the functional API
checkingModel = Model(yourInputs, denseOut)
#if using the sequential model:
checkingModel = model
trainingModel = Model(checkingModel.inputs, sigmoidOut)
Use trianingModel for training normally. The two models share weights, so training one is training the other.
Use checkingModel just to see the outputs of the Dense layer, using checkingModel.predict(X)
Option 2 - Building this from an existing model:
from keras.models import Model
#find the softplus dense layer and get its output:
softplusOut = oldModel.layers[indexForSoftplusLayer].output
#or should this be the output from the dropout? Whichever comes immediately after the last Dense(1)
#recreate the dense layer
outDense = Dense(1, name='newDense', ...)(softPlusOut)
#create the new model
checkingModel = Model(oldModel.inputs,outDense)
It's important, since you created a new Dense layer, to get the weights from the old one:
wgts = oldModel.layers[indexForDense].get_weights()
checkingModel.get_layer('newDense').set_weights(wgts)
In this case, training the old model will not update the last dense layer in the new model, so, let's create a trainingModel:
outSigmoid = Activation('sigmoid')(checkingModel.output)
trainingModel = Model(checkingModel.inputs,outSigmoid)
Use checkingModel for checking the values you want with checkingModel.predict(X). And train the trainingModel.
So this is for fellow googlers, the working of the keras API has changed significantly since the accepted answer was posted. The working code for extracting a layer's output before activation (for tensorflow backend) is:
model = Your_Keras_Model()
the_tensor_you_need = model.output.op.inputs[0] #<- this is indexable, if there are multiple inputs to this node then you can find it with indexing.
In my case, the final layer was a dense layer with activation softmax, so the tensor output I needed was <tf.Tensor 'predictions/BiasAdd:0' shape=(?, 1000) dtype=float32>.
(TF backend)
Solution for Conv layers.
I had the same question, and to rewrite a model's configuration was not an option.
The simple hack would be to perform the call function manually. It gives control over the activation.
Copy-paste from the Keras source, with self changed to layer. You can do the same with any other layer.
def conv_no_activation(layer, inputs, activation=False):
if layer.rank == 1:
outputs = K.conv1d(
inputs,
layer.kernel,
strides=layer.strides[0],
padding=layer.padding,
data_format=layer.data_format,
dilation_rate=layer.dilation_rate[0])
if layer.rank == 2:
outputs = K.conv2d(
inputs,
layer.kernel,
strides=layer.strides,
padding=layer.padding,
data_format=layer.data_format,
dilation_rate=layer.dilation_rate)
if layer.rank == 3:
outputs = K.conv3d(
inputs,
layer.kernel,
strides=layer.strides,
padding=layer.padding,
data_format=layer.data_format,
dilation_rate=layer.dilation_rate)
if layer.use_bias:
outputs = K.bias_add(
outputs,
layer.bias,
data_format=layer.data_format)
if activation and layer.activation is not None:
outputs = layer.activation(outputs)
return outputs
Now we need to modify the main function a little. First, identify the layer by its name. Then retrieve activations from the previous layer. And at last, compute the output from the target layer.
def get_output_activation_control(model, images, layername, activation=False):
"""Get activations for the input from specified layer"""
inp = model.input
layer_id, layer = [(n, l) for n, l in enumerate(model.layers) if l.name == layername][0]
prev_layer = model.layers[layer_id - 1]
conv_out = conv_no_activation(layer, prev_layer.output, activation=activation)
functor = K.function([inp] + [K.learning_phase()], [conv_out])
return functor([images])
Here is a tiny test. I'm using VGG16 model.
a_relu = get_output_activation_control(vgg_model, img, 'block4_conv1', activation=True)[0]
a_no_relu = get_output_activation_control(vgg_model, img, 'block4_conv1', activation=False)[0]
print(np.sum(a_no_relu < 0))
> 245293
Set all negatives to zero to compare with the results retrieved after an embedded in VGG16 ReLu operation.
a_no_relu[a_no_relu < 0] = 0
print(np.allclose(a_relu, a_no_relu))
> True
easy way to define new layer with new activation function:
def change_layer_activation(layer):
if isinstance(layer, keras.layers.Conv2D):
config = layer.get_config()
config["activation"] = "linear"
new = keras.layers.Conv2D.from_config(config)
elif isinstance(layer, keras.layers.Dense):
config = layer.get_config()
config["activation"] = "linear"
new = keras.layers.Dense.from_config(config)
weights = [x.numpy() for x in layer.weights]
return new, weights
I had the same problem but none of the other answers worked for me. Im using a newer version of Keras with Tensorflow so some answers dont work now. Also the structure of the model is given so i can't change it easely. The general idea is to create a copy of the original model that will work exactly like the original one but spliting the activation from the outputs layers. Once this is done we can easely access the outputs values before the activation is applied.
First we will create a copy of the original model but with no activation on the outputs layers. This will be done using Keras clone_model function (See Docs).
from tensorflow.keras.models import clone_model
from tensorflow.keras.layers import Activation
original_model = get_model()
def f(layer):
config = layer.get_config()
if not isinstance(layer, Activation) and layer.name in original_model.output_names:
config.pop('activation', None)
layer_copy = layer.__class__.from_config(config)
return layer_copy
copy_model = clone_model(model, clone_function=f)
This alone will only make a clone with new weights so we must copy the original_model weights to the new one:
copy_model.build(original_model.input_shape)
copy_model.set_weights(original_model.get_weights())
Now we will add the activations layers:
from tensorflow.keras.models import Model
old_outputs = [ original_model.get_layer(name=name) for name in copy_model.output_names ]
new_outputs = [ Activation(old_output.activation)(output) if old_output.activation else output
for output, old_output in zip(copy_model.outputs, old_outputs) ]
copy_model = Model(copy_model.inputs, new_outputs)
Finally we could create a new model whose evaluation will be the outputs with no activation applied:
no_activation_outputs = [ copy_model.get_layer(name=name).output for name in original_model.output_names ]
no_activation_model = Model(copy.inputs, no_activation_outputs)
Now we could use copy_model like the original_model and no_activation_model to access pre-activation outputs. Actually you could even modify the code to split a custom set of layers instead of the outputs.