a problem and how to deal with batch while creating a Model - python

from keras_multi_head import MultiHeadAttention
import keras
from keras.layers import Dense,Input,Multiply
from keras import backend as K
from keras.layers.core import Dropout, Layer
from keras.models import Sequential,Model
import numpy as np
import tensorflow as tf
from self_attention_layer import Encoder
## multi source attention
class Multi_source_attention(keras.Model):
    """Re-weight a CNN feature map with a learned spatial mask, then encode it.

    ``inputs`` is a pair ``[feature_map, query]``:

    * ``feature_map`` -- channels-last image features of shape
      ``(batch, 7, 7, 256)`` (the MobileNet output).
    * ``query`` -- the vector the spatial weights are derived from, of shape
      ``(1, 256)`` (the LSTM output). A ``(batch, 256)`` query also works and
      then yields one weight map per sample.

    The query is projected to 49 values, viewed as a 7x7 weight map, and
    multiplied into every channel of the feature map. The result is handed
    to the encoder as 256 tokens of 49 features each.
    """

    _GRID = 7        # spatial height/width of the feature map
    _CHANNELS = 256  # number of feature-map channels

    def __init__(self, read_n, embed_dim, num_heads, ff_dim, num_layers):
        super().__init__()
        self.read_n = read_n
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.num_layers = num_layers
        # Deliberately NOT stored as ``self.get_weights``: that attribute name
        # would shadow keras.Model.get_weights(). The layer keeps its
        # original Keras name so saved weights still match.
        self.weight_proj = Dense(
            self._GRID * self._GRID, activation='relu', name="get_weights"
        )
        # Built once here so its weights are created a single time and are
        # tracked by the model; constructing it inside call() would create a
        # fresh set of untrained weights on every invocation.
        self.encoder = Encoder(embed_dim, num_heads, ff_dim, num_layers)

    def compute_output_shape(self, input_shape):
        # ([batch,7,7,256],[1,256])
        # NOTE(review): this echoes the input shapes unchanged. The true
        # output shape is whatever Encoder produces for a (batch, 256, 49)
        # input, which is not visible from this file -- confirm and adjust.
        return input_shape

    def call(self, inputs):
        feature_map, query = inputs[0], inputs[1]

        # (q, 256) -> (q, 49) -> (q, 7, 7, 1), where q is 1 or batch.
        # The trailing singleton axis lets the same 7x7 map broadcast over
        # all channels, and a leading 1 broadcasts over the batch, so no
        # explicit tiling (and no fixed batch size) is needed.
        spatial_weights = tf.reshape(
            self.weight_proj(query), (-1, self._GRID, self._GRID, 1)
        )

        # (batch, 7, 7, 256): every channel scaled by the spatial weight map.
        weighted = tf.multiply(feature_map, spatial_weights)

        # Flatten the spatial grid, then move channels in front so that each
        # channel becomes one token: (batch, 49, 256) -> (batch, 256, 49).
        # A direct reshape to (-1, 256, 49) would interleave channels and
        # positions because the data is stored channels-last.
        tokens = tf.reshape(
            weighted, (-1, self._GRID * self._GRID, self._CHANNELS)
        )
        tokens = tf.transpose(tokens, perm=[0, 2, 1])

        return self.encoder(tokens)
I am trying to build a network that implements the part circled in the image. It takes two inputs: the output of the LSTM, with shape (1, 256), and the output of the preceding MobileNet, with shape (batch, 7, 7, 256). The LSTM output is then transformed into a weight matrix of shape (7, 7).
The problem is that the MobileNet output has a batch dimension. I have no idea how to deal with this "batch" dimension, or how to set a parameter that constrains the batch size.
Could someone give me a tip?
Also, if I remove the compute_output_shape() function, a NotImplementedError is raised, even though the official Keras documentation says I don't need to override this function.
Could someone explain me about that?

compute_output_shape is crucial when writing a custom layer. When summary() is called, the corresponding graph is generated and the input and output shapes of every layer are shown; compute_output_shape is what supplies the layer's output shape.

Related

Input 0 is incompatible with layer repeat_vector_40: expected ndim=2, found ndim=1

I am developing an LSTM autoencoder model for anomaly detection. I have my keras model setup as below:
from keras.models import Sequential
from keras import Model, layers
from keras.layers import Layer, Conv1D, Input, Masking, Dense, RNN, LSTM, Dropout, RepeatVector, TimeDistributed, Masking, Reshape
def create_RNN_with_attention():
    """Build and compile the LSTM -> attention -> LSTM autoencoder.

    The input shape is taken from the module-level ``X_train_dt``:
    ``shape[1]`` is used as the sequence length (also the RepeatVector
    count) and ``shape[2]`` as the per-step feature width (also the width
    of the final Dense layer).

    NOTE: the first LSTM is created with ``return_sequences=False``, so the
    ``attention`` layer receives a 2-D ``(batch, 64)`` tensor and its sum
    over axis 1 leaves a 1-D result; RepeatVector then rejects it
    ("expected ndim=2, found ndim=1").

    Returns:
        The compiled Keras ``Model`` (loss ``'mae'``, optimizer ``'adam'``).
    """
    seq_len = X_train_dt.shape[1]
    feature_dim = X_train_dt.shape[2]

    # Encoder half.
    x = Input(shape=(seq_len, feature_dim))
    encoded = LSTM(units=64, return_sequences=False)(x)
    context = attention()(encoded)
    context = Dropout(rate=0.2)(context)

    # Decoder half: repeat the context once per step and unroll it again.
    repeated = RepeatVector(n=seq_len)(context)
    decoded = LSTM(units=64, return_sequences=True)(repeated)
    decoded = Dropout(rate=0.2)(decoded)
    output = TimeDistributed(Dense(feature_dim, trainable=True))(decoded)

    model = Model(x, output)
    model.compile(loss='mae', optimizer='adam')
    return model
Notice the attention layer that I added, attention_layer. Before adding this, the model compiled perfectly, however after adding this attention_layer - the model is throwing out the following error: ValueError: Input 0 is incompatible with layer repeat_vector_40: expected ndim=2, found ndim=1
My attention layer is setup as follows:
import keras.backend as K
class attention(Layer):
    """Attention pooling: collapse axis 1 of the input into a weighted sum.

    For an input ``x`` the layer computes ``tanh(x . W + b)``, applies a
    softmax to obtain one weight per position, scales ``x`` by those
    weights and sums over axis 1.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        # W maps the last axis to a single score; b holds one bias per
        # entry along axis 1.
        last_dim = input_shape[-1]
        axis1_dim = input_shape[1]
        self.W = self.add_weight(
            name='attention_weight',
            shape=(last_dim, 1),
            initializer='random_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            name='attention_bias',
            shape=(axis1_dim, 1),
            initializer='zeros',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        # Alignment scores through tanh, with the trailing size-1 axis dropped.
        scores = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        # Normalised weights, re-expanded so they broadcast against x.
        weights = K.expand_dims(K.softmax(scores), axis=-1)
        # Context vector: weighted sum over axis 1.
        return K.sum(x * weights, axis=1)
The idea of the attention mask is to allow the model to focus on the more prominent features as it trains.
Why am I getting the error above and how can I fix this?
I think that the problem lies in this line:
RNN_layer_1 = LSTM(units=64, return_sequences=False)(x)
This layer outputs a tensor of shape (batch_size, 64). This means that you output a single vector per sample and then run the attention mechanism over the batch dimension instead of over a sequence dimension. It also means that the attention output has its batch dimension squeezed away, which is not acceptable for any Keras layer. This is why the RepeatVector layer raises the error: it expects an input of at least shape (batch_dimension, dim).
If you want to run attention mechanism over a sequence then you should switch the line mentioned above to:
RNN_layer_1 = LSTM(units=64, return_sequences=True)(x)

Cannot convert a symbolic Keras input/output to a numpy array in a simple CNN

This is a very simple problem that I cannot get around. I am new to tensorflow and this is the second time I am facing this problem.
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten, Input
from tensorflow.keras.models import Model
import numpy as np
# Symbolic model input: 128x128 images with 4 channels.
x = tf.keras.Input(shape=(128, 128, 4))
# Two convolutions followed by 2x2 max pooling; `conv` stays a symbolic
# Keras tensor throughout (no concrete values exist at this point).
conv = Conv2D(30, (3, 3), activation='relu',input_shape=(128, 128, 4))(x)
conv = Conv2D(12, (5,5))(conv)
conv = MaxPooling2D(pool_size=(2,2))(conv)
# NOTE: conv[2] indexes the FIRST axis of the tensor (index 2 along the
# batch axis), not a channel.
print(conv[2])
# This is the failing line: np.array() is handed a symbolic Keras tensor,
# which produces the "Cannot convert a symbolic Keras input/output to a
# numpy array" error quoted below.
conv = np.array(conv[2]) # <---- here is the problem
# Intended normalisation, written with NumPy. Note that the mean is taken
# over conv[1:] while the std is taken over all of conv.
input_mean = np.mean(conv[1:], axis=0)
input_std = np.std(conv, axis=0)
conv = (conv - input_mean) / input_std
# Flatten and feed the fully connected head.
conv = Flatten()(conv)
conv = Dense(157, activation='relu')(conv)
model = Model(inputs = x, outputs = conv)
#model.summary()
The error that I am getting is,
Cannot convert a symbolic Keras input/output to a numpy array. This error may indicate that you're trying to pass a symbolic value to a NumPy call, which is not supported. Or, you may be trying to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model.
My question is, How would I take the Output from my Maxpooling layer and take the mean and standard deviation for each incoming channel? The output of the mean and std would be a tensor where each channel is separately normalized. I would then flatten this output and send it to my fully connected dense layer.
Thanks in advance.
I obtained a similar error and I performed the following:
del model
Before:
model = Model(inputs = x, outputs = conv)
It resolved my issue.
I am eager to know if it solves your issue too:) .

How to bypass portion of neural network in TensorFlow for some (but not all) features

In my TensorFlow model I have some data that I feed into a stack of CNNs before it goes into a few fully connected layers. I have implemented that with Keras' Sequential model. However, I now have some data that should not go into the CNN and instead be fed directly into the first fully connected layer because that data contains some values and labels that are part of the input data but that data should not undergo convolutions as it is not image data.
Is such a thing possible with tensorflow.keras, or should I do that with tensorflow.nn instead? As far as I understand Keras' Sequential models, the input goes in one end and comes out the other, with no special wiring in the middle.
Am I correct that to do this I have to use tensorflow.concat on the data from the last CNN layer and the data that bypasses the CNNs before feeding it into the first fully connected layer?
Here is a simple example in which the operation is to sum the activations from different subnets:
import keras
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense, Activation
# Clear the global default graph before building the model. This is the
# TF1-style name; under TensorFlow 2 the same function is exposed as
# tf.compat.v1.reset_default_graph().
tf.reset_default_graph()
# this represents your cnn model
def nn_model(input_x):
    """Apply a three-layer Dense stack to ``input_x`` and return the result.

    The layers are, in order: 10 units (relu), 20 units (relu) and a single
    linear unit. A fresh set of layers is created on every call, so separate
    calls do not share weights.
    """
    layer_specs = ((10, 'relu'), (20, 'relu'), (1, 'linear'))
    tensor = input_x
    for units, activation in layer_specs:
        tensor = Dense(units, activation=activation)(tensor)
    return tensor
# Two independent input layers, each taking 3 features; the input shapes
# could differ between branches.
input_layers = [Input(shape=(3, )) for _ in range(2)]
# Run every input through its own nn_model sub-network (one output tensor
# per input).
coupled_feature = [nn_model(input_x) for input_x in input_layers]
# Merge the branches by element-wise sum of their outputs.
coupled_feature = keras.layers.Add()(coupled_feature)
# Single-unit prediction head on top of the merged features.
prediction = Dense(1, activation='relu')(coupled_feature)
# The model takes the list of inputs and produces one prediction.
model = keras.models.Model(inputs=input_layers, outputs=prediction)
model.compile(loss='mse', optimizer='adam')
# Example training set: two identical (90, 3) feature arrays and 90 random
# targets.
x_1 = np.linspace(1, 90, 270).reshape(90, 3)
x_2 = np.linspace(1, 90, 270).reshape(90, 3)
y = np.random.rand(90)
# Inputs are passed as a list, in the same order as `input_layers`.
inputs_x = [x_1, x_2]
model.fit(inputs_x, y, batch_size=32, epochs=10)
You can actually plot the model to gain more intuition
from keras.utils.vis_utils import plot_model
# Draw the model graph, annotating every layer with its shapes.
plot_model(model, show_shapes=True)
The model of the above code looks like this
With a little remodeling and the functional API you can:
# Create the CNN as its own model - it can also be a Sequential.
# (Sketch only: `image_shape` and the `...` layer arguments are placeholders.)
cnn_input = Input(image_shape)
cnn_output = Conv2D(...)(cnn_input)
cnn_output = Conv2D(...)(cnn_output)
cnn_output = MaxPooling2D()(cnn_output)
....
cnn_model = Model(cnn_input, cnn_output)
# Create the fully connected (FC) model - it can also be a Sequential.
# (`fc_input_shape` is a placeholder as well.)
fc_input = Input(fc_input_shape)
fc_output = Dense(...)(fc_input)
fc_output = Dense(...)(fc_output)
fc_model = Model(fc_input, fc_output)
There is a lot of space for creativity, this is just one of the ways.
# Create the full model by calling the two sub-models like layers:
# image input -> cnn_model -> fc_model.
# NOTE(review): this assumes the output of cnn_model is compatible with
# fc_input_shape -- confirm when filling in the placeholders.
full_input = Input(image_shape)
full_output = cnn_model(full_input)
full_output = fc_model(full_output)
full_model = Model(full_input, full_output)
You can use any of the three models in any way you want. They share the layers and the weights, so internally they are the same.
Saving and loading the full model might be quirky. I'd probably save the other two separately and when loading create the full model again.
Notice also that if you save two models that share the same layers, after loading they will probably not share these layers anymore. (Another reason for saving/loading only fc_model and cnn_model, while creating full_model again from code)

keras K.function error for layer output extraction

I currently have a modified resnet 50 architecture that takes two inputs. Building the model and training the model works fine, but when I’m trying to extract layer outputs using the backend function, I encounter errors.
I would prefer to extract layers using the backend function, rather than creating a new truncated model with just my layer of interest as the output.
The following snippet is self contained, and should be able to run and give the error I’ve been seeing.
I've tried reformatting the function in a few ways, such as K.function( [ mymodel.input[0],mymodel.input[1] ] , [mymodel.layers[-1].layers[-6].output])
or
K.function( [ mymodel.layers[0].input,mymodel.layers[1].input ] , [mymodel.layers[-1].layers[-6].output])
but nothing seems to fix the issue
##imports
from keras.applications.resnet50 import ResNet50
from keras.layers import Input
from keras.layers import Lambda
from keras.models import Model
from keras.optimizers import Adam
import keras
import keras.backend as K
import numpy as np
# Build a 2-class ResNet50 with random weights, then try to pop off its
# input layer.
# NOTE(review): popping from `res.layers` presumably does not rewire the
# ResNet's internal graph, so the nested model would keep its own
# 'input_1' placeholder (the one named in the error below) -- confirm.
res = ResNet50(weights=None,include_top=True,classes=2)
res.layers.pop(0)
# Add two inputs: a 1-channel auxiliary image and a 3-channel main image.
auxinput= Input(batch_shape=(None,224,224,1), name='aux_input')
main_input = Input(batch_shape=(None,224,224,3), name='main_input')
# Use a Lambda layer to return just the main input (avoids errors from our
# auxiliary input not being used in the ResNet50 component).
l_output = Lambda(lambda x: x[0])([main_input, auxinput])
# Feed the main input through ResNet50, which is called here as one layer.
data_passed_thru = res(l_output)
# Assemble the model with the two inputs and the single output.
mymodel = Model(inputs=[main_input, auxinput], outputs=[data_passed_thru])
mymodel.compile(optimizer=Adam(lr=0.001), loss= keras.losses.poisson, metrics=[ 'accuracy'])
print("my model summary:")
mymodel.summary()
# Generate some fake data for testing.
# fake_aux: (224, 224) -> (1, 224, 224) -> (1, 224, 224, 1)
fake_aux= np.zeros((224,224))
fake_aux=fake_aux[None,...]
fake_aux=fake_aux[...,None]
print('fake aux input shape:', fake_aux.shape)
# fake_main: (224, 224, 3) -> (1, 224, 224, 3)
fake_main= np.zeros((224,224,3))
fake_main=fake_main[None,...]
print('fake main input shape:', fake_main.shape)
# Check the model inputs and the target layer. mymodel.layers[-1] is the
# nested ResNet50 model; .layers[-6] is the sixth layer from its end.
print("inputs:", mymodel.input)
print("layer outout I'm trying to extract:", mymodel.layers[-1].layers[-6])
# Create a backend function that maps the outer model's inputs to the
# output of the chosen layer inside the nested ResNet50.
get_output_func = K.function( mymodel.input , [mymodel.layers[-1].layers[-6].output])
# This is the call that fails with the InvalidArgumentError quoted below.
X= [fake_main,fake_aux]
preds=get_output_func(X)
The error message I get is
InvalidArgumentError: You must feed a value for placeholder tensor 'input_1' with dtype float and shape [?,224,224,3]
[[{{node input_1}}]]
I managed to fix it by accessing the Resnet50 inputs directly for the function, rather than just the whole model's initial inputs. The K.function that works is
get_output_func = K.function( [mymodel.layers[-1].get_input_at(0)] , [mymodel.layers[-1].layers[-6].output])
X= [fake_main]
preds=get_output_func(X)
It only works because my architecture depends only on the one input that passes through the ResNet50, so I'm not sure what the solution would be in other situations, but it works for my case.

Changing activation function of a keras layer w/o replacing whole layer

I am trying to change the activation function of the last layer of a keras model without replacing the whole layer. In this case, only the softmax function
import keras.backend as K
from keras.models import load_model
from keras.preprocessing.image import load_img, img_to_array
import numpy as np
# `model_path` and `img_path` are placeholders supplied by the reader.
model = load_model(model_path) # Load any model
# Load the image resized to 224x224 and convert it to an array.
img = load_img(img_path, target_size=(224, 224))
img = img_to_array(img)
# NOTE(review): `img` is a single image with no leading batch axis here;
# predict() presumably expects a batch -- confirm whether an expand_dims
# step was omitted from the snippet.
print(model.predict(img))
My output:
array([[1.53172877e-07, 7.13159451e-08, 6.18941920e-09, 8.52070968e-07,
1.25813088e-07, 9.98970985e-01, 1.48254022e-08, 6.09538893e-06,
1.16236095e-07, 3.91888688e-10, 6.29304608e-08, 1.79565995e-09,
1.75571788e-08, 1.02110009e-03, 2.14380114e-09, 9.54465733e-08,
1.05938483e-07, 2.20544337e-07]], dtype=float32)
Then I do this to change the activation:
model.layers[-1].activation = custom_softmax
print(model.predict(test_img))
and the output I got is exactly the same. Any ideas how to fix? Thanks!
You could try to use the custom_softmax below:
def custom_softmax(x, axis=-1):
    """Stand-in activation with softmax's signature that returns all zeros.

    Despite the name, no softmax is computed: the values of ``x`` are
    ignored and a zero tensor is returned. Presumably this is meant as an
    easily recognisable probe for checking whether a replaced activation is
    really being used.

    # Arguments
        x : Tensor.
        axis: Integer, kept for signature compatibility; not used.

    # Returns
        Tensor of zeros shaped like `x`.

    # Raises
        ValueError: In case `x` has fewer than 2 dimensions.
    """
    # Guard first: reject tensors below rank 2.
    if K.ndim(x) < 2:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')
    return K.zeros_like(x)
In the current state of things, there is no official, clean way to do that. As pointed out by @layser in the comments, the TensorFlow graph isn't being updated, which is why your output does not change. One option is to use keras-vis' utils. My recommendation is to isolate that in your own utils.py, like so:
from vis.utils.utils import apply_modifications
def update_layer_activation(model, activation, index=-1):
    """Replace one layer's activation and return the rebuilt model.

    Args:
        model: Keras model whose layer is modified in place.
        activation: Callable to install as the layer's activation.
        index: Position of the layer in ``model.layers``; defaults to the
            last layer.

    Returns:
        The model returned by keras-vis' ``apply_modifications``. The
        assignment alone does not change the already-built graph, so the
        model is passed through that helper for the new activation to
        take effect.
    """
    target_layer = model.layers[index]
    target_layer.activation = activation
    rebuilt_model = apply_modifications(model)
    return rebuilt_model
Which would lead to a similar use:
model = update_layer_activation(model, custom_softmax)
If you follow the given link, you'll see what they do is quite simple: they save the model to a temporary path, then load it back and return, finally deleting the temp file.

Categories