I have the following binary classification Keras model, which doesn't train well, but it does train:
def vgg_stack(self):
def func(x):
x = layers.Conv2D(64, (3, 3), activation='relu')(x)
x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
x = layers.Conv2D(128, (3, 3), activation='relu')(x)
x = layers.MaxPooling2D((2, 2), strides=(2, 2))(x)
x = layers.Conv2D(128, (3, 3), activation='relu')(x)
x = layers.MaxPooling2D((2, 2), strides=(2, 2))(x)
x = layers.Conv2D(64, (3, 3), activation='relu')(x)
x = layers.MaxPooling2D((2, 2), strides=(2, 2))(x)
x = layers.Flatten()(x)
x = layers.Dense(512, activation='relu')(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dense(1, activation='sigmoid')(x)
return x
return func
def implement(self):
self.inputs = layers.Input((self.input_width, self.input_height, self.input_depth))
self.outputs = self.vgg_stack()(self.inputs)
self.opt = optimizers.Adam(lr=self.learning_rate)
self.model = models.Model(inputs=self.inputs, outputs=self.outputs)
self.model.compile(loss='binary_crossentropy', optimizer=self.opt)
def fit_predict(self):
...
self.model.fit(data_train, actuals_train, batch_size=self.batch_size, epochs=10, verbose=1,
validation_data=[data_validation, actuals_validation], callbacks=[self])
Its predictions look like the following:
[[ 0.58952832]
[ 0.89163774]
[ 0.99083483]
...,
[ 0.52727282]
[ 0.72056866]
[ 0.99504411]]
I.e., it's producing something.
I tried to convert the model to pure TensorFlow and got:
def conv2drelu(self, x, filters, kernel_size, padding='VALID'):
input_depth = int(x.get_shape()[-1])
weights = tf.Variable(tf.truncated_normal([kernel_size[0], kernel_size[1], input_depth, filters],
dtype=tf.float32, stddev=self.init_stddev))
self.var_list.append(weights)
biases = tf.Variable(tf.constant(0.0, shape=[filters], dtype=tf.float32))
self.var_list.append(biases)
y = tf.nn.conv2d(x, weights, [1, 1, 1, 1], padding=padding)
y = tf.nn.bias_add(y, biases)
y = tf.nn.relu(y)
return y
def maxpooling(self, x, pool_size, strides, padding='VALID'):
y = tf.nn.max_pool(x, ksize=[1, pool_size[0], pool_size[1], 1], strides=[1, strides[0], strides[1], 1],
padding=padding)
return y
def flatten(self, x):
shape = int(np.prod(x.get_shape()[1:]))
y = tf.reshape(x, [-1, shape])
return y
def dense(self, x, units, activation):
shape = int(x.get_shape()[1])
weights = tf.Variable(tf.truncated_normal([shape, units], dtype=tf.float32, stddev=self.init_stddev))
self.var_list.append(weights)
biases = tf.Variable(tf.constant(0.0, shape=[units], dtype=tf.float32))
self.var_list.append(biases)
y = tf.matmul(x, weights)
y = tf.nn.bias_add(y, biases)
if activation == 'relu':
y = tf.nn.relu(y)
elif activation == 'sigmoid':
y = tf.nn.sigmoid(y)
return y
def vgg_stack(self, x):
x = self.conv2drelu(x, 64, (3, 3))
x = self.maxpooling(x, (3, 3), strides=(2, 2))
x = self.conv2drelu(x, 128, (3, 3))
x = self.maxpooling(x, (2, 2), strides=(2, 2))
x = self.conv2drelu(x, 128, (3, 3))
x = self.maxpooling(x, (2, 2), strides=(2, 2))
x = self.conv2drelu(x, 64, (3, 3))
x = self.maxpooling(x, (2, 2), strides=(2, 2))
x = self.flatten(x)
x = self.dense(x, 512, activation='relu')
x = self.dense(x, 256, activation='relu')
x = self.dense(x, 1, activation='sigmoid')
return x
def implement(self):
self.var_list = []
self.input_data = tf.placeholder(tf.float32, shape=(None, self.width, self.height, self.depth))
self.prediction = self.vgg_stack(self.input_data)
self.actual = tf.placeholder(tf.float32, shape=(None, 1))
self.log_loss = tf.losses.log_loss(self.actual, self.prediction)
opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
# self.step = opt.minimize(self.mean_squared_error, var_list=self.var_list)
self.step = opt.minimize(self.log_loss, var_list=self.var_list)
I.e., I tried to write functions equivalent to each Keras layer and then combined them into the same structure.
I used all the same numbers. Unfortunately, the network produces something degraded:
[[ 0.46732453]
[ 0.46732453]
[ 0.46732453]
...,
[ 0.46732453]
[ 0.46732453]
[ 0.46732453]]
I.e., the same value for every sample.
What can be the reason for this?
The conversion itself was correct. I wrote unit tests for the convolution layers from Keras and TensorFlow and found that they produce numerically identical results.
Additionally, I replaced the optimization objective from plain log-loss with sigmoid_cross_entropy_with_logits, but this alone didn't help.
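For reference, a minimal sketch of that variant (not the original code; it assumes the final dense layer is changed to return raw logits, i.e. activation=None):
logits = self.vgg_stack(self.input_data)      # last dense layer now returns logits, no sigmoid
self.prediction = tf.nn.sigmoid(logits)       # probabilities for inference
self.loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=self.actual, logits=logits))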
The real problem was that the stddev of the initialization values was too small.
I had assumed it was enough for the stddev to be tiny just to break symmetry, and set it to 1e-8 or 1e-5, but this was wrong: values that small are practically indistinguishable from zeros, and after several layers the network started to produce identical results for all samples.
After I changed the stddev to 1e-1, the network started to perform like the Keras one.
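In code terms, the whole fix was this one value (a sketch; init_stddev is the attribute used in the snippets above):
self.init_stddev = 1e-1  # was 1e-8 / 1e-5, which makes the initial weights effectively all zero
Keras did not hit this problem because its Conv2D and Dense layers default to the glorot_uniform initializer, which scales the initial weight range by the layer's fan-in and fan-out instead of relying on a hand-picked stddev.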
Package versions:
Tensorflow==2.5
Python==3.8
Keras==2.3
Here is the code:
# Pipe Line
(x_train, y_train), (x_test, y_test), (x_val, y_val) = (X_train, Y_train), (X_test, Y_test), (X_val, Y_val)
def model_seg():
# Convolution Layers (BatchNorm after non-linear activation)
img_input = Input(shape= (192, 256, 3))
x = Conv2D(16, (3, 3), padding='same', name='conv1')(img_input)
x = BatchNormalization(name='bn1')(x)
x = Activation('relu')(x)
x = Conv2D(32, (3, 3), padding='same', name='conv2')(x)
x = BatchNormalization(name='bn2')(x)
x = Activation('relu')(x)
x = MaxPooling2D()(x)
x = Conv2D(64, (3, 3), padding='same', name='conv3')(x)
x = BatchNormalization(name='bn3')(x)
x = Activation('relu')(x)
x = Conv2D(64, (3, 3), padding='same', name='conv4')(x)
x = BatchNormalization(name='bn4')(x)
x = Activation('relu')(x)
x = MaxPooling2D()(x)
x = Conv2D(128, (3, 3), padding='same', name='conv5')(x)
x = BatchNormalization(name='bn5')(x)
x = Activation('relu')(x)
x = Conv2D(128, (4, 4), padding='same', name='conv6')(x)
x = BatchNormalization(name='bn6')(x)
x = Activation('relu')(x)
x = MaxPooling2D()(x)
x = Conv2D(256, (3, 3), padding='same', name='conv7')(x)
x = BatchNormalization(name='bn7')(x)
x = Dropout(0.5)(x)
x = Activation('relu')(x)
x = Conv2D(256, (3, 3), padding='same', name='conv8')(x)
x = BatchNormalization(name='bn8')(x)
x = Activation('relu')(x)
x = MaxPooling2D()(x)
x = Conv2D(512, (3, 3), padding='same', name='conv9')(x)
x = BatchNormalization(name='bn9')(x)
x = Activation('relu')(x)
x = Dense(1024, activation = 'relu', name='fc1')(x)
x = Dense(1024, activation = 'relu', name='fc2')(x)
# Deconvolution Layers (BatchNorm after non-linear activation)
x = Conv2DTranspose(256, (3, 3), padding='same', name='deconv1')(x)
x = BatchNormalization(name='bn19')(x)
x = Activation('relu')(x)
x = UpSampling2D()(x)
x = Conv2DTranspose(256, (3, 3), padding='same', name='deconv2')(x)
x = BatchNormalization(name='bn12')(x)
x = Activation('relu')(x)
x = Conv2DTranspose(128, (3, 3), padding='same', name='deconv3')(x)
x = BatchNormalization(name='bn13')(x)
x = Activation('relu')(x)
x = UpSampling2D()(x)
x = Conv2DTranspose(128, (4, 4), padding='same', name='deconv4')(x)
x = BatchNormalization(name='bn14')(x)
x = Activation('relu')(x)
x = Conv2DTranspose(128, (3, 3), padding='same', name='deconv5')(x)
x = BatchNormalization(name='bn15')(x)
x = Activation('relu')(x)
x = UpSampling2D()(x)
x = Conv2DTranspose(64, (3, 3), padding='same', name='deconv6')(x)
x = BatchNormalization(name='bn16')(x)
x = Activation('relu')(x)
x = Conv2DTranspose(32, (3, 3), padding='same', name='deconv7')(x)
x = BatchNormalization(name='bn20')(x)
x = Activation('relu')(x)
x = UpSampling2D()(x)
x = Conv2DTranspose(16, (3, 3), padding='same', name='deconv8')(x)
x = BatchNormalization(name='bn17')(x)
x = Dropout(0.5)(x)
x = Activation('relu')(x)
x = Conv2DTranspose(1, (3, 3), padding='same', name='deconv9')(x)
x = BatchNormalization(name='bn18')(x)
x = Activation('sigmoid')(x)
pred = Reshape((192,256))(x)
model = Model(inputs=img_input, outputs=pred)
model.compile(optimizer= Adam(lr = 0.003), loss= [jaccard_distance], metrics=[iou])
hist = model.fit(x_train, y_train, epochs= 300, batch_size= 16,validation_data=(x_test, y_test), verbose=1)
model.save("model.h5")
accuracy = model.evaluate(x=x_test,y=y_test,batch_size=16)
print("Accuracy: ",accuracy[1])
This gives me the following type-conversion error, and I don't know how to fix it:
return gen_math_ops.mul(x, y, name)
D:\road-damage\road-damage-detection\rdd\lib\site-packages\tensorflow\python\ops\gen_math_ops.py:6248 mul
_, _, _op, _outputs = _op_def_library._apply_op_helper(
D:\road-damage\road-damage-detection\rdd\lib\site-packages\tensorflow\python\framework\op_def_library.py:555 _apply_op_helper
raise TypeError(
TypeError: Input 'y' of 'Mul' Op has type float32 that does not match type uint8 of argument 'x'.
Somewhere in your code there is a tensor of floats and a tensor of integers, and TensorFlow cannot decide which type the result should be.
The architecture of your network does not tell us much; the problem most likely lies in the way your data is being prepared.
If it is OK to treat your X and Y as floats, try explicitly converting them to floats like this before passing them to fit:
x_train = x_train.astype(np.float32)
x_test = x_test.astype(np.float32)
y_train = y_train.astype(np.float32)
y_test = y_test.astype(np.float32)
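If the arrays are uint8 images in the 0-255 range, it is also common to rescale while casting (an optional extra step, assuming that value range):
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0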
I'm trying to implement an autoencoder which takes 3 different inputs and fuses these three images. I want to take the output of a layer in the encoder and concatenate it with a layer in the decoder, but when I run it I get a graph disconnected error.
Here is my code:
def create_model(input_shape):
input_1 = keras.layers.Input(input_shape)
input_2 = keras.layers.Input(input_shape)
input_3 = keras.layers.Input(input_shape)
network = keras.models.Sequential([
keras.layers.Conv2D(32, (7, 7), activation=tf.nn.relu, padding='SAME',input_shape=input_shape),
keras.layers.Conv2D(32, (7, 7), activation=tf.nn.relu, padding='SAME', name = 'a'),
keras.layers.AvgPool2D((2, 2)),
keras.layers.BatchNormalization(),
keras.layers.Dropout(0.3)])
encoded_1 = network(input_1)
encoded_2 = network(input_2)
encoded_3 = network(input_3)
a = network.get_layer('a').output
x = keras.layers.Concatenate()([encoded_1,encoded_2,encoded_3])
x = keras.layers.Conv2D(32, (3, 3), activation=tf.nn.relu, padding='SAME')(x)
x = keras.layers.UpSampling2D((2,2))(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Dropout(0.3)(x)
x = keras.layers.Concatenate()([x,a])
x = keras.layers.Conv2D(32, (3, 3), activation=tf.nn.relu, padding='SAME')(x)
x = keras.layers.UpSampling2D((2,2))(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Dropout(0.3)(x)
decoded = keras.layers.Conv2D(3, (3, 3), activation=tf.nn.relu, padding='SAME')(x)
final_net= keras.models.Model(inputs=[input_1,input_2,input_3],outputs=decoded)
return final_net
the error is:
Graph disconnected: cannot obtain value for tensor Tensor("conv2d_1_input:0", shape=(?, 128, 128, 1), dtype=float32) at layer "conv2d_1_input". The following previous layers were accessed without issue: ['input_6', 'input_5', 'input_4', 'sequential_1', 'sequential_1', 'sequential_1', 'concatenate', 'conv2d_2']
It is caused by the concatenation of [x, a]. I've tried to get the output of the layer for each of the three images like this:
encoder_1.get_layer('a').output
encoder_2.get_layer('a').output
encoder_3.get_layer('a').output
but I got the error "'Tensor' object has no attribute 'output'".
You need to create a subnetwork if you want to get the a1, a2 and a3 outputs. You can then connect x and a as follows.
def create_model(input_shape):
input_1 = keras.layers.Input(input_shape)
input_2 = keras.layers.Input(input_shape)
input_3 = keras.layers.Input(input_shape)
network = keras.models.Sequential([
keras.layers.Conv2D(32, (7, 7), activation=tf.nn.relu, padding='SAME',input_shape=input_shape),
keras.layers.Conv2D(32, (7, 7), activation=tf.nn.relu, padding='SAME', name = 'a'),
keras.layers.AvgPool2D((2, 2)),
keras.layers.BatchNormalization(),
keras.layers.Dropout(0.3)])
encoded_1 = network(input_1)
encoded_2 = network(input_2)
encoded_3 = network(input_3)
subnet = keras.models.Sequential()
for l in network.layers:
subnet.add(l)
if l.name == 'a': break
a1 = subnet(input_1)
a2 = subnet(input_2)
a3 = subnet(input_3)
x = keras.layers.Concatenate()([encoded_1,encoded_2,encoded_3])
a = keras.layers.Concatenate()([a1,a2,a3])
x = keras.layers.Conv2D(32, (3, 3), activation=tf.nn.relu, padding='SAME')(x)
x = keras.layers.UpSampling2D((2,2))(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Dropout(0.3)(x)
x = keras.layers.Concatenate()([x,a])
x = keras.layers.Conv2D(32, (3, 3), activation=tf.nn.relu, padding='SAME')(x)
x = keras.layers.UpSampling2D((2,2))(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Dropout(0.3)(x)
decoded = keras.layers.Conv2D(3, (3, 3), activation=tf.nn.relu, padding='SAME')(x)
final_net= keras.models.Model(inputs=[input_1,input_2,input_3],outputs=decoded)
return final_net
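A quick way to check that the graph now connects is to build the model and print its summary (a usage sketch; the input shape is taken from the error message above):
model = create_model((128, 128, 1))
model.summary()  # builds without the "Graph disconnected" error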
Working on a university exercise, I used the model sub-classing API of TF 2.0. Here's my code (it's the AlexNet architecture, if you were wondering):
class MyModel(Model):
def __init__(self):
super(MyModel, self).__init__()
# OPS
self.relu = Activation('relu', name='ReLU')
self.maxpool = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='valid', name='MaxPool')
self.softmax = Activation('softmax', name='Softmax')
# Conv layers
self.conv1 = Conv2D(filters=96, input_shape=(224, 224, 3), kernel_size=(11, 11), strides=(4, 4), padding='same',
name='conv1')
self.conv2a = Conv2D(filters=128, kernel_size=(5, 5), strides=(1, 1), padding='same', name='conv2a')
self.conv2b = Conv2D(filters=128, kernel_size=(5, 5), strides=(1, 1), padding='same', name='conv2b')
self.conv3 = Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding='same', name='conv3')
self.conv4a = Conv2D(filters=192, kernel_size=(3, 3), strides=(1, 1), padding='same', name='conv4a')
self.conv4b = Conv2D(filters=192, kernel_size=(3, 3), strides=(1, 1), padding='same', name='conv4b')
self.conv5a = Conv2D(filters=128, kernel_size=(3, 3), strides=(1, 1), padding='same', name='conv5a')
self.conv5b = Conv2D(filters=128, kernel_size=(3, 3), strides=(1, 1), padding='same', name='conv5b')
# Fully-connected layers
self.flatten = Flatten()
self.dense1 = Dense(4096, input_shape=(100,), name='FC_4096_1')
self.dense2 = Dense(4096, name='FC_4096_2')
self.dense3 = Dense(1000, name='FC_1000')
# Network definition
def call(self, x, **kwargs):
x = self.conv1(x)
x = self.relu(x)
x = tf.nn.local_response_normalization(x, depth_radius=2, alpha=2e-05, beta=0.75, bias=1.0)
x = self.maxpool(x)
x = tf.concat((self.conv2a(x[:, :, :, :48]), self.conv2b(x[:, :, :, 48:])), 3)
x = self.relu(x)
x = tf.nn.local_response_normalization(x, depth_radius=2, alpha=2e-05, beta=0.75, bias=1.0)
x = self.maxpool(x)
x = self.conv3(x)
x = self.relu(x)
x = tf.concat((self.conv4a(x[:, :, :, :192]), self.conv4b(x[:, :, :, 192:])), 3)
x = self.relu(x)
x = tf.concat((self.conv5a(x[:, :, :, :192]), self.conv5b(x[:, :, :, 192:])), 3)
x = self.relu(x)
x = self.maxpool(x)
x = self.flatten(x)
x = self.dense1(x)
x = self.relu(x)
x = self.dense2(x)
x = self.relu(x)
x = self.dense3(x)
return self.softmax(x)
My goal is to access an arbitrary layer's output (in order to maximize a specific neuron's activation, if you have to know exactly :) ). The problem is that when I try to access any layer's output, I get an attribute error. For example:
model = MyModel()
print(model.get_layer('conv1').output)
# => AttributeError: Layer conv1 has no inbound nodes.
I found some questions with this error here on SO, and all of them claim that I have to define the input shape in the first layer, but as you can see, that's already done (see the definition of self.conv1 in the __init__ function)!
I did find that if I define a keras.layers.Input object, I do manage to get the output of conv1, but trying to access deeper layers fails, for example:
model = MyModel()
I = tf.keras.Input(shape=(224, 224, 3))
model(I)
print(model.get_layer('conv1').output)
# prints Tensor("my_model/conv1/Identity:0", shape=(None, 56, 56, 96), dtype=float32)
print(model.get_layer('FC_1000').output)
# => AttributeError: Layer FC_1000 has no inbound nodes.
I googled every exception that I got along the way but found no answer. How can I access any layer's input/output (or the input_shape/output_shape attributes, for that matter) in this case?
In a sub-classed model there is no graph of layers; it's just a piece of code (the model's call function). Layer connections are not defined while creating an instance of the Model class, hence we need to build the model first by calling the call method.
Try this:
model = MyModel()
inputs = tf.keras.Input(shape=(224,224,3))
model.call(inputs)
# instead of model(I) in your code.
After doing this, the model graph is created.
for i in model.layers:
print(i.output)
# output
# Tensor("ReLU_7/Relu:0", shape=(?, 56, 56, 96), dtype=float32)
# Tensor("MaxPool_3/MaxPool:0", shape=(?, 27, 27, 96), dtype=float32)
# Tensor("Softmax_1/Softmax:0", shape=(?, 1000), dtype=float32)
# ...
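If the goal is the activation values themselves rather than just the symbolic tensors, one option (a sketch that builds on the graph created above) is to wrap the traced tensors in a functional model and run it on data:
feature_extractor = tf.keras.Model(inputs=inputs, outputs=model.get_layer('conv1').output)
conv1_activations = feature_extractor.predict(images)  # images: a hypothetical (N, 224, 224, 3) array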
My neural network receives a (1000, 1000, 5) shape array (5 stacked raster images) which undergoes convolution in one branch, and a (12,) shape array (just 12 numbers) which goes through a couple of dense layers in a second branch.
The outputs are concatenated into a (31, 31, 65) shape tensor which then goes through deconvolution into a final (1000, 1000) shape array.
My Issue:
I made my own simple loss function (mean error), because the output represents temperature in an area.
My issue is that my loss drops significantly over 200 epochs (both loss and val_loss, from a small decimal to about -3) while the accuracy hovers around 0.002 the entire time.
I have lowered the learning rate to as little as 1e-5. I have added more samples to the training set (there aren't many to begin with, unfortunately), and both increased (for fear of overfitting) and decreased (for lack of data) the batch size. All input data is normalized to [0, 1], which makes a loss below -1 unreasonable.
I am not sure whether I should use a different optimizer for this task, or different activation, or just remove a layer or two. But mostly I'd love to understand what is happening to make the model so unreliable.
I really tried to refrain from having to post the entire thing on here but I am officially out of ideas.
MLP Branch
dim = 12
inputs = Input(shape = (dim, ))
x = inputs
x = Dense(dim * 4, activation = 'relu')(x)
x = Dense(dim * 16, activation = 'relu')(x)
x = Dense(961, activation = 'relu')(x) # 961 nodes
x = Reshape((31, 31, 1))(x) # (31, 31, 1) array
model1 = Model(inputs, x)
Convolutional Branch
inputShape = (1000, 1000, 5)
chanDim = -1
inputs = Input(shape = inputShape)
x = inputs
# layer 1: conv, f = 8, pool = 2
x = Conv2D(8, (3, 3), padding = 'same', activation = 'relu')(x)
x = BatchNormalization(axis = chanDim)(x)
x = MaxPooling2D(pool_size = (2, 2))(x)
# layer 2: conv, f = 16, pool = 2
x = Conv2D(16, (3, 3), padding = 'same', activation = 'relu')(x)
x = BatchNormalization(axis = chanDim)(x)
x = MaxPooling2D(pool_size = (2, 2))(x)
# layer 3: conv, f = 32, pool = 2
x = Conv2D(32, (3, 3), padding = 'same', activation = 'relu')(x)
x = BatchNormalization(axis = chanDim)(x)
x = MaxPooling2D(pool_size = (2, 2))(x)
# layer 4: conv = 64, pool = 4
x = Conv2D(64, (3, 3), padding = 'same', activation = 'relu')(x)
x = BatchNormalization(axis = chanDim)(x)
x = MaxPooling2D(pool_size = (4, 4))(x)
model2 = Model(inputs, x)
Deconvolution
combinedInput = Concatenate()([model1.output, model2.output])
x = combinedInput # (31, 31, 65)
x = Conv2DTranspose(43, (3, 3), strides = (4, 4), padding = 'same', activation = 'relu')(x) # (124, 124, 43)
x = Conv2DTranspose(22, (3, 3), strides = (2, 2), padding = 'same', activation = 'relu')(x) # (248, 248, 22)
x = Lambda(lambda y: spatial_2d_padding(y))(x) # (250, 250, 22)
x = Conv2DTranspose(10, (3, 3), strides = (2, 2), padding = 'same', activation = 'relu')(x) # (500, 500, 10)
x = Conv2DTranspose(1, (3, 3), strides = (2, 2), padding = 'same', activation = 'linear')(x) # (1000, 1000, 1)
x = Lambda(lambda y: squeeze(y, axis = 3))(x) # (1000, 1000)
Compiling
def custom_loss(y_actual, y_predicted):
custom_loss_value = mean(y_actual - y_predicted)
return custom_loss_value
model = Model(inputs = [model1.input, model2.input], outputs = x)
model.compile(loss = custom_loss, optimizer = Adam(lr = 0.000001), metrics = ['mae'])
# train with epochs = 200, batch_size = 12
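For completeness, mean in the custom loss (and spatial_2d_padding / squeeze in the Lambda layers above) are presumably the Keras backend functions, i.e. something along the lines of:
from keras.backend import mean, spatial_2d_padding, squeeze  # or the tensorflow.keras.backend equivalents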
The Issue
As I explained above, my loss never stabilizes and the accuracy hovers roughly around the same number over the epochs.
I'd love to know possible reasons and possible solutions.
Edits:
Since writing this question I have attempted:
Transferring layers from the convolution branch to the deconvolution branch.
Adding BatchNormalization() after every Conv2DTranspose() layer (see the sketch below).
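For instance, the second attempt turns each deconvolution step into this pattern (a sketch of one block, reusing the hyperparameters above):
x = Conv2DTranspose(43, (3, 3), strides = (4, 4), padding = 'same', activation = 'relu')(x)
x = BatchNormalization()(x)  # added after every Conv2DTranspose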
I'm relatively new to Keras/CNNs. I've built a model that concatenates the output from 3 sequential CNNs with some other metadata into a final Dense network. The outputs from the 3 individual branches are sensible and perform OK, but the final output, when concatenated with the metadata, shows worse performance and doesn't seem to learn - even though some of this metadata should be very useful for prediction. The labels are one-hot encoded classification data (4 different labels). I'm a bit confused as to why the final concatenated model is performing so poorly compared to the individual pieces and would appreciate any insight into what I might be doing wrong here. Thanks!
# create first conv layers
first_input = Input(shape=input_shape, dtype='int32', name='first_input')
x = Embedding(input_dim=num_features,output_dim=embedding_dim,input_length=input_shape[0])(first_input)
#x = Dropout(rate = dropout_rate)(x)
x = Conv1D(filters=filters,
kernel_size=kernel_size,
strides = 1,
activation='relu',
bias_initializer='random_uniform',
padding='same')(x)
x = Conv1D(filters=filters,
kernel_size=kernel_size,
strides = 1,
activation='relu',
bias_initializer='random_uniform',
padding='same')(x)
x = MaxPooling1D(pool_size=pool_size)(x)
x = Conv1D(filters=filters * 2,
kernel_size=kernel_size,
strides = 1,
activation='relu',
bias_initializer='random_uniform',
padding='same')(x)
x = Conv1D(filters=filters * 2,
kernel_size=kernel_size,
strides = 1,
activation='relu',
bias_initializer='random_uniform',
padding='same')(x)
x = GlobalAveragePooling1D()(x)
aux_predictions = Dense(op_units, activation=op_activation)(x)
# now create a convolutional model for second
second_input = Input(shape=input_shape, dtype='int32', name='second_input')
x = Embedding(input_dim=num_features,output_dim=embedding_dim,input_length=input_shape[0])(second_input)
#x = Dropout(rate = dropout_rate)(x)
x = Conv1D(filters=filters,
kernel_size=kernel_size,
strides = 1,
activation='relu',
bias_initializer='random_uniform',
padding='same')(x)
x = Conv1D(filters=filters,
kernel_size=kernel_size,
strides = 1,
activation='relu',
bias_initializer='random_uniform',
padding='same')(x)
x = MaxPooling1D(pool_size=pool_size)(x)
x = Conv1D(filters=filters * 2,
kernel_size=kernel_size,
strides = 1,
activation='relu',
bias_initializer='random_uniform',
padding='same')(x)
x = Conv1D(filters=filters * 2,
kernel_size=kernel_size,
strides = 1,
activation='relu',
bias_initializer='random_uniform',
padding='same')(x)
x = GlobalAveragePooling1D()(x)
aux_predictions2 = Dense(op_units, activation=op_activation)(x)
# now create a convolutional model for second
third_input = Input(shape=input_shape, dtype='int32', name='third_input')
x = Embedding(input_dim=num_features,output_dim=embedding_dim,input_length=input_shape[0])(third_input)
#x = Dropout(rate = dropout_rate)(x)
x = Conv1D(filters=filters,
kernel_size=kernel_size,
strides = 1,
activation='relu',
bias_initializer='random_uniform',
padding='same')(x)
x = Conv1D(filters=filters,
kernel_size=kernel_size,
strides = 1,
activation='relu',
bias_initializer='random_uniform',
padding='same')(x)
x = MaxPooling1D(pool_size=pool_size)(x)
x = Conv1D(filters=filters * 2,
kernel_size=kernel_size,
strides = 1,
activation='relu',
bias_initializer='random_uniform',
padding='same')(x)
x = Conv1D(filters=filters * 2,
kernel_size=kernel_size,
strides = 1,
activation='relu',
bias_initializer='random_uniform',
padding='same')(x)
x = GlobalAveragePooling1D()(x)
aux_predictions3 = Dense(op_units, activation=op_activation)(x)
# Now combine three CNN layers with metadata
auxiliary_input = Input(shape=metadata_dim, name='aux_input')
x = keras.layers.concatenate([aux_predictions, aux_predictions2, aux_predictions3, auxiliary_input ])
#x = Dropout(rate = dropout_rate)(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
model_output = Dense(op_units, activation=op_activation, name='model_output')(x)
opt = SGD(lr=0.01)
fullmetamodel = Model(inputs=[first_input, second_input, third_input, auxiliary_input], outputs=[aux_predictions, aux_predictions2, aux_predictions3, model_output])
fullmetamodel.compile(
metrics=['categorical_accuracy'], loss='categorical_crossentropy',
loss_weights=[0.2, 0.2, 0.2, 1.], optimizer = opt)
callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', patience=2), TQDMNotebookCallback(leave_inner=False, leave_outer = True)]
fullmetamodel.fit(x=[first_x_train, second_x_train, third_x_train, training_meta], y=[training_labels,training_labels,training_labels, training_labels],
batch_size=32, epochs=40, validation_data=([first_x_val, second_x_val, third_x_val, val_meta], [val_labels, val_labels, val_labels, val_labels])
, verbose = 0, callbacks = callbacks) # starts training
# Output, three conv layers working ok, concatenated model performing poorly
Training
50% 20/40 [2:49:34<2:49:23, 508.20s/it]
Epoch 20
[loss: 8.002, dense_118_loss: 0.749, dense_119_loss: 0.769, dense_120_loss: 0.876, model_output_loss: 7.523, dense_118_categorical_accuracy: 0.686
, dense_119_categorical_accuracy: 0.626, dense_120_categorical_accuracy: 0.620, model_output_categorical_accuracy: 0.532] : 66% 265184/400000 [05:13<02:40, 840.90it/s]