Overriding keras predict function - python

I have Keras model that accepts inputs which have 4D shapes as (n, height, width, channel).
However, my data generator produces 2D arrays of shape (n, width*height), while the predict function of Keras expects 4D inputs. I cannot change the data generator because the model will be tested by someone else. So, is there a way to override the predict function of Keras?
My model structure
a = Input(shape=(width*height,))
d1 = 16 # depth of filter kernel each layer
d2 = 16
d3 = 64
d4 = 128
d5 = 256
drop_out = 0.25
patch_size = (3, 3)
k_size = (2, 2)
reshape = Reshape((height, width, 1))(a)
conv1 = Conv2D(filters=d1, kernel_size=patch_size, padding='same', activation='relu')(reshape)
conv1 = MaxPooling2D(pool_size=k_size, padding='same')(conv1)
conv2 = Convolution2D(filters=d2, kernel_size=patch_size, padding='same', activation='relu')(conv1)
conv2 = MaxPooling2D(pool_size=k_size, padding='same')(conv2)
conv3 = Convolution2D(filters=d3, kernel_size=patch_size, padding='same', activation='relu')(conv2)
conv3 = MaxPooling2D(pool_size=k_size, padding='same')(conv3)
conv4 = Convolution2D(filters=d4, kernel_size=patch_size, padding='same', activation='relu')(conv3)
conv4 = MaxPooling2D(pool_size=k_size, padding='same')(conv4)
conv5 = Convolution2D(filters=d5, kernel_size=patch_size, padding='same', activation='relu')(conv4)
conv5 = MaxPooling2D(pool_size=k_size, padding='same')(conv5)
x = Flatten()(conv5)
x = Dropout(drop_out)(x)
node = 32
x_1 = Dense(node, activation='relu')(x) # connect the flatten layer to five classifier,each one comes to a digit.
x_2 = Dense(node, activation='relu')(x)
x_3 = Dense(node, activation='relu')(x)
x_4 = Dense(node, activation='relu')(x)
x_5 = Dense(node, activation='relu')(x)
d1 = Dense(n_class, activation='softmax')(x_1)
d2 = Dense(n_class, activation='softmax')(x_2)
d3 = Dense(n_class, activation='softmax')(x_3)
d4 = Dense(n_class, activation='softmax')(x_4)
d5 = Dense(n_class, activation='softmax')(x_5)
outputs = [d1, d2, d3, d4, d5]
model = Model(a, outputs)
model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
model.fit(raw_train_data, raw_train_target, batch_size=200, epochs=5, validation_split=0.2)

You don't override the predict, you simply add a Reshape layer at the beginning of your model.
With the functional API:
from keras.layers import *
# Flat (width*height,) input per sample; the Reshape layer restores the
# image layout that the convolutional layers expect.
# NOTE(review): the question reshapes to (height, width, 1) -- use the same
# axis order as your flattened data.
inp = Input((width*height,))
first = Reshape((width,height,1))(inp)
..... other layers.....
model = Model(inp, outputFromTheLastLayer)
With a sequential model:
model = Sequential()
model.add(Reshape((width,height,1), input_shape = (width*height,)))
model.add(otherlayers)
About the output shape.
Since you have 5 outputs, you need your target array to be a list of five arrays:
raw_train_target = [target1,target2,target3,target4,target5]
If you cannot do that, and raw_train_target is one single array with the targets all following a sequence, you can try to use a concatenate layer at the end:
output = Concatenate()(outputs)

Related

How to set batch size in train_on_batch for a convolutional neural network in keras

I am trying to online-train a neural network. I want to use the Tensorflow Keras train_on_batch function on a convolutional neural network. Here it is:
look_back=1600
inputTensor = keras.layers.Input([look_back+3,2])
inputTensorReshaped = tf.reshape(inputTensor, [1, look_back + 3, 2, 1])
#split into 2 groups
inputgroup1 = keras.layers.Lambda(lambda x: x[:, :3], output_shape=((1, 3, 2, 1)))(inputTensorReshaped)
inputgroup2 = keras.layers.Lambda(lambda x: x[:, 3:look_back + 3], output_shape=((1, look_back,2, 1)))(inputTensorReshaped)
conv1 = keras.layers.Conv2D(filters=1024, kernel_size=(10, 2), activation='relu')(inputgroup2)#10
pool1 = keras.layers.MaxPooling2D(pool_size=(2, 1))(conv1)
dropout1 = keras.layers.Dropout(rate=0.1)(pool1)
norm1 = keras.layers.LayerNormalization()(dropout1)
conv2 = keras.layers.Conv2D(filters=512, kernel_size=(8, 1), activation='relu')(norm1)
pool2 = keras.layers.MaxPooling2D(pool_size=(2, 1))(conv2)
dropout2 = keras.layers.Dropout(rate=0.1)(pool2)
norm2 = keras.layers.LayerNormalization()(dropout2)
conv3 = keras.layers.Conv2D(filters=256, kernel_size=(6, 1), activation='relu')(norm2)
pool3 = keras.layers.MaxPooling2D(pool_size=(2, 1))(conv3)
dropout3 = keras.layers.Dropout(rate=0.1)(pool3)
norm3 = keras.layers.LayerNormalization()(dropout3)
conv4 = keras.layers.Conv2D(filters=128, kernel_size=(4, 1), activation='relu')(norm3)
pool4 = keras.layers.MaxPooling2D(pool_size=(2, 1))(conv4)
dropout4 = keras.layers.Dropout(rate=0.1)(pool4)
norm4 = keras.layers.LayerNormalization()(dropout4)
conv5 = keras.layers.Conv2D(filters=64, kernel_size=(2, 1), activation='relu')(norm4)
pool5 = keras.layers.MaxPooling2D(pool_size=(2, 1))(conv5)
dropout5 = keras.layers.Dropout(rate=0.1)(pool5)
norm5 = keras.layers.LayerNormalization()(dropout5)
flatten1 = keras.layers.Flatten()(norm5)
dense1 = keras.layers.Dense(32, activation='relu')(flatten1)
misclayer1 = keras.layers.Dense(32, activation='relu')(inputgroup1)
miscdropout1 = keras.layers.Dropout(rate=0.1)(misclayer1)
miscnorm1 = keras.layers.LayerNormalization()(miscdropout1)
misclayer2 = keras.layers.Dense(128, activation='relu')(miscnorm1)
miscdropout2 = keras.layers.Dropout(rate=0.1)(misclayer2)
miscnorm2 = keras.layers.LayerNormalization()(miscdropout2)
misclayer3 = keras.layers.Dense(32, activation='relu')(miscnorm2)
miscdropout3 = keras.layers.Dropout(rate=0.1)(misclayer3)
miscnorm3 = keras.layers.LayerNormalization()(miscdropout3)
miscflatten1 = keras.layers.Flatten()(miscnorm3)
misclayer4 = keras.layers.Dense(32, activation='relu')(miscflatten1)
rejoinlayer = keras.layers.Concatenate()([dense1, misclayer4])
processing1 = keras.layers.Dense(64, activation='relu')(rejoinlayer)
totalnorm1 = keras.layers.LayerNormalization()(processing1)
processing2 = keras.layers.Dense(32, activation='relu')(totalnorm1)
totaldropout1 = keras.layers.Dropout(rate=0.2)(processing2)
processing3 = keras.layers.Dense(16, activation='relu')(totaldropout1)
totalnorm2 = keras.layers.LayerNormalization()(processing3)
processing4 = keras.layers.Dense(8, activation='relu')(totalnorm2)
totaldropout2 = keras.layers.Dropout(rate=0.2)(processing4)
processing5 = keras.layers.Dense(4, activation='relu')(totaldropout2)
output = keras.layers.Dense(1, activation='linear')(processing5)
model = keras.Model(inputTensor,output)
model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.00005, momentum=0.1, nesterov=True), loss="mean_squared_error")
#trains the model with the 1st state, action, and value
def train():
    """Train the model on one batch built from the first 8 rows of ``qtable``.

    Each sample is a list that starts with the action pair and is then
    extended with the entries of that row's state. Each target is a
    one-element list holding the row's value. The result returned by
    ``model.train_on_batch`` is printed.
    """
    global qtable
    x = []
    y = []
    for i in range(0, 8):
        state = qtable.loc[qtable.index[i], "state"]
        # NOTE(review): the second element always reads row 0's action,
        # not row i's -- confirm this is intended.
        action = [qtable.loc[qtable.index[i], "action"], qtable.loc[qtable.index[0], "action"]]
        x.append([action])
        x[i].extend(state)
        y.append([qtable.loc[qtable.index[i], "value"]])
    print("training...loss:")
    with tf.device('/gpu:0'):
        # NaNs are replaced by nan_to_num before the arrays reach the model.
        print(model.train_on_batch(np.nan_to_num(np.array(x)), np.nan_to_num(np.array(y))))
In this case the variable "state" would be a 1202-by-2 list [[a,b],[c,d],[e,f],...] and the variable "action" would be a 1-by-2 list [a,b] before being appended/extended to x. In theory, the training I want is a batch size of 8 with a 1203-by-2 input shape. However, I get this error:
ValueError: Cannot reshape a tensor with 19248 elements to shape [1,1203,2,1] (2406 elements) for '{{node model/tf.reshape/Reshape}} = Reshape[T=DT_FLOAT, Tshape=DT_INT32](IteratorGetNext, model/tf.reshape/Reshape/shape)' with input shapes: [8,1203,2], [4] and with input tensors computed as partial shapes: input[1] = [1,1203,2,1].
This shows that all the inputs and outputs are being put into the CNN at once, which is not what I want. Instead, I want the data to be fed in as a batch of 8. How can I do this? Am I even using "train_on_batch" correctly?
batch_size:: Integer or None. Number of samples per batch of
computation. If unspecified, batch_size will default to 32. Do not
specify the batch_size if your data is in the form of a dataset,
generators, or keras.utils.Sequence instances (since they generate
batches).
Below is an example that passes batch_size:
num_classes = 5
model = Sequential([
layers.experimental.preprocessing.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
layers.Conv2D(16, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(32, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Conv2D(64, 3, padding='same', activation='relu'),
layers.MaxPooling2D(),
layers.Flatten(),
layers.Dense(128, activation='relu'),
layers.Dense(num_classes)
])
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
# NOTE(review): per the batch_size description quoted above, drop the
# batch_size argument if train_ds is already a batched dataset/generator.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    batch_size=32,
    epochs=10,
)

How can I concatenate dynamic values with the flatten vector in CNN

I'm trying to increase the accuracy of CNN by computing some dynamic values such as Hu moments of the images during the training phase and then feed them to the fully connected layer with the flatten vector as shown in the image of my model:
I want to compute Hu moments for each image in the dataset; then, after the flatten operation, I want to concatenate the values of the Hu moments with the flattened vector and feed the result to the fully connected layer.
This is the model I'm using (Tensorflow Keras):
layer1 = Conv2D(16, (3, 3),padding="same", activation='relu')(inpx)
layer2 = Conv2D(32, kernel_size=(3, 3),padding="same", activation='relu')(layer1)
layer3 = MaxPooling2D(pool_size=(2, 2))(layer2)
layer4 = Conv2D(64, kernel_size=(5, 5),padding="same", activation='relu')(layer3)
layer5 = Conv2D(128, kernel_size=(5, 5),padding="same", activation='relu')(layer4)
layer6 = MaxPooling2D(pool_size=(2, 2))(layer5)
layer7 = Dropout(0.5)(layer6)
layer8 = Flatten()(layer7)
layer9 = Dense(250, activation='sigmoid')(layer8)
layer10 = Dense(10, activation='softmax')(layer9)
model = Model([inpx], layer10)
model.compile(optimizer=keras.optimizers.Adadelta(),
loss=keras.losses.categorical_crossentropy,
metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, batch_size=500)
score = model.evaluate(x_test, y_test, verbose=0)
The dataset I'm using is MNIST handwritten digits.
Hmm, I don't know what the Hu moments and the Extent and Solidity are, but I'm assuming they're one-dimensional:
# image = tf.Tensor
#tf.function
def calc_hu(image):
    """Calculate hu for ``image``.

    Placeholder: replace the ``...`` below with the real computation.
    """
    hu = ...
    return hu
class HuLayer(tf.keras.layers.Layer):
    """Keras layer whose forward pass returns ``calc_hu`` of its inputs."""

    def call(self, inputs):
        return calc_hu(inputs)
#tf.function
def calc_extend(image):
    """Calculate extend for ``image``.

    Placeholder: replace the ``...`` below with the real computation.
    """
    extend = ...
    return extend
class ExtendLayer(tf.keras.layers.Layer):
    """Keras layer whose forward pass returns ``calc_extend`` of its inputs."""

    def call(self, inputs):
        return calc_extend(inputs)
layer1 = Conv2D(16, (3, 3),padding="same", activation='relu')(inpx)
layer2 = Conv2D(32, kernel_size=(3, 3),padding="same", activation='relu')(layer1)
layer3 = MaxPooling2D(pool_size=(2, 2))(layer2)
layer4 = Conv2D(64, kernel_size=(5, 5),padding="same", activation='relu')(layer3)
layer5 = Conv2D(128, kernel_size=(5, 5),padding="same", activation='relu')(layer4)
layer6 = MaxPooling2D(pool_size=(2, 2))(layer5)
layer7 = Dropout(0.5)(layer6)
layer8 = Flatten()(layer7)
# Join the flattened CNN features with the custom-layer outputs, both of
# which are computed directly from the model input tensor inpx.
layer8_ = tf.keras.layers.concatenate([layer8, HuLayer()(inpx), ExtendLayer()(inpx)])
layer9 = Dense(250, activation='sigmoid')(layer8_)
layer10 = Dense(10, activation='softmax')(layer9)
I didn't test this code but it should set you on your way. Hope it helps you enough to get going!

Keras U-Net weighted loss implementation

I'm trying to separate close objects as was shown in the U-Net paper (here). For this, one generates weight maps which can be used for pixel-wise losses. The following code describes the network I use from this blog post.
x_train_val = # list of images (imgs, 256, 256, 3)
y_train_val = # list of masks (imgs, 256, 256, 1)
y_weights = # list of weight maps (imgs, 256, 256, 1) according to the blog post
# visual inspection confirms the correct calculation of these maps
# Blog posts' loss function
def my_loss(target, output):
    """Return the negated sum of ``target * output`` over the last axis."""
    return - tf.reduce_sum(target * output,
                           len(output.get_shape()) - 1)
# Standard Unet model from blog post
_epsilon = tf.convert_to_tensor(K.epsilon(), np.float32)
def make_weighted_loss_unet(input_shape, n_classes):
    """Build a U-Net that takes an image and a weight map as inputs.

    The model output is the clipped log of the normalised softmax
    activations multiplied element-wise by the weight-map input.

    Args:
        input_shape: shape tuple of the image input; its first two entries
            are reused for the weight-map input.
        n_classes: number of filters in the final 1x1 convolution and
            last dimension of the weight-map input.

    Returns:
        A ``Model`` with inputs ``[ip, weight_ip]`` and the weighted
        log-softmax tensor as its only output.
    """
    ip = L.Input(shape=input_shape)
    weight_ip = L.Input(shape=input_shape[:2] + (n_classes,))

    # Contracting path
    conv1 = L.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(ip)
    conv1 = L.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
    conv1 = L.Dropout(0.1)(conv1)
    mpool1 = L.MaxPool2D()(conv1)

    conv2 = L.Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(mpool1)
    conv2 = L.Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
    conv2 = L.Dropout(0.2)(conv2)
    mpool2 = L.MaxPool2D()(conv2)

    conv3 = L.Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(mpool2)
    conv3 = L.Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
    conv3 = L.Dropout(0.3)(conv3)
    mpool3 = L.MaxPool2D()(conv3)

    conv4 = L.Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(mpool3)
    conv4 = L.Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
    conv4 = L.Dropout(0.4)(conv4)
    mpool4 = L.MaxPool2D()(conv4)

    # Bottleneck
    conv5 = L.Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(mpool4)
    conv5 = L.Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
    conv5 = L.Dropout(0.5)(conv5)

    # Expanding path: each upsampled tensor is concatenated with the
    # matching contracting-path tensor.
    up6 = L.Conv2DTranspose(512, 2, strides=2, kernel_initializer='he_normal', padding='same')(conv5)
    conv6 = L.Concatenate()([up6, conv4])
    conv6 = L.Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
    conv6 = L.Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
    conv6 = L.Dropout(0.4)(conv6)

    up7 = L.Conv2DTranspose(256, 2, strides=2, kernel_initializer='he_normal', padding='same')(conv6)
    conv7 = L.Concatenate()([up7, conv3])
    conv7 = L.Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
    conv7 = L.Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
    conv7 = L.Dropout(0.3)(conv7)

    up8 = L.Conv2DTranspose(128, 2, strides=2, kernel_initializer='he_normal', padding='same')(conv7)
    conv8 = L.Concatenate()([up8, conv2])
    conv8 = L.Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
    conv8 = L.Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
    conv8 = L.Dropout(0.2)(conv8)

    up9 = L.Conv2DTranspose(64, 2, strides=2, kernel_initializer='he_normal', padding='same')(conv8)
    conv9 = L.Concatenate()([up9, conv1])
    conv9 = L.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv9 = L.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv9 = L.Dropout(0.1)(conv9)

    c10 = L.Conv2D(n_classes, 1, activation='softmax', kernel_initializer='he_normal')(conv9)

    # Mimic crossentropy loss
    c11 = L.Lambda(lambda x: x / tf.reduce_sum(x, len(x.get_shape()) - 1, True))(c10)
    c11 = L.Lambda(lambda x: tf.clip_by_value(x, _epsilon, 1. - _epsilon))(c11)
    c11 = L.Lambda(lambda x: K.log(x))(c11)
    weighted_sm = L.multiply([c11, weight_ip])

    model = Model(inputs=[ip, weight_ip], outputs=[weighted_sm])
    return model
I then compile and fit the model as is shown below:
model = make_weighted_loss_unet((256, 256, 3), 1) # shape of input, number of classes
model.compile(optimizer='adam', loss=my_loss, metrics=['acc'])
model.fit([x_train_val, y_weights], y_train_val, validation_split=0.1, epochs=1)
The model can then train as usual. However, the loss doesn't seem to improve much. Furthermore, when I try to predict on new images, I obviously don't have the weight maps (because they are calculated on the labeled masks). I tried to use empty / zero arrays shaped like the weight map, but that only yields blank / zero predictions. I also tried different metrics and more standard losses without any success.
Did anyone face the same issue or have an alternative in implementing this weighted loss? Thanks in advance. BBQuercus
A simpler way to write custom loss with pixel weights
In your code, the loss is scattered around, between my_loss and make_weighted_loss_unet functions. You can add targets as an input and use model.add_loss to structure the code better :
def make_weighted_loss_unet(input_shape, n_classes):
    """Build the U-Net with the pixel-weighted loss attached via ``add_loss``.

    The weight map and the targets are extra model inputs, so the loss is
    computed inside the model and none is passed to ``model.compile``.
    """
    ip = L.Input(shape=input_shape)
    weight_ip = L.Input(shape=input_shape[:2] + (n_classes,))
    targets = L.Input(shape=input_shape[:2] + (n_classes,))
    # .... rest of your model definition code ...
    c10 = L.Conv2D(n_classes, 1, activation='softmax', kernel_initializer='he_normal')(conv9)
    # The model has to exist before a loss can be attached to it.
    model = Model(inputs=[ip, weight_ip, targets], outputs=[c10])
    model.add_loss(pixel_weighted_cross_entropy(weight_ip, targets, c10))
    # NO NEED to specify loss in model.compile
    return model
def pixel_weighted_cross_entropy(weights, targets, predictions):
    """Return the mean of the categorical cross-entropy scaled by ``weights``."""
    loss_val = keras.losses.categorical_crossentropy(targets, predictions)
    # NOTE(review): categorical_crossentropy presumably reduces the class
    # axis, leaving loss_val with one dimension fewer than weights --
    # confirm the two shapes broadcast as intended.
    weighted_loss_val = weights * loss_val
    return K.mean(weighted_loss_val)
If you don't refactor your code to the above approach, next section shows how to still run inference without issues
How to run your model in inference
Option 1 : Use another Model object for inference
You can create a Model used for training and another used for inference. Both are largely the same, except that the inference Model does not take weight_ip and returns the earlier output c10.
Here's an example code that adds an argument is_training=True to decide which Model to return :
def make_weighted_loss_unet(input_shape, n_classes, is_training=True):
    """Build the U-Net for either training or inference.

    Args:
        input_shape: shape tuple of the image input.
        n_classes: number of filters in the final 1x1 convolution.
        is_training: when True, return the model that also takes the
            weight map and outputs the weighted log-softmax; when False,
            return the model that takes only the image and outputs the
            softmax ``c10``.
    """
    ip = L.Input(shape=input_shape)
    conv1 = L.Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(ip)
    # .... rest of your model definition code ...
    c10 = L.Conv2D(n_classes, 1, activation='softmax', kernel_initializer='he_normal')(conv9)
    if is_training:
        # Mimic crossentropy loss
        c11 = L.Lambda(lambda x: x / tf.reduce_sum(x, len(x.get_shape()) - 1, True))(c10)
        c11 = L.Lambda(lambda x: tf.clip_by_value(x, _epsilon, 1. - _epsilon))(c11)
        c11 = L.Lambda(lambda x: K.log(x))(c11)
        weight_ip = L.Input(shape=input_shape[:2] + (n_classes,))
        weighted_sm = L.multiply([c11, weight_ip])
        return Model(inputs=[ip, weight_ip], outputs=[weighted_sm])
    else:
        return Model(inputs=[ip], outputs=[c10])
Option 2 : Use K.function
If you don't want to mess with your Model definition method (make_weighted_loss_unet) and want to achieve the same result outside, you can use a function that extracts the subgraph relevant for inference.
In your inference function:
from keras import backend as K
model = make_weighted_loss_unet(input_shape, n_classes)
# Sub-graph from the named image input to the named softmax output; the
# weight-map input is not part of it.
inference_function = K.function([model.get_layer("input_layer").input],
                                [model.get_layer("output_softmax_layer").output])
# K.function takes a list of input arrays and returns a list of outputs,
# one per requested tensor.
predicted_heatmap = inference_function([new_image])[0]
Note that you'll have to give name= to your ip layer and c10 layer to be able to retrieve them via model.get_layer(name) :
ip = L.Input(shape=input_shape, name="input_layer")
and
c10 = L.Conv2D(n_classes, 1, activation='softmax', kernel_initializer='he_normal', name="output_softmax_layer")(conv9)

How can I limit regression output between 0 to 1 in keras

I am trying to detect the single pixel location of a single object in an image. I have a keras CNN regression network with my image tensor as the input, and a 3 item vector as the output.
First item: Is a 1 (if an object was found) or 0 (no object was found)
Second item: Is a number between 0 and 1 which indicates how far along the x axis is the object
Third item: Is a number between 0 and 1 which indicates how far along the y axis is the object
I have trained the network on 2000 test images and 500 validation images, and the val_loss is far less than 1, and the val_acc is best at around 0.94. Excellent.
But then when I predict the output, I find the values for all three output items are not between 0 and 1, they are actually between -2 and 3 approximately. All three items should be between 0 and 1.
I have not used any non-linear activation functions on the output layer, and have used relus for all non-output layers. Should I be using a softmax, even though it is non-linear? The second and third items are predicting the x and y axis of the image, which appear to me as linear quantities.
Here is my keras network:
inputs = Input((256, 256, 1))
base_kernels = 64
# 256
conv1 = Conv2D(base_kernels, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
conv1 = BatchNormalization()(conv1)
conv1 = Conv2D(base_kernels, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
conv1 = BatchNormalization()(conv1)
conv1 = Dropout(0.2)(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
# 128
conv2 = Conv2D(base_kernels * 2, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool1)
conv2 = BatchNormalization()(conv2)
conv2 = Conv2D(base_kernels * 2, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
conv2 = BatchNormalization()(conv2)
conv2 = Dropout(0.2)(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
# 64
conv3 = Conv2D(base_kernels * 4, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool2)
conv3 = BatchNormalization()(conv3)
conv3 = Conv2D(base_kernels * 4, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
conv3 = BatchNormalization()(conv3)
conv3 = Dropout(0.2)(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
flat = Flatten()(pool3)
dense = Dense(256, activation='relu')(flat)
output = Dense(3)(dense)
model = Model(inputs=[inputs], outputs=[output])
optimizer = Adam(lr=1e-4)
model.compile(optimizer=optimizer, loss='mean_absolute_error', metrics=['accuracy'])
Can anyone please help? Thanks! :)
Chris
The sigmoid activation produces outputs between zero and one, so if you use it as the activation of your last layer (the output), the network's output will be between zero and one.
output = Dense(3, activation="sigmoid")(dense)

backprop in merged models

I am working on Conditional GANs and my generator and discriminator both have two inputs and use merged models like this:-
z = Input(shape=(100,))
temp = Input(shape=(384,))
generator=Generator()
img = generator([z,temp])
valid = discriminator([img,temp])
combined = Model([z,temp], valid)
combined.compile(loss='binary_crossentropy', optimizer=optimizer)
DCGAN is being used to classify and generate images conditional to the "temp" embedding and I am using Adam "optimizer = Adam(0.0001, 0.5)" for both models.
The generator takes the input noise "z" and "temp", merges them, and produces 128x128x3 images. The discriminator takes an image and performs conv2d on it, then reshapes "temp" to (1,128,3), merges both, applies further conv2d layers, and outputs a single sigmoid unit. My question is: during backprop, how are the weights of a merged model updated, let's say of the discriminator here:
inp1 = Input(shape=(128,128,3),name='inp1')
inp2 = Input(shape=(384,),name='inp2')
d2=Reshape(target_shape=(1,128,3))(inp2)
d1 = Conv2D(16, kernel_size=5, strides=2, padding="same")(inp1)
d1=LeakyReLU(alpha=0.2)(d1)
d1=Dropout(0.25)(d1)
d1 = Conv2D(32, kernel_size=5, strides=2, padding="same")(inp1)
d1=BatchNormalization(momentum=0.8)(d1)
d1=LeakyReLU(alpha=0.2)(d1)
d1=Dropout(0.25)(d1)
d1 = Conv2D(64, kernel_size=5, strides=2, padding="same")(inp1)
d1=BatchNormalization(momentum=0.8)(d1)
d1=LeakyReLU(alpha=0.2)(d1)
d1=Dropout(0.25)(d1)
d1 = Conv2D(128, kernel_size=5, strides=2, padding="same")(inp1)
d1=BatchNormalization(momentum=0.8)(d1)
d1=LeakyReLU(alpha=0.2)(d1)
d1=Dropout(0.25)(d1)
d1 = Conv2D(256, kernel_size=5, strides=2, padding="same")(inp1)
d1=BatchNormalization(momentum=0.8)(d1)
d1=LeakyReLU(alpha=0.2)(d1)
d1=Dropout(0.25)(d1)
d1=Flatten()(d1)
d1=Dense(768, activation="relu")(d1)
d1=Reshape(target_shape=(2,128,3))(d1)
output=concatenate(
[
d1,
d2,
]
,axis=1
)
d1 = Conv2D(64, kernel_size=5, strides=2, padding="same")(output)
d1=BatchNormalization(momentum=0.8)(d1)
d1=LeakyReLU(alpha=0.2)(d1)
d1=Dropout(0.25)(d1)
d1 = Conv2D(128, kernel_size=5, strides=2, padding="same")(inp1)
d1=BatchNormalization(momentum=0.8)(d1)
d1=LeakyReLU(alpha=0.2)(d1)
d1=Dropout(0.25)(d1)
d1 = Conv2D(256, kernel_size=5, strides=2, padding="same")(inp1)
d1=BatchNormalization(momentum=0.8)(d1)
d1=LeakyReLU(alpha=0.2)(d1)
d1=Dropout(0.25)(d1)
d1=Flatten()(d1)
output=Dense(1,activation='sigmoid')(d1)
model=Model(
inputs=[
inp1,
inp2
],
outputs=[
output
]
)
model.summary()
img = Input(shape=(128,128,3))
text=Input(shape=(384,))
validity = model([img,text])
return Model([img,text], validity)
My discriminator loss starts at 2.02 and rises to around 6.7 in 150 epochs, while the generator loss decreases from 0.80 to 0.00024 in 150 epochs, and I am getting garbage. How can I improve my architecture? I was also wondering whether backprop doesn't work well in merged models because it becomes a lot more complicated.
I am using batchnorm, leaky relu, conv2d + stride, no pooling layers and label smoothing though.

Categories