I have a tensorflow keras model which I want to train with obtained data. However, when training, there is a loss of nan after the first epoch. The Input has a range [-1|1].
This is my model:
x = Input(shape=(8, 8, 7), name='input')
value_head = Conv2D(
filters=1,
kernel_size=(1, 1),
padding='same',
use_bias=False,
activation='linear',
kernel_regularizer=regularizers.l2(0.0001),
)(x)
value_head = BatchNormalization()(value_head)
value_head = LeakyReLU()(value_head)
value_head = Flatten()(value_head)
value_head = Dense(
20,
use_bias=False,
activation='linear',
kernel_regularizer=regularizers.l2(0.0001),
)(x)
value_head = LeakyReLU()(value_head)
value_head = Dense(
1,
use_bias=False,
activation='tanh',
kernel_regularizer=regularizers.l2(0.0001),
name='value_head',
)(value_head)
policy_head = Conv2D(
filters=2,
kernel_size=(1, 1),
padding='same',
use_bias=False,
activation='linear',
kernel_regularizer=regularizers.l2(0.0001),
)(x)
policy_head = BatchNormalization()(policy_head)
policy_head = LeakyReLU()(policy_head)
policy_head = Flatten()(policy_head)
policy_head = Dense(
self.mOutputDim,
use_bias=False,
activation='linear',
kernel_regularizer=regularizers.l2(0.0001),
name='policy_head',
)(x)
model = Model(inputs=[nn_input], outputs=[value_head, policy_head])
model.compile(loss={'value_head': 'mean_squared_error', 'policy_head': softmax_cross_entropy_with_logits},
optimizer=SGD(lr=0.00001, momentum=0.9),
loss_weights={'value_head': 0.5, 'policy_head': 0.5}
)
Training looks as following:
training_states = np.array(
[self.mAI.convert_training_state_node_to_model_input(current_node)
.reshape((8, 8, 7))
for current_node in training_data.mTrainingStateNodes])
training_targets = {
'value_head': np.array([np.array([training_data.mWinner])]),
'policy_head': np.array([current_node.convert_edges_visit_count_to_policy_map()
for current_node in training_data.mTrainingStateNodes])}
fit = model.fit(training_states,
training_targets,
epochs=3,
verbose=1,
validation_split=0,
batch_size=64)
I would be very grateful for a solution and constructive criticism to my question.
Related
I am trying to implement DCGAN presented in this article. Here is my Generator and Discriminator:
ki = keras.initializers.RandomNormal(mean=0.0, stddev=0.02)
def discriminator_model():
discriminator = Sequential([
Conv2D(64, (3,3), strides=(2, 2), padding='same', kernel_initializer=ki, input_shape=[64,64, 3]), # No BatchNormilization in this layer
LeakyReLU(alpha=0.2),
Dropout(0.4),
Conv2D(64, (3,3), strides=(2, 2), padding='same', kernel_initializer=ki),
BatchNormalization(),
LeakyReLU(alpha=0.2),
Dropout(0.4),
Flatten(),
Dense(1, activation='sigmoid', kernel_initializer=ki)
])
return discriminator
===========================================
noise_shape = 100
def generator_model():
generator = Sequential([
Dense(4*4*512, input_shape=[noise_shape]),
Reshape([4,4,512]),
Conv2DTranspose(256, kernel_size=4, strides=2, padding="same", kernel_initializer= ki),
BatchNormalization(),
ReLU(),
Conv2DTranspose(128, kernel_size=4, strides=2, padding="same", kernel_initializer=ki),
BatchNormalization(),
ReLU(),
Conv2DTranspose(64, kernel_size=4, strides=2, padding="same", kernel_initializer=ki),
BatchNormalization(),
ReLU(),
Conv2DTranspose(3, kernel_size=4, strides=2, padding="same", kernel_initializer=ki, activation='tanh') # 3 filters, also no BatchNormilization in this layer
])
return generator
Here I have combined these to to build DCGAN:
DCGAN = Sequential([generator,discriminator])
opt = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
discriminator.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
discriminator.trainable = False
DCGAN.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
Then I prepared my batches and tried to train my model. Here is the code:
epochs = 500
batch_size = 128
loss_from_discriminator_model=[]
loss_from_generator_model=[]
acc_dis = []
acc_gen = []
with tf.device('/gpu:0'):
for epoch in range(epochs):
for i in range(images.shape[0]//batch_size):
# Training Discriminator
noise = np.random.uniform(-1,1,size=[batch_size, noise_shape])
gen_image = generator.predict_on_batch(noise) # Generating fake images
train_dataset = images[i*batch_size:(i+1)*batch_size]
train_labels_real = np.ones(shape=(batch_size,1)) # Real image labels
discriminator.trainable = True
d_loss_real, d_acc_real = discriminator.train_on_batch(train_dataset,train_labels_real) # Training on real images
train_labels_fake = np.zeros(shape=(batch_size,1))
d_loss_fake, d_acc_fake = discriminator.train_on_batch(gen_image,train_labels_fake) # Training on fake images
# Training Generator
noise = np.random.uniform(-1, 1, size=[batch_size,noise_shape])
train_label_fake_for_gen_training = np.ones(shape=(batch_size,1))
discriminator.trainable = False
g_loss, g_acc = DCGAN.train_on_batch(noise, train_label_fake_for_gen_training)
loss_from_discriminator_model.append(d_loss_real+d_loss_fake)
loss_from_generator_model.append(g_loss)
acc_dis.append((d_acc_real + d_acc_fake) / 2)
acc_gen.append(g_acc)
The problem is my modell doesn't seem to learn anything. values of accuracy and loss don't seem rational. Here is a plot of values of generator and discriminator loss during training.
Thanks in advance.
I am trying to get an Adversarial AutoEncoder going using keras Fit method on a keras.model class
but for some reason it is not working.
Keep in mind that I tried updating encoder and decoder at the same time.
I tried giving the disc loss to the encoder with and without the reconstruction loss
The reconstruction loss stayed the same while encoder disc loss kept increasing as the discriminator's own loss kept dropping.
discriminator = keras.Sequential(
[
keras.Input(shape=(4, 4, 128)),
layers.Flatten(),
layers.Dense(128, activation="relu"),
layers.Dense(128, activation="relu"),
layers.Dense(128, activation="relu"),
layers.Dense(1, activation="sigmoid"),
],
name="discriminator",
)
discriminator.summary()
encoder = keras.Sequential(
[
keras.Input(shape=(28, 28, 1)),
layers.Conv2D(24, 3, activation="relu", strides=2, padding="same"),
layers.Conv2D(48, 3, activation="relu", strides=2, padding="same"),
layers.Conv2D(96, 3, activation="relu", strides=2, padding="same"),
layers.Flatten(),
layers.Dense(4 * 4 * 128, activation="linear"),
layers.Reshape((4, 4, 128)),
],
name="encoder",
)
encoder.summary()
decoder = keras.Sequential(
[
keras.Input(shape=(4, 4, 128)),
layers.Flatten(),
layers.Dense(7 * 7 * 64, activation="relu"),
layers.Reshape((7, 7, 64)),
layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same"),
layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same"),
layers.Conv2DTranspose(1, 3, activation="sigmoid", strides=1, padding="same"),
],
name="decoder",
)
I am not sure If it is in the model itself of not. I am using MNIST Dataset for this
class AAE(keras.Model):
def __init__(self, encoder, decoder, discriminator):
super(AAE, self).__init__()
self.encoder = encoder
self.decoder = decoder
self.discriminator = discriminator
self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
self.reconstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
self.disc_tracker = keras.metrics.Mean(name="disc_loss")
self.discEnc_tracker = keras.metrics.Mean(name="discEnc_loss")
#property
def metrics(self):
return [
self.total_loss_tracker,
self.reconstruction_loss_tracker,
self.disc_tracker,
self.discEnc_tracker,
]
def compile(self, di_optimizer, e_optimizer,de_optimizer, loss_fn):
super(AAE, self).compile()
self.dis_optimizer = di_optimizer
self.e_optimizer = e_optimizer
self.de_optimizer = de_optimizer
self.lossBCE = loss_fn[0]
self.lossMAE = loss_fn[1]
def train_step(self, data):
latent = self.encoder(data)
batch_size = 200
dists = tf.random.normal((batch_size,4,4,128))
y_real = tf.ones((batch_size, 1))
y_fake = tf.zeros((batch_size, 1))
real_dist_mix = tf.concat((dists, latent),axis=0)
y_real_fake_mix = tf.concat((y_real, y_fake),axis=0)
with tf.GradientTape() as tape:
predictions = self.discriminator(real_dist_mix)
d_loss = self.lossBCE(y_real_fake_mix, predictions)
grads = tape.gradient(d_loss, self.discriminator.trainable_weights)
self.dis_optimizer.apply_gradients(zip(grads, self.discriminator.trainable_weights))
with tf.GradientTape() as Etape, tf.GradientTape() as Dtape:
latent = self.encoder(data)
reconstruction = self.decoder(latent)
reconstruction_loss = self.lossMAE(data, reconstruction)
total_loss = reconstruction_loss
Egrads = Etape.gradient(total_loss, self.encoder.trainable_weights)
self.e_optimizer.apply_gradients(zip(Egrads, self.encoder.trainable_weights))
Dgrads = Dtape.gradient(total_loss, self.decoder.trainable_weights)
self.de_optimizer.apply_gradients(zip(Dgrads, self.decoder.trainable_weights))
with tf.GradientTape() as tape:
latent = self.encoder(data)
predictions = self.discriminator(latent)
e_loss = self.lossBCE(y_fake, predictions)
grads = tape.gradient(e_loss, self.encoder.trainable_weights)
self.e_optimizer.apply_gradients(zip(grads, self.encoder.trainable_weights))
self.total_loss_tracker.update_state(total_loss)
self.reconstruction_loss_tracker.update_state(reconstruction_loss)
self.disc_tracker.update_state(d_loss)
self.discEnc_tracker.update_state(e_loss)
return {
"loss": self.total_loss_tracker.result(),
"reconstruction_loss": self.reconstruction_loss_tracker.result(),
"disc_loss": self.disc_tracker.result(),
"discEnc_loss": self.discEnc_tracker.result(),
}
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
mnist_digits = np.concatenate([x_train, x_test], axis=0)
mnist_digits = np.expand_dims(mnist_digits, -1).astype("float32") / 255
Aae = AAE(encoder, decoder, discriminator)
#vae.compile(optimizer=keras.optimizers.Adam())
Aae.compile(
di_optimizer=keras.optimizers.Adam(learning_rate=0.00001),
e_optimizer=keras.optimizers.Adam(learning_rate=0.0001),
de_optimizer=keras.optimizers.Adam(learning_rate=0.0001),
loss_fn=[tf.keras.losses.BinaryCrossentropy(),tf.keras.losses.MeanAbsoluteError()]
)
h=Aae.fit(mnist_digits, epochs=15, batch_size=200)
I think that the error is here:
with tf.GradientTape() as tape:
latent = self.encoder(data)
predictions = self.discriminator(latent)
e_loss = self.lossBCE(y_fake, predictions)
grads = tape.gradient(e_loss, self.encoder.trainable_weights)
self.e_optimizer.apply_gradients(zip(grads, self.encoder.trainable_weights))
I would put e_loss = self.lossBCE(y_real, predictions), because the encoder tries to fool the discriminator.
here its my code trying to use KERAS TUNER:
datagen = ImageDataGenerator(
rescale=1.0/255.0,
zoom_range=[-2, 2],
width_shift_range=[-25, 25],
height_shift_range=[-25, 25],
rotation_range=40,
shear_range=40,
horizontal_flip=True,
vertical_flip=True,
brightness_range=[0.98,1.05],
featurewise_center=True,
samplewise_center=True,
# channel_shift_range=1.5,
#featurewise_center=True,
#featurewise_std_normalization=True,
validation_split=0.10)
mean,std=auxfunctions.getMeanStdClassification()
datagen.mean=mean
datagen.std=std
numClasses = 5
width=240 #diabetic retinopaty 120 120, drRafael 40 40, 96 96
height=240
input_shape=(width,height,3)
train_generator = datagen.flow_from_dataframe(
dataframe=trainLabels,
directory='./resized_train_cropped',
x_col="image",
y_col="level",
target_size=(240,240),
batch_size=16,
class_mode='categorical',
color_mode='rgb', #quitar o no quitar
subset='training')
validation_generator =datagen.flow_from_dataframe(
dataframe=trainLabels,
directory='./resized_train_cropped',
x_col="image",
y_col="level",
target_size=(240,240),
batch_size=16,
class_mode='categorical',
color_mode='rgb',
subset='validation')
#----------------------------------------------------------------------------------------
def createBaseNetwork(input_shape):
weight_decay = 1e-4
L2_norm = regularizers.l2(weight_decay)
input = Input(shape=input_shape)
print(input)
x = Conv2D(96, (9, 9), activation='relu', name='conv1', kernel_regularizer=L2_norm)(input)
x = MaxPooling2D((3, 3), name='pool1')(x)
x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(x)
x = Conv2D(384, (5, 5), activation='relu', name='conv2', kernel_regularizer=L2_norm)(x)
x = MaxPooling2D((3, 3), name='pool2')(x)
x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(x)
x = Conv2D(384, (3, 3), activation='relu', name='conv3')(x)
x = Conv2D(384, (3, 3), activation='relu', name='conv4')(x)
x = Conv2D(256, (3, 3), activation='relu', name='conv5')(x)
x = MaxPooling2D((3, 3), name='pool3')(x)
x = Flatten()(x)
x = Dense(4096, activation='relu', name='fc1')(x)
return Model(input, x)
# ---------------------------------------------------------------------------------
hp=HyperParameters()
baseNetwork=createBaseNetwork(input_shape)
#baseNetwork.load_weights('./ModelWeights2.h5',by_name=True)
for l in baseNetwork.layers:
l.trainable=True
input_a = Input(shape=input_shape,name='input1')
outLayers = baseNetwork(input_a)
outLayers = Dense(2048, activation='relu', name='fc3')(outLayers)
outLayers= Dropout(0.2)(outLayers)
outLayers = Dense(1024, activation='relu', name='fc4')(outLayers)
outLayers= Dropout(0.2)(outLayers)
outLayers = Dense(hp.Int('input_units',min_value=32,max_value=512), activation='relu', name='fc5')(outLayers)
classifier = Dense(numClasses, activation='softmax', name='predictions')(outLayers)
model = Model(input_a, classifier)
model.summary()
tuner = RandomSearch(
model,
objective='val_accuracy',
max_trials=1,
executions_per_trial=1,
directory='./logtunner'
)
tuner.search(
train_generator,
validation_data=validation_generator,
epochs=1,
)
For now im just trying to use it on the last Dense layer, as you can see i just want to stimate a good number of neurons with this:
hp.Int('input_units',min_value=32,max_value=512)
But i get an error like this:
ValueError: TypeError: object of type 'HyperParameters' has no len()
I dont know how to solve it, i spent hours watching videos and tutorials but no idea of what is happening.
I also realize that there is another error mesage:
This function does not handle the case of the path where all inputs are not already EagerTensors
But i dont have any idea about that too
I was having more or less of the same error.
If you pay attention to to the keras-tuner in the tensorflow website https://www.tensorflow.org/tutorials/keras/keras_tuner or in the keras website, you see the following:
tuner = kt.Hyperband(model_builder,
objective = 'val_accuracy',
max_epochs = 10,
factor = 3,
directory = 'my_dir',
project_name = 'intro_to_kt')
the first input to the tuner is the function model_builder that is declared earlier as
def model_builder(hp):
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(28, 28)))
# Tune the number of units in the first Dense layer
# Choose an optimal value between 32-512
hp_units = hp.Int('units', min_value = 32, max_value = 512, step = 32)
model.add(keras.layers.Dense(units = hp_units, activation = 'relu'))
model.add(keras.layers.Dense(10))
# Tune the learning rate for the optimizer
# Choose an optimal value from 0.01, 0.001, or 0.0001
hp_learning_rate = hp.Choice('learning_rate', values = [1e-2, 1e-3, 1e-4])
model.compile(optimizer = keras.optimizers.Adam(learning_rate = hp_learning_rate),
loss = keras.losses.SparseCategoricalCrossentropy(from_logits = True),
metrics = ['accuracy'])
return model
So, all you need is reorganize your code to follow the same structure. You need to encapsulate the keras model and keras-tuner hp inside a function.
Cheers.
I have one question.
I want to print a confusion matrix.
my model is functional api of keras.
and
model = Model(inputs=[data_input], outputs=[output_1, output_2])
output_1 = 9 classes
output_2 = 5 classes
My multi-classification model
data_input = Input(shape=(trainX.shape[1], trainX.shape[2]))
Conv1 = Conv1D(filters=50, kernel_size=4, padding='valid', activation='relu', strides=1)(data_input)
Conv1 = MaxPooling1D(pool_size=2)(Conv1)
Conv2 = Conv1D(filters=50, kernel_size=4, padding='valid', activation='relu', strides=1)(Conv1)
Conv2 = MaxPooling1D(pool_size=2)(Conv2)
Conv3 = Conv1D(filters=50, kernel_size=4, padding='valid', activation='relu', strides=1)(Conv2)
Conv3 = MaxPooling1D(pool_size=2)(Conv3)
Classification1 = LSTM(128, input_shape=(47, 50), return_sequences=False)(Conv3)
Classification2 = GRU(128, input_shape=(47, 50), return_sequences=False)(Conv3)
activity = Dense(9)(Classification1)
activity = Activation('softmax')(activity)
speed = Dense(5)(Classification2)
speed = Activation('softmax')(speed)
model = Model(inputs=[data_input], outputs=[activity, speed])
model.compile(loss= 'categorical_crossentropy' , optimizer='adam', metrics=[ 'accuracy' ])
print(model.summary())
history = model.fit(trainX, {'activation_1': trainY_Activity, 'activation_2': trainY_Speed},
validation_data=(testX, {'activation_1': testY_Activity, 'activation_2': testY_Speed}),
epochs=epochs, batch_size=batch_size, verbose=1, shuffle=False)
I am trying to do Author Identification, my train_vecs_w2v.shape = (15663, 400).
y_train.shape = (15663,3) which has 3 label one hot encoded.
Now the problem is I am having an error in the Embedding layer. Indices[0,X] = -1 is not in [0, 15663). How to solve this? Is it my code or Keras/Tensorflow?
print('Building Model')
n=19579
max_features = 15663
max_length = 400
EMBEDDING_DIM = 100
model7 = Sequential()
model7.add(Embedding(len(train_vecs_w2v), EMBEDDING_DIM, input_length=max_length, dtype='float32', trainable=True, weights=None, embeddings_initializer='uniform', embeddings_regularizer=None, activity_regularizer=None, embeddings_constraint=None))
print(model7.output_shape)
model7.add(Convolution1D(filters =128, kernel_size = 3, strides=1, activation='relu', use_bias=False, border_mode='same'))
print(model7.output_shape)
model7.add(MaxPooling1D(pool_size = 3))
print(model7.output_shape)
model7.add(Convolution1D(filters = 64, kernel_size = 5, strides=1, activation='relu', border_mode='same'))
print(model7.output_shape)
model7.add(MaxPooling1D(pool_size = 5))
print(model7.output_shape)
model7.add(Flatten()) # model.output_shape == (None, 64*input_shape of convolution layer)
print(model7.output_shape)
model7.add(Dense(output_dim = 64, activation='relu')) # input_shape = (batch_size, input_dim)
print(model7.output_shape)
model7.add(Dense(output_dim = 32, activation='relu'))
print(model7.output_shape)
model7.add(Dense(output_dim = 3, activation='softmax'))
model7.compile(optimizer='adam',
loss='categorical_crossentropy',
metrics=['categorical_accuracy'])
model7.fit(train_vecs_w2v, y_train_vec, epochs=50, batch_size=32, verbose=2)
The error I am getting
InvalidArgumentError (see above for traceback): indices[0,1] = -1 is not in [0, 15663)
[[Node: embedding_1/Gather = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](embedding_1/embeddings/read, embedding_1/Cast)]]
I think the problem here is with the word vectors count.
It should be
len(train_vecs_w2v) + 1