Softmax output returning only ones and zeros? - python

My convolutional neural network is returning only ones and zeros on the softmax output (out1). Does anyone know why?
def build(self):
    inp = Input(self.obs_shape)
    conv0 = Conv2D(32, 2, 1, padding="same", activation = "relu")(inp)
    drop0 = MaxPool2D((2,2))(conv0)
    conv1 = Conv2D(64, 3, 2, padding="same", activation = "relu")(drop0)
    drop1 = MaxPool2D((2,2))(conv1)
    flat = Flatten()(drop1)
    hid0 = Dense(128, activation='relu')(flat)
    hid1 = Dense(256, activation='relu')(hid0)
    hid = Dense(128, activation='relu')(hid1)
    out1 = Dense(self.action_count, activation='softmax')(hid)
    out2 = Dense(1, activation='linear')(hid)
    model = Model(inputs = [inp], outputs = [out1, out2])
    model.compile(optimizer = tf.keras.optimizers.Adam(lr = self.lr),
                  loss = [self.actor_loss, "mse"])
    return model

def actor_loss(self, y_actual, y_pred):
    actions = tf.cast(y_actual[:, 0], tf.int32)
    returns = y_actual[:, 1]
    mask = tf.one_hot(actions, self.action_count)
    logps = tf.math.log(tf.boolean_mask(y_pred, mask) + 1e-3)
    entropy = -tf.math.reduce_sum(y_pred * tf.math.log(y_pred))
    return -tf.math.reduce_sum(logps * returns) - 0.0001*entropy

model = Model(inputs = [inp], outputs = [out1, out2])
Look at the line above: there are only two outputs. Your build function locks in that number of outputs, so you only get 1 or 0. In short, you need to change your model. Sorry, my English is bad.
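A quick sanity check before changing anything is to look at the raw probabilities rather than the rounded console output. A minimal sketch, where `agent.model` stands in for the model returned by build() and `sample_obs` is a small batch of observations (both placeholder names, not from the question):

import numpy as np

# Hypothetical names: agent.model is the compiled model from build(),
# sample_obs is a batch of observations with shape (N,) + obs_shape.
np.set_printoptions(precision=6, suppress=True)
probs, values = agent.model.predict(sample_obs[:5])
print(probs)              # raw softmax probabilities per action
print(probs.sum(axis=1))  # each row should sum to ~1.0

If the rows really are hard 0/1 vectors, the policy has saturated rather than being a display artifact.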

Related

compare the precision of a single class in the callback instance in tf.keras.callbacks.Callback

I need help designing my callback. I have the following architecture:
def CNN_exctractor(input_img):
    l2_loss_lambda = 0.01  # the definition of L2 regularization
    l2 = None if l2_loss_lambda is None else regularizers.l2(l2_loss_lambda)
    if l2 is not None:
        print('Using L2 regularization - l2_loss_lambda = %.7f' % l2_loss_lambda)
    conv1 = Conv2D(filters=64, kernel_size=(3,3), padding="same", activation="relu", kernel_regularizer=l2)(input_img)
    conv11 = BatchNormalization()(conv1)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv11)
    conv10 = Conv2D(filters=64, kernel_size=(3,3), padding="same", activation="relu", kernel_regularizer=l2)(pool2)
    conv110 = BatchNormalization()(conv10)
    pool21 = MaxPooling2D(pool_size=(2, 2))(conv110)
    conv3 = Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu", kernel_regularizer=l2)(pool21)  #(conv21)
    conv31 = BatchNormalization()(conv3)
    conv5 = Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu", kernel_regularizer=l2)(conv31)  #(conv41)
    conv51 = BatchNormalization()(conv5)
    conv511 = Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu", kernel_regularizer=l2)(conv51)  #(conv41)
    conv5111 = BatchNormalization()(conv511)
    # pool3 = MaxPooling2D(pool_size=(2, 2))(conv51)
    return conv5111
def fc1(enco):
    l2_loss_lambda = 0.01
    l2 = None if l2_loss_lambda is None else regularizers.l2(l2_loss_lambda)
    if l2 is not None:
        print('Using L2 regularization - l2_loss_lambda = %.7f' % l2_loss_lambda)
    flat = Flatten()(enco)
    den = Dense(256, activation='relu', kernel_regularizer=l2)(flat)  #(den_n)
    den_n = BatchNormalization()(den)
    den1 = Dense(128, activation='relu', kernel_regularizer=l2)(den_n)  #(den_n)
    den1_n = BatchNormalization()(den1)
    out = Dense(2, activation='softmax')(den1_n)
    return out
As you can see, I have two neurons at the output. I am using this simple code for the callback:
class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if (logs.get('val_accuracy') >= 0.92) and (logs.get('accuracy') >= 0.96):
            print("\nReached %2.2f%% accuracy, so stopping training!!" % (0.96 * 100))
            self.model.stop_training = True
I am comparing both training and validation accuracy. What I want to do is, instead of comparing the overall validation accuracy, compare the precision of a single class, something like (if it exists):
logs.get('class_1_precision')>= 0.8
You can pass your validation data to your callback and then filter it for the specific class. I don't know how you've structured your validation data, but here I'm assuming it's split into two sets (val_x and val_y). Inside the callback you then get the rows containing the class you need (perhaps by filtering val_y for the indices of the class and then grabbing the same indices of val_x); I've left this bit up to you.
from sklearn.metrics import precision_score

class myCallback(tf.keras.callbacks.Callback):
    def __init__(self, val_x, val_y):
        super(myCallback, self).__init__()
        self.val_x = val_x
        self.val_y = val_y

    def on_epoch_end(self, epoch, logs={}):
        # Filter validation data for your required class
        val_x_class_1 = ...  # filter self.val_x for your class
        val_y_class_1 = ...  # filter self.val_y for your class
        # Get predictions for the filtered val data
        class1_scores = self.model.predict(val_x_class_1)
        # Get indices of best predictions - you might need to alter this
        y_pred_class1 = tf.argmax(class1_scores, axis=1)
        y_true_class1 = tf.argmax(val_y_class_1, axis=1)
        # Calculate precision
        precision_class1 = precision_score(y_true_class1, y_pred_class1)
        # Rest of your code
        # <....>
To pass the validation data to the callback you'll need to add something like the below to your fit function:
cbs = myCallback(val_x,val_y)
model.fit(...., callbacks=[cbs])
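If it helps, here is one possible way to do that filtering, assuming val_y is one-hot encoded and the class of interest is column index 1 (both are assumptions; adjust to your own label layout):

import numpy as np

def filter_class(val_x, val_y, class_index=1):
    # keep only the validation rows whose one-hot label matches class_index
    idx = np.where(np.argmax(val_y, axis=1) == class_index)[0]
    return val_x[idx], val_y[idx]

# inside on_epoch_end:
# val_x_class_1, val_y_class_1 = filter_class(self.val_x, self.val_y)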

Error when using the Keras API on convolutional layers

I'm trying to build a model that looks like this:
            input
              |
      convlayers/flatten
         /        \
  first_output     \
         \         /
        second_output
but it fails at the first conv layers with the error:
ValueError: Layer conv2d_4 was called with an input that isn't a symbolic tensor.
Received type: <class 'keras.layers.convolutional.Conv2D'>.
Full input: [<keras.layers.convolutional.Conv2D object at 0x7f450d7b8630>].
All inputs to the layer should be tensors.
The error points to the layer after the first conv, at the input_shape call.
Help would be appreciated.
Here is the code:
conv1 = Conv2D(8, 4, padding = "same", strides = 2)(inputs)
conv2 = Conv2D(16 ,4, padding = "same", strides = 2)(conv1)
flat = Flatten()(conv2)
dense1 = Dense(32)(flat)
dense2 = Dense(32)(dense1)
first_output = Dense(64)(dense2)
merged = concatenate([flat,first_output])
second_output_dense1 = Dense(32)(merged)
second_output_dense2 = Dense(32)(second_output_dense1)
second_output = Dense(64)(second_output_dense2)
model = Model(inputs=conv1, outputs=[first_output,second_output])
model.compile(loss = "mse", optimizer = "adam" )
Answer:
I was under the impression that you could build the model without an Input layer and just define the input in the first layer:
conv1 = Conv2D(8, 4, padding = "same", strides = 2, input_shape = (6,8,8,))
but that didn't work, so instead you have to drop the input_shape argument and create an Input layer. Here is the fixed code:
inputs = Input(shape=(6,8,8,))
conv1 = Conv2D(8, 4, padding = "same", strides = 2)(inputs)
conv2 = Conv2D(16, 4, padding = "same", strides = 2)(conv1)
flat = Flatten()(conv2)
dense1 = Dense(32)(flat)
dense2 = Dense(32)(dense1)
first_output = Dense(64)(dense2)
merged = concatenate([flat,first_output])
second_output_dense1 = Dense(32)(merged)
second_output_dense2 = Dense(32)(second_output_dense1)
second_output = Dense(64)(second_output_dense2)
model = Model(inputs=inputs, outputs=[first_output,second_output])
model.compile(loss = "mse", optimizer = "adam" )
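Since the fixed model has two outputs, fit expects a matching list of two target arrays. A minimal sketch, where x_train, y_first and y_second are placeholder arrays shaped to your own data:

# two outputs, so two target arrays in the same order as the outputs list
model.fit(x_train, [y_first, y_second], epochs=10, batch_size=32)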

Multidimensional Regression Network in Keras quickly trends to 0

I had a basic Keras network predicting one value and it worked fine. I tried adding support for predicting 4 values, but when I do that, the output instantly trends to 0. Right now, the network is a simplistic model just for testing.
The input to the network is an array of shape (90,) and the output should have 4 values. I tried having a single output layer with 4 nodes as well, but that showed the same behavior as this one. I've also tried various loss functions.
def runNN(training_data, training_labels, test_data, test_labels, model = None):
    if model == None:
        inp = Input(shape = (90,), name = 'input')
        model = Dense(units = 90, activation='relu')(inp)
        model = Dropout(0.5)(model)
        model = Dense(units = 180, activation='relu')(model)
        model = Dropout(0.5)(model)
        output1 = Dense(1, activation = 'relu', name = 'preretirement')(model)
        output2 = Dense(1, activation = 'relu', name = 'cola')(model)
        output3 = Dense(1, activation = 'relu', name = 'initialNC')(model)
        output4 = Dense(1, activation = 'relu', name = 'finalNC')(model)
        complete_model = Model(inputs = [inp], outputs = [output1, output2, output3, output4])
        optimizer = Adam(lr = .0003)
        complete_model.compile(loss='mean_absolute_error',
                               optimizer=optimizer,
                               metrics=['mean_absolute_error'])
        complete_model.fit(training_data, {'preretirement' : training_labels[0],
                                           'cola' : training_labels[1],
                                           'initialNC' : training_labels[2],
                                           'finalNC' : training_labels[3]},
                           epochs = 10, batch_size = 128)
The output after 1 epoch, and anything afterwards, is [0,0,0,0] for each test point. It should be a 4 item list with values between 0 and 1 such as [.34,.56,.12,.87]
You're probably doing a prediction or function-fitting task. Two suggestions might help you:
sigmoid usually works better than relu in this kind of task.
Do not use an activation function at the final output layer.
The code below is modified from yours and it works fine.
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras.optimizers import Adam
dropout_rate = .5
activate_function = 'sigmoid'
num_iteration = 20
inp = Input(shape = (90,), name = 'input')
model = Dense(units = 90, activation=activate_function)(inp)
model = Dropout(rate=dropout_rate)(model)
model = Dense(units = 180, activation=activate_function)(model)
model = Dropout(rate=dropout_rate)(model)
output1 = Dense(units=1, name = 'preretirement')(model)
output2 = Dense(units=1, name = 'cola')(model)
output3 = Dense(units=1, name = 'initialNC')(model)
output4 = Dense(units=1, name = 'finalNC')(model)
# # Your original code
# output1 = Dense(units=1, activation = activate_function, name = 'preretirement')(model)
# output2 = Dense(units=1, activation = activate_function,name = 'cola')(model)
# output3 = Dense(units=1, activation = activate_function,name = 'initialNC')(model)
# output4 = Dense(units=1, activation = activate_function,name = 'finalNC')(model)
complete_model = Model(inputs = [inp], outputs = [output1, output2, output3, output4])
optimizer = Adam(lr = .0003)
complete_model.compile(loss='mean_absolute_error',
optimizer=optimizer,
metrics=['mean_absolute_error'])
# generate data for training the model
import numpy as np
num_train = 4000 # the number of training instances
# a normal distribution with mean=2, variance=1
training_data = np.random.normal(2, 1, (num_train, 90))
training_labels = np.zeros(shape=(num_train, 4))
for i in range(num_train):
    tmp = np.sum(training_data[i, :]) / 90.0
    training_labels[i, :] = [tmp, np.sin(tmp), np.cos(tmp), tmp*tmp]
print(training_data.shape, training_labels.shape)
# generate data for testing the model
test_data = np.random.normal(0, 1, (10, 90)) # 10 test instances
test_labels = np.zeros(shape=(10, 4))
for i in range(10):
    tmp = np.sum(test_data[i, :]) / 90.0
    test_labels[i, :] = [tmp, np.sin(tmp), np.cos(tmp), tmp*tmp]
print(test_data.shape, test_labels.shape)
complete_model.fit(training_data, {'preretirement' : training_labels[:, 0],
                                   'cola' : training_labels[:, 1],
                                   'initialNC' : training_labels[:, 2],
                                   'finalNC' : training_labels[:, 3]},
                   epochs = num_iteration,
                   batch_size = 128)
results = complete_model.predict(test_data)
for i in range(10):
    print('true', test_labels[i])
    print('predicted', results[0][i, 0], results[1][i, 0], results[2][i, 0], results[3][i, 0])
    print('--------------------------')
The code produces

AttributeError: 'NoneType' object has no attribute '_inbound_nodes' while trying to add multiple keras Dense layers

The input is 3 independent channels of 1000 features. I'm trying to pass each channel through an independent NN path, then concatenate them into a flat layer, and then apply an FCN on the flattened layer for binary classification.
I'm trying to add multiple Dense layers together, like this:
def tst_1():
    inputs = Input((3, 1000, 1))

    dense10 = Dense(224, activation='relu')(inputs[0,:,1])
    dense11 = Dense(112, activation='relu')(dense10)
    dense12 = Dense(56, activation='relu')(dense11)

    dense20 = Dense(224, activation='relu')(inputs[1,:,1])
    dense21 = Dense(112, activation='relu')(dense20)
    dense22 = Dense(56, activation='relu')(dense21)

    dense30 = Dense(224, activation='relu')(inputs[2,:,1])
    dense31 = Dense(112, activation='relu')(dense30)
    dense32 = Dense(56, activation='relu')(dense31)

    flat = keras.layers.Add()([dense12, dense22, dense32])

    dense1 = Dense(224, activation='relu')(flat)
    drop1 = Dropout(0.5)(dense1)
    dense2 = Dense(112, activation='relu')(drop1)
    drop2 = Dropout(0.5)(dense2)
    dense3 = Dense(32, activation='relu')(drop2)
    densef = Dense(1, activation='sigmoid')(dense3)

    model = Model(inputs = inputs, outputs = densef)
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    return model
model = tst_1()
model.summary()
but I got this error:
/usr/local/lib/python2.7/dist-packages/keras/engine/network.pyc in build_map(tensor, finished_nodes, nodes_in_progress, layer, node_index, tensor_index)
1310 ValueError: if a cycle is detected.
1311 """
-> 1312 node = layer._inbound_nodes[node_index]
1313
1314 # Prevent cycles.
AttributeError: 'NoneType' object has no attribute '_inbound_nodes'
The problem is that splitting the input data using inputs[0,:,1] is not done as a keras layer.
You need to create a Lambda layer to be able to accomplish this.
The following code:
from keras import layers
from keras.layers import Input, Add, Dense, Dropout, Lambda, Concatenate
from keras.layers import Flatten
from keras.optimizers import Adam
from keras.models import Model
import keras.backend as K

def tst_1():
    num_channels = 3
    inputs = Input(shape=(num_channels, 1000, 1))

    branch_outputs = []
    for i in range(num_channels):
        # Slicing the ith channel:
        out = Lambda(lambda x: x[:, i, :, :], name = "Lambda_" + str(i))(inputs)
        # Setting up your per-channel layers (replace with actual sub-models):
        out = Dense(224, activation='relu', name = "Dense_224_" + str(i))(out)
        out = Dense(112, activation='relu', name = "Dense_112_" + str(i))(out)
        out = Dense(56, activation='relu', name = "Dense_56_" + str(i))(out)
        branch_outputs.append(out)

    # Concatenating together the per-channel results:
    out = Concatenate()(branch_outputs)

    dense1 = Dense(224, activation='relu')(out)
    drop1 = Dropout(0.5)(dense1)
    dense2 = Dense(112, activation='relu')(drop1)
    drop2 = Dropout(0.5)(dense2)
    dense3 = Dense(32, activation='relu')(drop2)
    densef = Dense(1, activation='sigmoid')(dense3)

    model = Model(inputs = inputs, outputs = densef)
    return model

Net = tst_1()
Net.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
Net.summary()
correctly created the net that you want.
Thanks to @CAta.RAy. I solved it in this way:
import numpy as np
from keras import layers
from keras.layers import Input, Add, Dense, Dropout, Lambda
from keras.layers import Flatten
from keras.optimizers import Adam
from keras.models import Model
import keras.backend as K

def tst_1():
    inputs = Input((3, 1000))

    x1 = Lambda(lambda x: x[:, 0])(inputs)
    dense10 = Dense(224, activation='relu')(x1)
    dense11 = Dense(112, activation='relu')(dense10)
    dense12 = Dense(56, activation='relu')(dense11)

    x2 = Lambda(lambda x: x[:, 1])(inputs)
    dense20 = Dense(224, activation='relu')(x2)
    dense21 = Dense(112, activation='relu')(dense20)
    dense22 = Dense(56, activation='relu')(dense21)

    x3 = Lambda(lambda x: x[:, 2])(inputs)
    dense30 = Dense(224, activation='relu')(x3)
    dense31 = Dense(112, activation='relu')(dense30)
    dense32 = Dense(56, activation='relu')(dense31)

    flat = Add()([dense12, dense22, dense32])

    dense1 = Dense(224, activation='relu')(flat)
    drop1 = Dropout(0.5)(dense1)
    dense2 = Dense(112, activation='relu')(drop1)
    drop2 = Dropout(0.5)(dense2)
    dense3 = Dense(32, activation='relu')(drop2)
    densef = Dense(1, activation='sigmoid')(dense3)

    model = Model(inputs = inputs, outputs = densef)
    return model

Net = tst_1()
Net.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
Net.summary()
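A quick way to confirm the graph is wired correctly is to run a forward pass and a short fit on random data of the expected shape. A minimal sketch, assuming binary labels (the arrays below are dummies, not real data):

import numpy as np

X = np.random.rand(8, 3, 1000)             # 8 samples, 3 channels of 1000 features
y = np.random.randint(0, 2, size=(8, 1))   # dummy binary labels
print(Net.predict(X).shape)                # expect (8, 1)
Net.fit(X, y, epochs=1, batch_size=4)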

Keras Graph disconnects when combining a LSTM with a CNN

The idea is to train a CNN on a cosine-similarity matrix of the hidden states of two BiLSTMs.
I am trying to get the following code working, but it fails with the error message:
Graph disconnected: cannot obtain value for tensor
Tensor("bidirectional_4/concat:0", shape=(?, ?, 100), dtype=float32)
at layer "input_11". The following previous layers were accessed without issue: []
The code to train the model is the following:
def train_model(self, sentences_pair, is_similar,
                embedding_meta_data_skt, embedding_meta_data_tib,
                model_save_directory='./'):
    tokenizer_skt = embedding_meta_data_skt['tokenizer']
    tokenizer_tib = embedding_meta_data_tib['tokenizer']
    embedding_matrix_skt = embedding_meta_data_skt['embedding_matrix']
    embedding_matrix_tib = embedding_meta_data_tib['embedding_matrix']

    train_data_x1, train_data_x2, train_labels, leaks_train, \
    val_data_x1, val_data_x2, val_labels, leaks_val = create_train_dev_set(tokenizer_skt, sentences_pair,
                                                                           is_similar, self.max_sequence_length,
                                                                           self.validation_split_ratio)

    nb_words_skt = len(tokenizer_skt.word_index) + 1
    nb_words_tib = len(tokenizer_tib.word_index) + 1

    # Creating word embedding layer
    embedding_layer_skt = Embedding(nb_words_skt, self.embedding_dim, weights=[embedding_matrix_skt],
                                    input_length=self.max_sequence_length, trainable=False)
    embedding_layer_tib = Embedding(nb_words_tib, self.embedding_dim, weights=[embedding_matrix_tib],
                                    input_length=self.max_sequence_length, trainable=False)

    # Creating LSTM Encoder
    lstm_layer = Bidirectional(LSTM(self.number_lstm_units, dropout=self.rate_drop_lstm,
                                    recurrent_dropout=self.rate_drop_lstm, return_sequences=True))

    # Creating LSTM Encoder layer for First Sentence
    sequence_1_input = Input(shape=(self.max_sequence_length,), dtype='int32')
    embedded_sequences_1 = embedding_layer_skt(sequence_1_input)
    skt_lstm = lstm_layer(embedded_sequences_1)

    # Creating LSTM Encoder layer for Second Sentence
    sequence_2_input = Input(shape=(self.max_sequence_length,), dtype='int32')
    embedded_sequences_2 = embedding_layer_tib(sequence_2_input)
    tib_lstm = lstm_layer(embedded_sequences_2)

    A_input = keras.Input(tensor=skt_lstm)
    B_input = keras.Input(tensor=tib_lstm)

    dist_output = keras.layers.Lambda(pairwise_cosine_sim)([skt_lstm, tib_lstm, A_input, B_input])
    dist_output = Reshape((40, 40, 1))(dist_output)

    input_shape = (40, 40, 1)
    cnn_model = Conv2D(128, (2, 2), input_shape=input_shape)(dist_output)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Conv2D(164, (2, 2))(cnn_model)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Conv2D(192, (3, 3))(cnn_model)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Conv2D(192, (3, 3))(cnn_model)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Conv2D(128, (3, 3))(cnn_model)
    cnn_model = BatchNormalization(axis=-1)(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = MaxPooling2D(pool_size=(2, 2))(cnn_model)
    cnn_model = Dropout(0.40)(cnn_model)
    cnn_model = Flatten()(cnn_model)

    # Fully connected layer
    cnn_model = Dense(256)(cnn_model)
    cnn_model = BatchNormalization()(cnn_model)
    cnn_model = Activation('relu')(cnn_model)
    cnn_model = Dropout(0.5)(cnn_model)
    cnn_model = Dense(num_classes)(cnn_model)
    preds = Dense(1, activation='sigmoid')(cnn_model)

    model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)
    model.compile(loss=keras.losses.binary_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=learning_rate),
                  metrics=['accuracy'])
    # model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['acc'])

    filepath = "skt-tib-bs" + str(batch_size) + "-" + "{epoch:02d}-{val_acc:.2f}.hdf5"
    checkpoint = ModelCheckpoint('skt-tib.h5', monitor='val_acc')
    callbacks_list = [checkpoint]

    model.fit([train_data_x1, train_data_x2, leaks_train], train_labels,
              validation_data=([val_data_x1, val_data_x2, leaks_val], val_labels),
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              class_weight=class_weight,
              callbacks=callbacks_list)

    score = model.evaluate(x_test, y_test, verbose=1)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    model.save(file_name)
The definition of the function calculating the pairwise cosine similarity is the following:
def l2_norm(x, axis=None):
    square_sum = K.sum(K.square(x), axis=axis, keepdims=True)
    norm = K.sqrt(K.maximum(square_sum, K.epsilon()))
    return norm

def pairwise_cosine_sim(A_B):
    A, B, A_tensor, B_tensor = A_B
    A_mag = l2_norm(A, axis=2)
    B_mag = l2_norm(B, axis=2)
    num = K.batch_dot(A_tensor, K.permute_dimensions(B_tensor, (0, 2, 1)))
    den = (A_mag * K.permute_dimensions(B_mag, (0, 2, 1)))
    dist_mat = num / den
    return dist_mat
I have been trying for a couple of hours to fix it, with no luck. Somewhere the inputs and outputs are not connected, but I just can't figure out where the problem lies. Any suggestions?
Either remove A_input and B_input entirely (they are not input layers of the model in the first place) and use skt_lstm and tib_lstm directly instead, or, if you would like to keep them, pass them as inputs of the model as well when you define the Model, since they actually are Input layers:
model = Model(inputs=[sequence_1_input, sequence_2_input, A_input, B_input], outputs=preds)
However, you don't need to pass any corresponding arrays for them when calling the fit method, as they will be fed using their corresponding tensors skt_lstm and tib_lstm (i.e. they will act as wrappers around those tensors).
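A sketch of the first option, assuming the cosine-similarity helper is reduced to take just the two LSTM outputs (names as in the question; this is not the exact original code):

def pairwise_cosine_sim(A_B):
    # same maths as before, but only the two LSTM output tensors are needed
    A, B = A_B
    A_mag = l2_norm(A, axis=2)
    B_mag = l2_norm(B, axis=2)
    num = K.batch_dot(A, K.permute_dimensions(B, (0, 2, 1)))
    den = A_mag * K.permute_dimensions(B_mag, (0, 2, 1))
    return num / den

# no extra Input layers; feed the LSTM outputs straight into the Lambda
dist_output = keras.layers.Lambda(pairwise_cosine_sim)([skt_lstm, tib_lstm])
dist_output = Reshape((40, 40, 1))(dist_output)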
