I'm trying to implement a neural network in Python (Keras) that will predict the probability of multiple outcomes. At the moment I have the following code; for simplicity, I reduced the problem to 3 inputs and 3 outputs:
import keras as k
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the raw table from CSV.
data_frame = pd.read_csv("123.csv")
# Column names fed to the network and column names it should predict.
input_names = ["Sex", "Age", "IQ"]
output_names = ["OUTPUT1", "OUTPUT2", "OUTPUT3"]
raw_input_data = data_frame[input_names]
raw_output_data = data_frame[output_names]
# Divisor applied by the "Age" encoder below.
max_age = 100
# Per-column encoders: each maps one raw cell value to a list of numbers.
# NOTE(review): the "Sex" encoder returns None for any value other than
# "male" or "female" (dict.get with no default).
encoders = {"Age": lambda age: [age/max_age],
"Sex": lambda gen: {"male": [0], "female": [1]}.get(gen),
"IQ": lambda iq_value: [iq_value],
"OUTPUT1": lambda output1_value: [output1_value],
"OUTPUT2": lambda output2_value: [output2_value],
"OUTPUT3": lambda output3_value: [output3_value]}
def dataframe_to_dict(df):
    """Map each column name of ``df`` to that column's values.

    Args:
        df: pandas DataFrame to convert.

    Returns:
        dict of column name -> ``df[column].values``, in column order.
    """
    # Read from the argument rather than the module-level ``data_frame`` so
    # the function works on whichever frame the caller passes in.
    return {column: df[column].values for column in df.columns}
def make_supervised(df):
    """Split ``df`` into the input and output columns used for training.

    Args:
        df: DataFrame containing every column named in the module-level
            ``input_names`` and ``output_names`` lists.

    Returns:
        dict with keys "inputs" and "outputs"; each value is the
        column-name -> values dict produced by ``dataframe_to_dict``.
    """
    # Select from the argument, not from the module-level ``data_frame``.
    return {"inputs": dataframe_to_dict(df[input_names]),
            "outputs": dataframe_to_dict(df[output_names])}
def encode(data):
    """Encode a column dict into one flat numeric vector per row.

    Args:
        data: dict of column name -> sequence of raw values. Every column
            name must be a key of the module-level ``encoders`` dict.

    Returns:
        list of rows; each row is the concatenation, in column order, of
        the lists returned by the per-column encoders for that row.
    """
    # Encode column by column first ...
    per_column = [[encoders[name](value) for value in values]
                  for name, values in data.items()]
    # ... then walk the columns in lockstep and flatten each row's cells.
    return [[number for cell in row for number in cell]
            for row in zip(*per_column)]
# Encode the table into numeric matrices (one row per sample).
supervised = make_supervised(data_frame)
encoded_inputs = np.array(encode(supervised["inputs"]))
encoded_outputs = np.array(encode(supervised["outputs"]))
# The first 300 rows are used for fitting; the remaining rows are held out.
train_x = encoded_inputs[:300]
train_y = encoded_outputs[:300]
test_x = encoded_inputs[300:]
test_y = encoded_outputs[300:]
# Small fully-connected network.
# NOTE(review): the last layer has a single unit while output_names lists
# three target columns, so predict() yields one value per row, not three.
model = k.Sequential()
model.add(k.layers.Dense(units=5, activation="relu"))
model.add(k.layers.Dense(units=1, activation="sigmoid"))
model.compile(loss="mse", optimizer="sgd", metrics=["accuracy"])
# 20% of the training rows are set aside by Keras for validation.
fit_results = model.fit(x=train_x, y=train_y, epochs=100, validation_split=0.2)
# Loss curves.
plt.title("Losses train/validation")
plt.plot(fit_results.history["loss"], label="Train")
plt.plot(fit_results.history["val_loss"], label="Validation")
plt.legend()
plt.show()
# Accuracy curves.
plt.title("Accuracies train/validation")
plt.plot(fit_results.history["accuracy"], label="Train")
plt.plot(fit_results.history["val_accuracy"], label="Validation")
plt.legend()
plt.show()
# Predict on the held-out rows and attach the predictions to the raw data.
predicted_test = model.predict(test_x)
real_data = data_frame.iloc[300:][input_names+output_names]
# NOTE(review): the subscript below is a single tuple key, not a list of
# three column names.
real_data["POUTPUT1", "POUTPUT2", "POUTPUT3"] = predicted_test
print(real_data)
real_data.to_csv('C:/***/133.csv')
I need help implementing the output of probabilities for all 3 outcomes [POUTPUT1, POUTPUT2, POUTPUT3] (currently outputs only 1) and saving them in a table like this one:
You need to adapt the input and output of your model, and replace your sigmoid output activation with one that supports categories (softmax, for example). Try something like this:
# Number of input features and number of predicted outcomes.
INPUT_DIM = 3
OUTPUT_DIM = 3
# first define your model
model = k.models.Sequential()
model.add(k.layers.Dense(8, activation='relu', input_dim = INPUT_DIM ))
model.add(k.layers.Dense(8, activation='relu'))
# you can add more layers here to customize your model
# softmax output: one value per outcome
model.add(k.layers.Dense(OUTPUT_DIM, activation='softmax'))
# then compile
model.compile(loss="mse", optimizer="sgd", metrics=["accuracy"])
# then fit
fit_results = model.fit(train_x, train_y, epochs=100, validation_split=0.2)
So, I tested your code with the changes I suggested, and the network seems to work.
Try this :
import keras as k
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the raw table from CSV.
data_frame = pd.read_csv("123.csv")
# Column names fed to the network and column names it should predict.
input_names = ["Sex", "Age", "IQ"]
output_names = ["OUTPUT1", "OUTPUT2", "OUTPUT3"]
raw_input_data = data_frame[input_names]
raw_output_data = data_frame[output_names]
# Divisor applied by the "Age" encoder below.
max_age = 100
# Per-column encoders: each maps one raw cell value to a list of numbers.
# NOTE(review): the "Sex" encoder returns None for any value other than
# "male" or "female" (dict.get with no default).
encoders = {"Age": lambda age: [age/max_age],
"Sex": lambda gen: {"male": [0], "female": [1]}.get(gen),
"IQ": lambda iq_value: [iq_value],
"OUTPUT1": lambda output1_value: [output1_value],
"OUTPUT2": lambda output2_value: [output2_value],
"OUTPUT3": lambda output3_value: [output3_value]}
def dataframe_to_dict(df):
    """Map each column name of ``df`` to that column's values.

    Args:
        df: pandas DataFrame to convert.

    Returns:
        dict of column name -> ``df[column].values``, in column order.
    """
    # Read from the argument rather than the module-level ``data_frame`` so
    # the function works on whichever frame the caller passes in.
    return {column: df[column].values for column in df.columns}
def make_supervised(df):
    """Split ``df`` into the input and output columns used for training.

    Args:
        df: DataFrame containing every column named in the module-level
            ``input_names`` and ``output_names`` lists.

    Returns:
        dict with keys "inputs" and "outputs"; each value is the
        column-name -> values dict produced by ``dataframe_to_dict``.
    """
    # Select from the argument, not from the module-level ``data_frame``.
    return {"inputs": dataframe_to_dict(df[input_names]),
            "outputs": dataframe_to_dict(df[output_names])}
def encode(data):
    """Encode a column dict into one flat numeric vector per row.

    Args:
        data: dict of column name -> sequence of raw values. Every column
            name must be a key of the module-level ``encoders`` dict.

    Returns:
        list of rows; each row is the concatenation, in column order, of
        the lists returned by the per-column encoders for that row.
    """
    # Encode column by column first ...
    per_column = [[encoders[name](value) for value in values]
                  for name, values in data.items()]
    # ... then walk the columns in lockstep and flatten each row's cells.
    return [[number for cell in row for number in cell]
            for row in zip(*per_column)]
# Encode the table into numeric matrices (one row per sample).
supervised = make_supervised(data_frame)
encoded_inputs = np.array(encode(supervised["inputs"]))
encoded_outputs = np.array(encode(supervised["outputs"]))
print(encoded_inputs)
print(encoded_outputs)
# Everything except the last 10 rows is used for fitting.
train_x = encoded_inputs[:-10]
train_y = encoded_outputs[:-10]
test_x = encoded_inputs[-10:] # I changed this to fit my fake data
test_y = encoded_outputs[-10:] # but you can keep your code.
# Number of input features and number of predicted outcomes.
INPUT_DIM = 3
OUTPUT_DIM = 3
# first define your model
model = k.models.Sequential()
model.add(k.layers.Dense(8, activation='relu', input_dim = INPUT_DIM ))
model.add(k.layers.Dense(8, activation='relu'))
# softmax output: one value per outcome
model.add(k.layers.Dense(OUTPUT_DIM, activation='softmax'))
# then compile
model.compile(loss="mse", optimizer="sgd", metrics=["accuracy"])
# then fit
fit_results = model.fit(train_x, train_y, epochs=100, validation_split=0.2)
# Plotting from the question, left disabled here.
# plt.title("Losses train/validation")
# plt.plot(fit_results.history["loss"], label="Train")
# plt.plot(fit_results.history["val_loss"], label="Validation")
# plt.legend()
# plt.show()
# plt.title("Accuracies train/validation")
# plt.plot(fit_results.history["accuracy"], label="Train")
# plt.plot(fit_results.history["val_accuracy"], label="Validation")
# plt.legend()
# plt.show()
# One row of OUTPUT_DIM values per test row.
predicted_test = model.predict(test_x)
print(predicted_test[0])
Then, when i print predicted_test[0], it gives me the outputs :
[[0.9967424 0.00114053 0.00211706]]
After that, I don't know exactly what you want to do with the dataframe, but I would try something like :
# Take exactly as many trailing rows as there are predictions so the new
# columns line up with the frame (test_x is the last 10 rows, not the last 2).
# .copy() makes real_data an independent frame, so the column assignments
# below do not write into a slice of data_frame.
real_data = data_frame.iloc[-len(predicted_test):][input_names+output_names].copy()
real_data.reset_index(inplace=True)
# Column i of the prediction matrix holds the value for outcome i + 1.
for position, column in enumerate(["POUTPUT1", "POUTPUT2", "POUTPUT3"]):
    real_data[column] = predicted_test[:, position]
print(real_data)
# then save it (replace ... with the destination path)
real_data.to_csv(...)
Third edit to solve your problem. I think it's OK now; the initial question is solved.
You should close this topic and open a new one if you encounter a new issue.
Related
```import numpy as np
import glob
import os
from keras.models import Model
from keras.layers import Input, Dense, GRU, CuDNNGRU, CuDNNLSTM
from keras import optimizers
import h5py
from sklearn.model_selection import train_test_split
from keras.models import load_model
def language_name(index):
    """Return the language name for a class index.

    Args:
        index: class index; 0, 1 and 2 are recognised.

    Returns:
        "English", "Hindi" or "Mandarin"; ``None`` for any other index.
    """
    names = ("English", "Hindi", "Mandarin")
    # Compare with == (not a dict lookup) so any index type that compares
    # equal to 0/1/2 keeps working exactly as before.
    for code, name in enumerate(names):
        if index == code:
            return name
    return None
# ---------------------------BLOCK 1------------------------------------
# COMMENT/UNCOMMENT BELOW CODE BLOCK -
# Below code extracts mfcc features from the files provided into a dataset
codePath = './train/'
num_mfcc_features = 64
# Sequence length is 10 seconds
sequence_length = 1000
# Sub-directories of codePath, in class-index order
# (0 = english, 1 = hindi, 2 = mandarin, matching language_name).
language_dirs = ('english', 'hindi', 'mandarin')


def _load_language_sequences(language, class_index):
    """Load one language's MFCC files and cut them into fixed-length sequences.

    Args:
        language: sub-directory of ``codePath`` holding that language's
            ``.npy`` files.
        class_index: position of the 1 in the one-hot label.

    Returns:
        (sequences, labels). ``sequences`` has shape
        (n, sequence_length, num_mfcc_features); ``labels`` has shape
        (n, sequence_length, len(language_dirs)). Trailing frames that do
        not fill a whole sequence are dropped.

    Raises:
        FileNotFoundError: no ``.npy`` file matches the language's pattern.
    """
    pattern = codePath + language + '/*.npy'
    files = glob.glob(pattern)
    if not files:
        # Without this check an empty directory surfaces much later as an
        # "inconsistent numbers of samples" error from train_test_split.
        raise FileNotFoundError(
            "No MFCC files found for pattern %r; check that codePath points "
            "at the training data." % pattern)
    # Stack all files in one call (the empty float array keeps the result
    # dtype as before) instead of re-copying the whole array per file.
    frames = np.vstack([np.empty((0, num_mfcc_features))]
                       + [np.load(file).T for file in files])
    num_sequences = len(frames) // sequence_length
    sequences = frames[:num_sequences * sequence_length].reshape(
        num_sequences, sequence_length, num_mfcc_features)
    one_hot = np.zeros(len(language_dirs), dtype=int)
    one_hot[class_index] = 1
    # Use sequence_length here too, so the labels stay aligned with the
    # sequences if the length is ever changed.
    labels = np.full((num_sequences, sequence_length, len(language_dirs)), one_hot)
    return sequences, labels


sequence_parts = []
label_parts = []
for class_index, language in enumerate(language_dirs):
    sequences, labels = _load_language_sequences(language, class_index)
    sequence_parts.append(sequences)
    label_parts.append(labels)
X_train = np.vstack(sequence_parts)
Y_train = np.vstack(label_parts)
# Release the per-language arrays before the split makes further copies.
del sequence_parts, label_parts, sequences, labels
total_sequence_length = len(X_train)
if total_sequence_length == 0:
    raise ValueError(
        "The MFCC files contain fewer than %d frames per language; "
        "no complete sequence could be built." % sequence_length)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.2)
with h5py.File("mfcc_dataset.hdf5", 'w') as hf:
    hf.create_dataset('X_train', data=X_train)
    hf.create_dataset('Y_train', data=Y_train)
    hf.create_dataset('X_val', data=X_val)
    hf.create_dataset('Y_val', data=Y_val)
# ---------------------------------------------------------------
# --------------------------BLOCK 2-------------------------------------
# Load MFCC Dataset created by the code in the previous steps
# Read the four arrays back from the HDF5 file; [:] copies each dataset
# into memory before the file is closed.
with h5py.File("mfcc_dataset.hdf5", 'r') as hf:
    X_train, Y_train, X_val, Y_val = [
        hf[dataset_name][:]
        for dataset_name in ('X_train', 'Y_train', 'X_val', 'Y_val')
    ]
# ---------------------------------------------------------------
# ---------------------------BLOCK 3------------------------------------
# Setting up the model for training
# NOTE(review): DROPOUT and RECURRENT_DROP_OUT are defined but none of the
# layers below reference them.
DROPOUT = 0.3
RECURRENT_DROP_OUT = 0.2
optimizer = optimizers.Adam(decay=1e-4)
# One training sample is a full sequence of sequence_length frames with
# 64 features each.
main_input = Input(shape=(sequence_length, 64), name='main_input')
# ### main_input = Input(shape=(None, 64), name='main_input')
# ### pred_gru = GRU(4, return_sequences=True, name='pred_gru')(main_input)
# ### rnn_output = Dense(3, activation='softmax', name='rnn_output')(pred_gru)
# Two stacked LSTMs that return the full sequence, so a 3-way softmax is
# produced for every frame.
layer1 = CuDNNLSTM(64, return_sequences=True, name='layer1')(main_input)
layer2 = CuDNNLSTM(32, return_sequences=True, name='layer2')(layer1)
layer3 = Dense(100, activation='tanh', name='layer3')(layer2)
rnn_output = Dense(3, activation='softmax', name='rnn_output')(layer3)
model = Model(inputs=main_input, outputs=rnn_output)
print('\nCompiling model...')
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])
model.summary()
history = model.fit(X_train, Y_train, batch_size=32, epochs=75, validation_data=(X_val, Y_val), shuffle=True, verbose=1)
# Saved file is read again by load_weights() in the inference setup.
model.save('sld.hdf5')
# ---------------------------------------------------------------
# --------------------------BLOCK 4-------------------------------------
# Inference Mode Setup
# Rebuild the network for frame-by-frame inference: batch size 1, one time
# step, 64 features. Layer names match the training model.
streaming_input = Input(name='streaming_input', batch_shape=(1, 1, 64))
# NOTE(review): only layer1 is stateful; layer2 is not, so presumably its
# state is reset on every predict() call — confirm this is intended.
pred_layer1 = CuDNNLSTM(64, return_sequences=True, name='layer1', stateful=True)(streaming_input)
pred_layer2 = CuDNNLSTM(32, return_sequences=True, name='layer2')(pred_layer1)
pred_layer3 = Dense(100, activation='tanh', name='layer3')(pred_layer2)
pred_output = Dense(3, activation='softmax', name='rnn_output')(pred_layer3)
streaming_model = Model(inputs=streaming_input, outputs=pred_output)
# Weights come from the file written by model.save('sld.hdf5').
streaming_model.load_weights('sld.hdf5')
# streaming_model.summary()
# ---------------------------------------------------------------
# ---------------------------BLOCK 5------------------------------------
# Language Prediction for a random sequence from the validation data set
# Pick a random validation sequence, then a random frame inside it.
random_val_sample = np.random.randint(0, X_val.shape[0])
random_sequence_num = np.random.randint(0, len(X_val[random_val_sample]))
# Reshape the single frame to the (1, 1, 64) batch shape of streaming_model.
test_single = X_val[random_val_sample][random_sequence_num].reshape(1, 1, 64)
val_label = Y_val[random_val_sample][random_sequence_num]
# Labels are one-hot, so argmax recovers the class index.
true_label = language_name(np.argmax(val_label))
print("***********************")
print("True label is ", true_label)
single_test_pred_prob = streaming_model.predict(test_single)
pred_label = language_name(np.argmax(single_test_pred_prob))
print("Predicted label is ", pred_label)
print("***********************")
# ---------------------------------------------------------------
# ---------------------------BLOCK 6------------------------------------
## COMMENT/UNCOMMENT BELOW
# Prediction for all sequences in the validation set - Takes very long to run
print("Predicting labels for all sequences - (Will take a lot of time)")
# One predict() call per frame of every validation sequence; the class
# index with the highest probability is recorded for each frame.
list_pred_labels = []
for sequence_frames in X_val:
    for frame in sequence_frames:
        frame_probabilities = streaming_model.predict(frame.reshape(1, 1, 64))
        list_pred_labels.append(np.argmax(frame_probabilities))
# Tally how many frames were assigned to each class index.
pred_english, pred_hindi, pred_mandarin = [
    list_pred_labels.count(class_index) for class_index in (0, 1, 2)
]
print("Number of English labels = ", pred_english)
print("Number of Hindi labels = ", pred_hindi)
print("Number of Mandarin labels = ", pred_mandarin)
# ---------------------------------------------------------------
```
```Traceback (most recent call last):
File "C:\Users\SKYLAND-2\Documents\nipunmanral SLR\language_identification.py", line 79, in <module>
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.2)
File "C:\Users\SKYLAND-2\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\model_selection\_split.py", line 2417, in train_test_split
arrays = indexable(*arrays)
File "C:\Users\SKYLAND-2\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\utils\validation.py", line 378, in indexable
check_consistent_length(*result)
File "C:\Users\SKYLAND-2\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\utils\validation.py", line 332, in check_consistent_length
raise ValueError(
ValueError: Found input variables with inconsistent numbers of samples: [3, 0]```
Hi, I am trying to run the code from nipunmanral's spoken language identification project, and I received this error. This is my first time learning machine learning; I am trying to learn spoken language identification, which classifies the language spoken in an audio clip. I hope someone can share a tutorial or help fix the error.
I'm trying to train TFBertForNextSentencePrediction on my own corpus, not from scratch, but rather by taking the existing BERT model with only a next sentence prediction head and further training it on a specific corpus of text (pairs of sentences). Then I want to use the model I trained to extract sentence embeddings from the last hidden state for other texts.
Currently the problem I encounter is that after I train the keras model I am not able to extract the hidden states of the last layer before the next sentence prediction head.
Below is the code. Here I only train it on a few sentences just to make sure the code works.
Any help will be greatly appreciated.
Thanks,
Ayala
import os
from datetime import datetime

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.utils import to_categorical
from transformers import BertTokenizer, PreTrainedTokenizer, BertConfig, TFBertForNextSentencePrediction
PRETRAINED_MODEL = 'bert-base-uncased'
# set paths and file names
# Read the clock once so every component of the stamp comes from the same
# instant (separate now() calls could straddle a minute boundary).
now = datetime.now()
time_stamp = "_".join(str(part) for part in (now.year, now.month, now.day, now.hour, now.minute))
model_name = "pretrained_nsp_model"
model_dir_data = model_name + "_" + time_stamp
model_fn = model_dir_data + ".h5"
# All paths are resolved relative to the directory containing this script.
base_path = os.path.dirname(__file__)
input_path = os.path.join(base_path, "input_data")
output_path = os.path.join(base_path, "output_models")
model_path = os.path.join(output_path, model_dir_data)
# exist_ok avoids the check-then-create race of exists() followed by makedirs().
os.makedirs(model_path, exist_ok=True)
# set model checkpoint
checkpoint = ModelCheckpoint(os.path.join(model_path, model_fn), monitor="val_loss", verbose=1, save_best_only=True,
save_weights_only=True, mode="min")
# read data
max_length = 512
def get_tokenizer(pretrained_model_name):
    """Load and return the ``BertTokenizer`` for ``pretrained_model_name``."""
    return BertTokenizer.from_pretrained(pretrained_model_name)
def tokenize_nsp_data(A, B, max_length):
    """Tokenize sentence pairs ``(A[i], B[i])`` for next-sentence prediction.

    Uses the module-level ``tokenizer``.

    Args:
        A: list of first sentences.
        B: list of candidate next sentences, same length as ``A``.
        max_length: length every encoded pair is truncated/padded to.

    Returns:
        The tokenizer output (with attention mask) as TensorFlow tensors.
    """
    # padding="max_length" is the current spelling of the deprecated
    # pad_to_max_length=True.
    return tokenizer(A, B, add_special_tokens=True, max_length=max_length,
                     truncation=True, padding="max_length",
                     return_attention_mask=True, return_tensors="tf")
def get_data_features(data_inputs, max_length):
    """Pad or truncate every entry of ``data_inputs`` to ``max_length``.

    Args:
        data_inputs: mapping of feature name -> batch of sequences.
        max_length: target sequence length.

    Returns:
        dict with the same keys; each value is padded with 0 and truncated
        at the end ("post") by ``sequence.pad_sequences``.
    """
    return {
        feature_name: sequence.pad_sequences(
            data_inputs[feature_name], maxlen=max_length,
            truncating="post", padding="post", value=0)
        for feature_name in data_inputs
    }
def get_transformer_model(transformer_model_name):
    """Load ``TFBertForNextSentencePrediction`` with extra outputs enabled.

    Args:
        transformer_model_name: model name or directory accepted by
            ``from_pretrained``.

    Returns:
        The model, configured to return attentions and hidden states in a
        dict-style output.
    """
    # Start from the checkpoint's own configuration rather than a default
    # BertConfig(), so the architecture always matches the loaded weights;
    # the keyword arguments override just these three settings.
    config = BertConfig.from_pretrained(
        transformer_model_name,
        output_attentions=True,
        output_hidden_states=True,
        return_dict=True,
    )
    return TFBertForNextSentencePrediction.from_pretrained(transformer_model_name, config=config)
def get_keras_model(transformer_model):
    """Wrap ``transformer_model`` in a compiled Keras model.

    The model takes three int32 inputs of length ``max_length`` (module
    level) named 'input_ids', 'attention_mask' and 'token_type_ids', and
    outputs element 0 of the transformer's output.

    Args:
        transformer_model: callable model accepting a dict of those inputs.

    Returns:
        The compiled ``tf.keras.Model``; its summary is printed on creation.
    """
    def _token_input(name):
        # All three inputs share shape and dtype; only the name differs.
        return tf.keras.layers.Input(shape=(max_length,), name=name, dtype='int32')

    ids_in = _token_input('input_ids')
    mask_in = _token_input('attention_mask')
    type_in = _token_input('token_type_ids')
    transformer_outputs = transformer_model(
        {'input_ids': ids_in, 'attention_mask': mask_in, 'token_type_ids': type_in})
    keras_model = tf.keras.Model(inputs=[ids_in, mask_in, type_in],
                                 outputs=transformer_outputs[0])
    keras_model.summary()
    keras_model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        optimizer=tf.optimizers.Adam(learning_rate=0.00005),
        metrics=['accuracy'])
    return keras_model
def get_metrices(true_values, pred_values):
    """Compute, print and return binary classification metrics.

    Args:
        true_values: ground-truth labels.
        pred_values: predicted labels.

    Returns:
        dict with keys 'confusion_matrix', 'acc_score', 'f1', 'precision'
        and 'recall'.
    """
    metrices = {
        'confusion_matrix': confusion_matrix(true_values, pred_values),
        'acc_score': accuracy_score(true_values, pred_values),
        'f1': f1_score(true_values, pred_values, average="binary"),
        'precision': precision_score(true_values, pred_values, average="binary"),
        'recall': recall_score(true_values, pred_values, average="binary"),
    }
    # Print each metric name followed by its value on the next line.
    for metric_name, metric_value in metrices.items():
        print(metric_name, ':\n', metric_value)
    return metrices
# get tokenizer
# Module-level tokenizer; tokenize_nsp_data() reads this name.
tokenizer = get_tokenizer(PRETRAINED_MODEL)
# train
# Four toy sentence pairs, just enough to check that the pipeline runs.
prompt = ["Hello", "Hello", "Hello", "Hello"]
next_sentence = ["How are you?", "Pizza", "How are you?", "Pizza"]
train_labels = [0, 1, 0, 1]
# One-hot encode the labels.
train_labels = to_categorical(train_labels)
train_inputs = tokenize_nsp_data(prompt, next_sentence, max_length)
train_data_features = get_data_features(train_inputs, max_length)
# val
# NOTE(review): the validation pairs and labels are identical to the
# training ones.
prompt = ["Hello", "Hello", "Hello", "Hello"]
next_sentence = ["How are you?", "Pizza", "How are you?", "Pizza"]
val_labels = [0, 1, 0, 1]
val_labels = to_categorical(val_labels)
val_inputs = tokenize_nsp_data(prompt, next_sentence, max_length)
val_data_features = get_data_features(val_inputs, max_length)
# get transformer model
transformer_model = get_transformer_model(PRETRAINED_MODEL)
# get keras model
model = get_keras_model(transformer_model)
# Stop when val_loss has not improved by at least 0.005 for 4 epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, min_delta=0.005, verbose=1)
# Multiply the learning rate by 0.2 after 2 epochs without improvement.
# min_delta is the current name of the deprecated ``epsilon`` argument.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_delta=0.001)
callback_list = [early_stop, reduce_lr, checkpoint]
# Inputs are passed in the order get_keras_model() declared them.
history = model.fit([train_data_features['input_ids'], train_data_features['attention_mask'],
                     train_data_features['token_type_ids']], np.array(train_labels), batch_size=2, epochs=3,
                    validation_data=([val_data_features['input_ids'], val_data_features['attention_mask'],
                                      val_data_features['token_type_ids']], np.array(val_labels)), verbose=1,
                    callbacks=callback_list)
# Save the transformer sub-model to model_path.
# NOTE(review): presumably layers[3] is the transformer (it follows the three
# Input layers created in get_keras_model) — confirm the index.
model.layers[3].save_pretrained(model_path) # need to save this and make sure i can get the hidden states
## predict
# load model
transformer_model = get_transformer_model(model_path)
model = get_keras_model(transformer_model)
# get_keras_model() already prints a summary, so it appears twice.
model.summary()
# Load the weights file written by the ModelCheckpoint callback during fit().
model.load_weights(os.path.join(model_path, model_fn))
# test
prompt = ["Hello", "Hello"]
next_sentence = ["How are you?", "Pizza"]
test_labels = [0, 1]
test_df = pd.DataFrame({'A': prompt, 'B': next_sentence, 'label': test_labels})
# One-hot encode the *test* labels (the original passed val_labels, which
# are the already-encoded validation labels).
test_labels = to_categorical(test_labels)
test_inputs = tokenize_nsp_data(prompt, next_sentence, max_length)
test_data_features = get_data_features(test_inputs, max_length)
# predict
pred_test = model.predict([test_data_features['input_ids'], test_data_features['attention_mask'], test_data_features['token_type_ids']])
# The model outputs logits; softmax turns them into class probabilities.
preds = tf.keras.activations.softmax(tf.convert_to_tensor(pred_test)).numpy()
true_test = test_df['label'].to_list()
# Predict class 1 when its probability exceeds 0.5.
pred_test = [1 if p[1] > 0.5 else 0 for p in preds]
test_df['pred_val'] = pred_test
metrices = get_metrices(true_test, pred_test)
I am also attaching a picture from the debugging mode in which I try (with no success) to view the hidden state. The problem is I am not able to see and save the transform model I trained and view the embeddings of the last hidden state. I tried converting the KerasTensor to numpy array but without success.
The issue resides in your 'get_keras_model()' function. There you specified that you are only interested in the first element of the output (i.e. the logits) with:
X = transformer_model({'input_ids': input_ids, 'attention_mask': input_masks_ids, 'token_type_ids': token_type_ids})[0]
Just do the index selection as conditional like this to get the whole output of the model
def get_keras_model(transformer_model, is_training=True):
###your other code
X = transformer_model({'input_ids': input_ids, 'attention_mask': input_masks_ids, 'token_type_ids': token_type_ids})
if is_training:
X= X[0]
###your other code
return model
#predict
###your other code
model = get_keras_model(transformer_model, is_training=False)
###your other code
print(pred_test.keys())
Output:
odict_keys(['logits', 'hidden_states', 'attentions'])
P.S.: The BertTokenizer can truncate and add padding by itself (see the documentation).
I had a basic keras network predicting one value and it worked fine. I tried adding support for predicting 4 values, but when I do that, the output instantly trends to 0. Right now, the network is the simplistic model just for testing.
The input for the network is an array of shape (90,) and the output should have 4 values. I tried having an output layer with 4 nodes as well, but that showed the same behavior of this one. I've also tried some various loss functions.
def runNN(training_data, training_labels, test_data, test_labels, model=None):
    """Build (unless one is supplied) and fit the four-output network.

    Args:
        training_data: training inputs with 90 features per sample.
        training_labels: indexable holding the four target arrays, in the
            order preretirement, cola, initialNC, finalNC.
        test_data: accepted but not used by the code shown here.
        test_labels: accepted but not used by the code shown here.
        model: optional ready-to-fit model whose outputs carry the four
            names above; when ``None`` a new model is built and compiled.
    """
    if model is None:
        inp = Input(shape=(90,), name='input')
        # Fixed: the original line had unbalanced parentheses and applied
        # ``inp`` to the string 'relu' instead of to the Dense layer.
        hidden = Dense(units=90, activation='relu')(inp)
        hidden = Dropout(0.5)(hidden)
        hidden = Dense(units=180, activation='relu')(hidden)
        hidden = Dropout(0.5)(hidden)
        # One single-unit head per target.
        # NOTE(review): relu on the output heads clamps predictions at 0;
        # presumably a linear output is wanted for regression — confirm.
        output1 = Dense(1, activation='relu', name='preretirement')(hidden)
        output2 = Dense(1, activation='relu', name='cola')(hidden)
        output3 = Dense(1, activation='relu', name='initialNC')(hidden)
        output4 = Dense(1, activation='relu', name='finalNC')(hidden)
        complete_model = Model(inputs=[inp], outputs=[output1, output2, output3, output4])
        optimizer = Adam(lr=.0003)
        complete_model.compile(loss='mean_absolute_error',
                               optimizer=optimizer,
                               metrics=['mean_absolute_error'])
    else:
        # Reuse the supplied model instead of failing on an undefined name.
        complete_model = model
    complete_model.fit(training_data, {'preretirement': training_labels[0],
                                       'cola': training_labels[1],
                                       'initialNC': training_labels[2],
                                       'finalNC': training_labels[3]},
                       epochs=10, batch_size=128)
The output after 1 epoch, and anything afterwards, is [0,0,0,0] for each test point. It should be a 4 item list with values between 0 and 1 such as [.34,.56,.12,.87]
Probably you're doing a prediction task or function fitting task. Two suggestions might help you:
sigmoid usually works better than relu in prediction task.
Do not use activation function at the final output layer.
The code below is modified from yours and it works fine.
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras.optimizers import Adam
# Settings shared by the layers and the training call.
dropout_rate = .5
activate_function = 'sigmoid'
num_iteration = 20
# Shared trunk: 90 inputs -> Dense(90) -> Dropout -> Dense(180) -> Dropout.
inp = Input(shape = (90,), name = 'input')
model = Dense(units = 90, activation=activate_function)(inp)
model = Dropout(rate=dropout_rate)(model)
model = Dense(units = 180, activation=activate_function)(model)
model = Dropout(rate=dropout_rate)(model)
# Four single-unit heads with no activation argument, one per target.
output1 = Dense(units=1, name = 'preretirement')(model)
output2 = Dense(units=1, name = 'cola')(model)
output3 = Dense(units=1, name = 'initialNC')(model)
output4 = Dense(units=1, name = 'finalNC')(model)
# # Your original code
# output1 = Dense(units=1, activation = activate_function, name = 'preretirement')(model)
# output2 = Dense(units=1, activation = activate_function,name = 'cola')(model)
# output3 = Dense(units=1, activation = activate_function,name = 'initialNC')(model)
# output4 = Dense(units=1, activation = activate_function,name = 'finalNC')(model)
complete_model = Model(inputs = [inp], outputs = [output1, output2, output3, output4])
optimizer = Adam(lr = .0003)
# The same loss is applied to every head.
complete_model.compile(loss='mean_absolute_error',
optimizer=optimizer,
metrics=['mean_absolute_error'])
# generate data for training the model
import numpy as np


def _make_labels(data):
    """Return the four targets [m, sin(m), cos(m), m*m] for each row of ``data``.

    ``m`` is the mean of the row's features.
    """
    row_mean = np.sum(data, axis=1) / data.shape[1]
    return np.column_stack([row_mean, np.sin(row_mean), np.cos(row_mean), row_mean * row_mean])


num_train = 4000  # the number of training instances
# a normal distribution with mean=2, variance=1
training_data = np.random.normal(2, 1, (num_train, 90))
training_labels = _make_labels(training_data)
print(training_data.shape, training_labels.shape)
# generate data for testing the model
test_data = np.random.normal(0, 1, (10, 90))  # 10 test instances
# Fixed: the test labels are derived from test_data (the original computed
# them from the first rows of training_data).
test_labels = _make_labels(test_data)
print(test_data.shape, test_labels.shape)
# Column i of training_labels is the target of the i-th named output head.
head_names = ('preretirement', 'cola', 'initialNC', 'finalNC')
targets_by_head = {
    head_name: training_labels[:, column]
    for column, head_name in enumerate(head_names)
}
complete_model.fit(training_data, targets_by_head,
                   epochs=num_iteration,
                   batch_size=128)
# predict() returns one array per output head.
results = complete_model.predict(test_data)
for row in range(10):
    print('true', test_labels[row])
    print('predicted', *(results[head][row, 0] for head in range(4)))
    print('--------------------------')
The code produces
There is a neural network that classifies the sentiment of the reviews. The accuracy is not 100%, hence there are texts that are recognized by the network incorrectly. How can I see them? I tried my function, but it gives an error
# Pool all four labelled subsets, then make a fresh 50/50 split.
data = pd.concat([positive_train_data,negative_train_data,positive_test_data,negative_test_data],ignore_index = True)
data.reset_index(drop=True,inplace=True)
x = data.Text
y = data.Sentiment
x_train, x_test, y_train1, y_test = train_test_split(x, y, test_size = 0.50, random_state = 2000)
# Report the class balance of each split (label 0 = negative, 1 = positive).
print( "Train set has total {0} entries with {1:.2f}% negative, {2:.2f}% positive".format(len(x_train),
(len(x_train[y_train1 == 0]) / (len(x_train)*1.))*100,
(len(x_train[y_train1 == 1]) / (len(x_train)*1.))*100))
print ("Test set has total {0} entries with {1:.2f}% negative, {2:.2f}% positive".format(len(x_test),
(len(x_test[y_test == 0]) / (len(x_test)*1.))*100,
(len(x_test[y_test == 1]) / (len(x_test)*1.))*100))
# TF-IDF over unigrams and bigrams; the vocabulary is fitted on the
# training texts only.
tvec1 = TfidfVectorizer(max_features=10000,ngram_range=(1, 2),min_df=3,use_idf=1,smooth_idf=1,sublinear_tf=1,stop_words = 'english')
tvec1.fit(x_train)
x_train_tfidf = tvec1.transform(x_train)
print(x_test.shape)
x_test_tfidf = tvec1.transform(x_test).toarray()
# NOTE(review): input_dim=10000 assumes the fitted vocabulary reaches
# max_features=10000 — confirm.
model = Sequential()
model.add(Dense(100, activation='relu', input_dim=10000))
model.add(Dropout(0.25))
model.add(Dense(50,activation = 'relu'))
# Single sigmoid unit: one value per text.
model.add(Dense(1, activation='sigmoid'))
optimiz = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model.compile(loss = 'binary_crossentropy',optimizer = optimiz ,metrics = ['accuracy'])
hist = model.fit(x_train_tfidf,y_train1,validation_data = (x_test_tfidf,y_test ),epochs = 5,batch_size = 64)
And my function
y_pred_vect = model.predict(x_test_tfidf)
# boolean mask
# NOTE(review): this compares the raw model outputs with the labels, with
# no thresholding.
mask = (y_pred_vect != y_test).any(axis=1)
print(mask)
print(len(mask))
num_words=5000 # only use the top 5000 words
INDEX_FROM=3 # word index offset
# this step is needed to get `test_x` in its original form (before tokenization):
# NOTE(review): test_x comes from the IMDB dataset, while the mask was built
# from x_test_tfidf; presumably these are not the same samples — confirm.
(train_x, _), (test_x, _) = imdb.load_data(num_words=num_words, index_from=INDEX_FROM)
x_wrong = test_x[mask]
# Build the id -> word table, shifted by INDEX_FROM to leave room for the
# three special tokens added below.
word_to_id = imdb.get_word_index()
word_to_id = {k:(v+INDEX_FROM) for k,v in word_to_id.items()}
word_to_id["<PAD>"] = 0
word_to_id["<START>"] = 1
word_to_id["<UNK>"] = 2
id_to_word = {value:key for key,value in word_to_id.items()}
# Turn each selected review back into a space-separated string.
all_wrong_sents = [' '.join(id_to_word[id] for id in sent) for sent in x_wrong]
print(all_wrong_sents[:10])
Error on line -
mask = (y_pred_vect != y_test).any(axis=1)
Data must be 1-dimensional
Try this...
import numpy as np
# The model ends in a single sigmoid unit, so predict() returns one
# probability per sample with shape (n, 1). Drop only that last axis (a bare
# squeeze would also collapse a one-sample batch), then threshold at 0.5 to
# get 0/1 labels before comparing with the true labels.
predicted_labels = (np.squeeze(y_pred_vect, axis=1) > 0.5).astype(int)
# np.asarray makes the comparison positional, independent of y_test's index.
mask = predicted_labels != np.asarray(y_test)
I have the following custom layer:
import keras.backend as K
from keras.layers import Lambda
def l2_norm(x):
    """Return the L2 norm of ``x`` taken along axis 1.

    Args:
        x: backend tensor.

    Returns:
        Tensor with axis 1 reduced: sqrt(sum(x ** 2, axis=1)).
    """
    # NOTE(review): sqrt has an unbounded gradient at 0; if training yields
    # NaNs, consider adding K.epsilon() inside the sqrt — confirm.
    return K.sqrt(K.sum(x ** 2, axis=1))
which I later use in my model in this way:
class Arq(object):
    """1-D convolutional classifier with mean, max and L2 global pooling.

    The constructor only stores hyper-parameters; call
    ``build_convolutional_model`` before ``fit``.
    """

    def __init__(self, nb_filters_1=10, nb_filters_2=20, lenght_filters=4,
                 pool_size_1=4, pool_size_2=2, dropout_prob_1=0.5,
                 dropout_prob_2=0.5, frames=2580, frequency=128,
                 dense_size=100, nb_classes=8):
        """Store the hyper-parameters.

        Args:
            nb_filters_1: filters in the first two convolutions.
            nb_filters_2: filters in the third convolution.
            lenght_filters: kernel length of every convolution.
            pool_size_1: pool size after the first two convolutions.
            pool_size_2: pool size after the third convolution.
            dropout_prob_1: dropout rate after the first dense layer.
            dropout_prob_2: dropout rate after the second dense layer.
            frames: first dimension of one input sample.
            frequency: second dimension of one input sample.
            dense_size: units in each hidden dense layer.
            nb_classes: units in the softmax output layer.
        """
        self.nb_filters_1 = nb_filters_1
        self.nb_filters_2 = nb_filters_2
        self.lenght_filters = lenght_filters
        self.pool_size_1 = pool_size_1
        self.pool_size_2 = pool_size_2
        self.dropout_prob_1 = dropout_prob_1
        self.dropout_prob_2 = dropout_prob_2
        self.frames = frames
        self.frequency = frequency
        self.dense_size = dense_size
        self.nb_classes = nb_classes

    def build_convolutional_model(self):
        """Build and compile the network, storing it in ``self.model``."""
        input_placeholder = Input(shape=(self.frames, self.frequency))
        # Three convolution + max-pooling stages.
        conv_1 = Conv1D(self.nb_filters_1, self.lenght_filters,
                        activation='relu', border_mode='same')(input_placeholder)
        pool_1 = MaxPooling1D(self.pool_size_1)(conv_1)
        conv_2 = Conv1D(self.nb_filters_1, self.lenght_filters,
                        border_mode='same', activation='relu')(pool_1)
        pool_2 = MaxPooling1D(self.pool_size_1)(conv_2)
        conv_3 = Conv1D(self.nb_filters_2, self.lenght_filters,
                        border_mode='same', activation='relu')(pool_2)
        pool_3 = MaxPooling1D(self.pool_size_2)(conv_3)
        # Three global summaries of the last feature map, concatenated.
        global_mean = GlobalAveragePooling1D()(pool_3)
        global_max = GlobalMaxPooling1D()(pool_3)
        global_l2 = Lambda(l2_norm)(pool_3)
        concat = merge([global_mean, global_max, global_l2], mode='concat', concat_axis=-1)
        hidden = Dense(self.dense_size, activation='relu')(concat)
        drop_1 = Dropout(self.dropout_prob_1)(hidden)
        hidden_2 = Dense(self.dense_size, activation='relu')(drop_1)
        # Fixed: the second dropout now uses dropout_prob_2, which was
        # stored by __init__ but never read (dropout_prob_1 was used twice).
        drop_2 = Dropout(self.dropout_prob_2)(hidden_2)
        output = Dense(self.nb_classes, activation='softmax')(drop_2)
        model = Model(input=input_placeholder, output=output)
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
        self.model = model
        return

    def fit(self, x, y, batch_size, nb_epoch, validation_split):
        """Train ``self.model``; every argument is forwarded to its ``fit``."""
        self.model.fit(x, y, batch_size=batch_size, nb_epoch=nb_epoch,
                       validation_split=validation_split)
        return
When I have this architecture the model does not learn past 25% accuracy but when on the merge layer I remove the L2 norm, i.e. concat = merge([global_mean, global_max], mode='concat', concat_axis=-1) then it reaches around ~90% accuracy.
I changed nothing in the data or training procedure, which leads me to believe that there's something wrong with my custom function in the Lambda layer, but I can't figure out what.
norm_layer = Lambda(lambda x: K.l2_normalize(x, axis = 1))(input layer)
You should specify across what axis you want to normalize your data. Usually, it is a one-dimensional vector, so the axis should be 1.