Here's my model:
model = Sequential()
model.add(Xception(weights='imagenet', input_shape=(224, 224, 3), include_top=False))
model.add(GlobalAveragePooling2D())
model.add(Dense(4096, activation='relu', name='fc1'))
model.add(Dense(4096, activation='relu', name='fc2'))
model.add(Dense(1000, activation='relu', name='fc3'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid', name='fc4'))
model.layers[0].trainable = False
I want to use an SVM as the final classifier in this model. How can I do that?
Also, another question: I want to know the predicted class for a given input, but when I use
model.predict(x_test)
it only gives me probabilities. How can I solve that too?
You can use the neural network as a feature extractor and feed the outputs of its last remaining layer into your SVM. Try the following:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications import Xception

model = Sequential()
model.add(Xception(weights='imagenet', input_shape=(224, 224, 3), include_top=False))
model.add(GlobalAveragePooling2D())
model.add(Dense(4096, activation='relu', name='fc1'))
model.add(Dense(4096, activation='relu', name='fc2'))
model.add(Dense(1000, activation='relu', name='fc3'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid', name='fc4'))

# binary_crossentropy matches the single sigmoid output
model.compile(loss="binary_crossentropy", optimizer="adam")
model.summary()
model.fit(X, y, epochs=10)

model.pop()       # this removes the last layer (the sigmoid classification head)
model.summary()   # check the network
feature_mapping = model.predict(X)   # features for the SVM

from sklearn import svm
clf = svm.SVC()
clf.fit(feature_mapping, y)
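To answer your second question as well: scikit-learn's SVC returns class labels directly, and with the Keras sigmoid head you can threshold the probabilities yourself. A minimal sketch, assuming x_test is preprocessed the same way as X:

# Predicted classes from the SVM for new inputs
test_features = model.predict(x_test)        # features from the truncated network
predicted_classes = clf.predict(test_features)

# With the original sigmoid head (i.e. before model.pop()), you would instead
# threshold the probabilities to get class labels:
# predicted_classes = (model.predict(x_test) > 0.5).astype("int32")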
I have trained a text classification model that works well. Now I want to dig deeper and understand which words/phrases in a sentence had the most impact on the classification outcome, for each class.
I am using Keras for the classification, and below is the code I use to train the model. It's a simple embedding plus max-pooling text classification model.
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
# early stopping
callbacks = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0,
                                             patience=5, verbose=2, mode='auto',
                                             restore_best_weights=True)

# select optimizer
opt = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999,
                               epsilon=1e-07, amsgrad=False, name="Adam")

embedding_dim = 50

# declare model
model = Sequential()
model.add(layers.Embedding(input_dim=vocab_size,
                           output_dim=embedding_dim,
                           input_length=maxlen))
model.add(layers.GlobalMaxPool1D())
model.add(layers.Dense(10, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer=opt,
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

# fit model
history = model.fit(X_tr, y_tr,
                    epochs=20,
                    verbose=True,
                    validation_data=(X_te, y_te),
                    batch_size=10, callbacks=[callbacks])
loss, accuracy = model.evaluate(X_tr, y_tr, verbose=False)
How do I extract the phrases/words that have the maximum impact on the classification outcome?
It seems that the keywords you need are "neural network interpretability" and "feature attribution". One of the best-known methods in this area is called Integrated Gradients; it shows how the model prediction depends on each input feature (each word embedding, in your case).
This tutorial shows how to implement IG in pure TensorFlow for images, and this one uses the alibi library to highlight the words in the input text with the highest impact on a classification model.
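For illustration, here is a rough sketch of Integrated Gradients in plain TensorFlow for the embedding model above (not the tutorial's exact code; it assumes model is the trained Sequential model from the question and x is a single padded sequence of token ids):

import tensorflow as tf

def integrated_gradients(model, x, steps=50):
    # Attribution score per token position for one padded sequence `x` of shape (maxlen,).
    # Assumes the first layer of `model` is the Embedding layer.
    embedding_layer = model.layers[0]

    def rest_of_model(t):
        # apply every layer after the embedding
        for layer in model.layers[1:]:
            t = layer(t)
        return t

    emb = embedding_layer(tf.convert_to_tensor(x[None, :]))   # (1, maxlen, dim)
    baseline = tf.zeros_like(emb)                              # all-zeros baseline

    # interpolate between the baseline and the real embeddings
    alphas = tf.reshape(tf.linspace(0.0, 1.0, steps + 1), (-1, 1, 1))
    interpolated = baseline + alphas * (emb - baseline)        # (steps+1, maxlen, dim)

    with tf.GradientTape() as tape:
        tape.watch(interpolated)
        preds = rest_of_model(interpolated)                    # (steps+1, 1)
    grads = tape.gradient(preds, interpolated)

    # trapezoidal approximation of the path integral
    avg_grads = tf.reduce_mean((grads[:-1] + grads[1:]) / 2.0, axis=0)
    attributions = (emb[0] - baseline[0]) * avg_grads          # (maxlen, dim)

    # one score per token position: sum over the embedding dimension
    return tf.reduce_sum(attributions, axis=-1).numpy()

# token_scores = integrated_gradients(model, X_te[0])
# the largest positive scores point at the words pushing the prediction towards class 1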
I am trying to use a neural network for my regression problem in Python, but the output of the neural network is a straight horizontal line at zero. I have one input and obviously one output.
Here is my code:
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(1, input_dim=1, kernel_initializer='normal', activation='relu'))
    model.add(Dense(4, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', metrics=['mse'], optimizer='adam')
    model.summary()
    return model
# evaluate model
estimator = KerasRegressor(build_fn=baseline_model, epochs=50, batch_size=64,
                           validation_split=0.2, verbose=1)
kfold = KFold(n_splits=10)
results = cross_val_score(estimator, X_train, y_train, cv=kfold)
Here are the plots of NN prediction vs. target for both the training data and the test data (plots not shown here).
I have also tried different weight initializers (Xavier and He) with no luck!
I really appreciate your help
First of all, correct your syntax when adding the Dense layers: replace the double equals == with a single equals = for kernel_initializer, like below:
model.add(Dense(1, input_dim=1, kernel_initializer='normal', activation='relu'))
Then, to improve the performance, do the following (see the sketch after this list):
Increase the number of hidden neurons in the hidden layers.
Increase the number of hidden layers.
If you still have the same problem, try changing the optimizer and activation function. Tuning the hyperparameters may help you converge to a solution.
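For example, a wider and deeper variant of your baseline_model might look like this (just a sketch; the unit counts are arbitrary starting points, not tuned values, and it assumes the same imports as your code):

def baseline_model():
    # wider and deeper variant; layer sizes are arbitrary starting points
    model = Sequential()
    model.add(Dense(64, input_dim=1, kernel_initializer='normal', activation='relu'))
    model.add(Dense(32, kernel_initializer='normal', activation='relu'))
    model.add(Dense(16, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', metrics=['mse'], optimizer='adam')
    model.summary()
    return model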
EDIT 1
You also have to fit the estimator after cross-validation, like below:
estimator.fit(X_train, y_train)
and then you can evaluate on the test data as follows (since this is a regression problem, use a regression metric rather than accuracy):
prediction = estimator.predict(X_test)
# accuracy_score only works for class labels; use a regression metric instead
from sklearn.metrics import mean_squared_error
mean_squared_error(Y_test, prediction)
I have a time series prediction problem and am building an LSTM like below:
def create_model():
    model = Sequential()
    model.add(LSTM(50, kernel_regularizer=l2(0.01), recurrent_regularizer=l2(0.01),
                   bias_regularizer=l2(0.01),
                   input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dropout(0.591))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
When I train the model on 5 splits like below:
tss = TimeSeriesSplit(n_splits=5)
X = data.drop(labels=['target_prediction'], axis=1)
y = data['target_prediction']

for train_index, test_index in tss.split(X):
    train_X, test_X = X.iloc[train_index, :].values, X.iloc[test_index, :].values
    train_y, test_y = y.iloc[train_index].values, y.iloc[test_index].values
    model = create_model()
    history = model.fit(train_X, train_y, epochs=10, batch_size=64,
                        validation_data=(test_X, test_y), verbose=0, shuffle=False)
I get an overfitting problem; the graph of the loss is attached.
I am not sure why there is overfitting when I use regularizers in my Keras model. Any help is appreciated.
EDIT:
I tried the following architectures:
def create_model():
    model = Sequential()
    model.add(LSTM(20, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

def create_model(x, y):
    # define LSTM
    model = Sequential()
    model.add(Bidirectional(LSTM(20, return_sequences=True), input_shape=(x, y)))
    model.add(TimeDistributed(Dense(1, activation='sigmoid')))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
but it is still overfitting.
First of all, remove all your regularizers and dropout. You are literally throwing every trick out there at the problem, and a dropout of 0.591 is far too high.
Reduce the number of units in your LSTM. Start from there. Reach a point where your model stops overfitting.
Then, add dropout if required.
After that, the next step is to add tf.keras.layers.Bidirectional. If you are still not satisfied, then increase the number of layers. Remember to keep return_sequences=True for every LSTM layer except the last one, as in the sketch below.
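For example, a stacked bidirectional setup could look like the following (a sketch only; the unit counts are placeholders and it assumes train_X is shaped as in your code):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense

model = Sequential()
# return_sequences=True on every LSTM except the last one
model.add(Bidirectional(LSTM(20, return_sequences=True),
                        input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Bidirectional(LSTM(10)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')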
I seldom come across networks using layer regularization despite its availability, because dropout and layer regularization have a similar effect and people usually go with dropout (at most, I have seen 0.3 being used).
I implemented an LSTM with Keras and trained it on a corpus. Now I want to take the output of the NN and pass it as input to another NN model and test on a test set. I saved the model of the NN, but I do not know how to extract the output and feed it to the other NN. How can I do it?
This is my code:
model = Sequential()
model.add(Embedding(vocab_size, 50, input_length=seq_length))
model.add(LSTM(100, return_sequences=True))
model.add(LSTM(100))
model.add(Dense(100, activation='relu'))
model.add(Dense(vocab_size, activation='softmax'))
print(model.summary())
# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit model
model.fit(X, y, batch_size=128, epochs=500)
# save the model to file
model.save('model.h5')
There isn't any need to save the model in order to extract its output. You can simply use the predict() function to get your output.
In your case, to get the output from your model, pass the test input as shown:
prediction = model.predict(X_test)
where X_test is your test input and prediction will contain your output, which can then be printed.
To give this output as input to another model, first create the second model just as you created this one, and then pass the prediction variable as the input in its fit() call.
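A minimal sketch of that idea (the second network's architecture and the y_second targets are placeholders you would replace with your own):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# use the first model's predictions as input features for a second model
features = model.predict(X)                     # outputs of the first network

second_model = Sequential()
second_model.add(Dense(100, activation='relu', input_shape=(features.shape[1],)))
second_model.add(Dense(1, activation='sigmoid'))
second_model.compile(loss='binary_crossentropy', optimizer='adam')
second_model.fit(features, y_second, batch_size=128, epochs=10)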
To use the output of one model in another model, simply load it and use it:
import tensorflow as tf
from tensorflow.keras.layers import Dense, concatenate

m0 = tf.keras.models.load_model("path_to_model_here", compile=False)
m0_out = m0(some_input)                  # run the loaded model on the shared input
m1 = concatenate([some_input, m0_out])   # combine the raw input with m0's output
m1 = Dense(DENSE_LAYER_NEURONS, input_shape=(INPUT_SIZE + M0_SIZE,), activation="relu")(m1)
m1 = Dense(DENSE_LAYER_NEURONS, activation="relu")(m1)
...
I have just started using Keras and was trying to train a model with the Keras deep learning kit. It works until the epochs have run, but crashes just after that.
np.random.seed(1778)  # for reproducibility
need_normalise = True
need_validataion = True
nb_epoch = 2  # 8

# Creating model
model = Sequential()
model.add(Dense(512, input_shape=(dims,)))
model.add(PReLU())
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
opt = Adadelta(lr=1, decay=0.995, epsilon=1e-5)
model.compile(loss='binary_crossentropy', optimizer=opt)

auc_scores = []
best_score = -1
best_model = None

print('Training model...')
if need_validataion:
    for i in range(nb_epoch):
        # early_stopping = EarlyStopping(monitor='val_loss', patience=0, verbose=1)
        # model.fit(X_train, y_train, nb_epoch=nb_epoch, batch_size=256, validation_split=0.01, callbacks=[early_stopping])
        model.fit(X_train, y_train, nb_epoch=2, batch_size=256, validation_split=0.15)
        y_pre = model.predict_proba(X_valid)
        scores = roc_auc_score(y_valid, y_pre)
        auc_scores.append(scores)
        print(i, scores)
        if scores > best_score:
            best_score = scores
            best_model = model
    plt.plot(auc_scores)
    plt.show()
else:
    model.fit(X_train, y_train, nb_epoch=nb_epoch, batch_size=256)
    y_pre = model.predict_proba(X_test)[:, 1]
    print(roc_auc_score(y_test, y_pre))
Error received: I have pasted it over here, please have a look at it:
http://pastebin.com/dSw9ckkk
It looks like you have two classes, a positive class and a negative class, where the positive class labels are 1 minus the negative class labels. In that case, you can discard the negative class labels and make it a single-output binary problem:
model.add(Dense(1, activation='sigmoid'))  # instead of Dense(nb_classes) and Activation('softmax')
Alternatively, you can still train the model on both classes and just use the positive class in the AUC calculation:
roc_auc_score(y_test[:, 1], y_pre[:, 1])
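Putting the first option together, a sketch that reuses the layers and variables from your snippet (it assumes y_train is one-hot encoded with the positive class in column 1):

# single-output binary head; assumes y_train is one-hot encoded
# with the positive class in column 1
model = Sequential()
model.add(Dense(512, input_shape=(dims,)))
model.add(PReLU())
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer=opt)

y_train_pos = y_train[:, 1]   # keep only the positive-class column
model.fit(X_train, y_train_pos, nb_epoch=2, batch_size=256, validation_split=0.15)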