PyTorch: loading features and classes from .npy files - python

I am a complete rookie at moving from TensorFlow to PyTorch. In TensorFlow, I can simply load features and labels from separate .npy files and train a CNN on them. It is as simple as this:
def finetune_resnet(file_train_classes, file_train_features, name_model_to_save):
    # Let's load features and classes first
    print("Loading, organizing and pre-processing features")
    num_classes = 12
    x_train = np.load(file_train_features)
    y_train = np.load(file_train_classes)

    # Defining train as 70% and validation as 30% of the data.
    # The partition is stratified with a fixed random state,
    # therefore the partition will be the same for all networks.
    x_train, x_validation, y_train, y_validation = train_test_split(x_train, y_train, test_size=0.30, stratify=y_train, random_state=42)

    print("transforming to categorical")
    y_train = to_categorical(y_train, num_classes)
    y_validation = to_categorical(y_validation, num_classes)
    y_train = tf.constant(y_train, shape=[y_train.shape[0], num_classes])
    y_validation = tf.constant(y_validation, shape=[y_validation.shape[0], num_classes])

    print("preprocessing data")
    # Preprocessing data
    x_train = x_train.astype('float32')
    x_validation = x_validation.astype('float32')
    x_train /= 255.
    x_validation /= 255.

    print("Setting up the network")
    # Parameters for network training
    batch_size = 32
    epochs = 300
    sgd = SGD(lr=0.01)
    trainAug = ImageDataGenerator(rotation_range=30, zoom_range=0.15, width_shift_range=0.2, height_shift_range=0.2, shear_range=0.15, horizontal_flip=True, fill_mode="nearest")

    print("Compiling the network")
    # Load the model and prepare it for fine-tuning
    baseModel = ResNet50(weights="imagenet", include_top=False,
                         input_tensor=Input(shape=(224, 224, 3)))

    # construct the head of the model that will be placed on top of
    # the base model
    headModel = baseModel.output
    headModel = Flatten(name="flatten")(headModel)
    headModel = Dense(512, activation="relu")(headModel)
    headModel = Dropout(0.5)(headModel)
    headModel = Dense(num_classes, activation="softmax")(headModel)

    # place the head FC model on top of the base model (this will become
    # the actual model we will train)
    model = Model(inputs=baseModel.input, outputs=headModel)
    model.compile(loss="categorical_crossentropy", optimizer=sgd, metrics=["accuracy"])

    trainAug.fit(x_train)

    # Fit the model on the batches generated by trainAug.flow()
    print("[INFO] training head...")
    H = model.fit(trainAug.flow(x_train, y_train, batch_size=batch_size), steps_per_epoch=x_train.shape[0] // batch_size, epochs=epochs, validation_data=(x_validation, y_validation), callbacks=callbacks)
However, I have no idea how to load, train and evaluate the training and testing data when loading them from .npy files. I checked a tutorial that loads training data from folders, which is not what I want.
How can I train and test a ResNet-50 model, starting from ImageNet weights, while loading the train and test data from .npy files in PyTorch?
P.S.: most PyTorch training loops require a <class 'torch.utils.data.dataloader.DataLoader'> as input. Is it possible to transform my training data, which is in NumPy arrays, into such a format?
P.S.: you can try it with my data here

It seems like you need to create a custom Dataset.
class MyDataSet(torch.utils.data.Dataset):
    def __init__(self, x, y):
        super(MyDataSet, self).__init__()
        # store the arrays loaded from the .npy files
        assert x.shape[0] == y.shape[0]  # one label per sample
        self._x = x
        self._y = y

    def __len__(self):
        # a Dataset must know its size
        return self._x.shape[0]

    def __getitem__(self, index):
        x = self._x[index, :]
        y = self._y[index]
        return x, y
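For example, here is a minimal usage sketch (my addition, not part of the original answer), assuming file_train_features and file_train_classes are the .npy paths from the question and contain arrays of equal length; the default collate function of DataLoader converts the NumPy slices to torch tensors:
import numpy as np
import torch

# load the arrays once and hand them to the custom Dataset
x = np.load(file_train_features).astype('float32') / 255.
y = np.load(file_train_classes).astype('int64')
train_ds = MyDataSet(x, y)

# wrap the Dataset in a DataLoader, as required by most PyTorch training loops
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=32, shuffle=True)
for xb, yb in train_loader:
    print(xb.shape, yb.shape)  # batches of features and labels as torch tensors
    break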
You can further split MyDataSet into train and validation sets (e.g., using torch.utils.data.random_split).
You might also find TensorDataset useful.
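To sketch the fine-tuning part of the question as well, here is an illustration of my own (not the original answer's code). It assumes torchvision is installed, that the .npy features are 224x224x3 images stored channels-last, and that the classes are integer labels; ImageNet mean/std normalization and augmentation are omitted for brevity, and pretrained=True is the older torchvision API (newer versions use the weights= argument):
import numpy as np
import torch
import torch.nn as nn
from torchvision import models

num_classes = 12
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# load the arrays, scale to [0, 1] and move channels first: (N, H, W, C) -> (N, C, H, W)
x = np.load(file_train_features).astype('float32') / 255.
x = np.ascontiguousarray(np.transpose(x, (0, 3, 1, 2)))
y = np.load(file_train_classes).astype('int64')

# TensorDataset + random_split give a 70/30 split (not stratified, unlike the Keras code);
# the resulting DataLoaders then feed the training loop
dataset = torch.utils.data.TensorDataset(torch.from_numpy(x), torch.from_numpy(y))
n_val = int(0.3 * len(dataset))
train_ds, val_ds = torch.utils.data.random_split(dataset, [len(dataset) - n_val, n_val])
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_ds, batch_size=32)

# ResNet-50 with ImageNet weights; replace the final fully connected layer
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()  # expects integer class labels, not one-hot vectors
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

for epoch in range(10):
    model.train()
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        loss = criterion(model(xb), yb)
        loss.backward()
        optimizer.step()

    # plain validation accuracy after each epoch
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            correct += (model(xb).argmax(dim=1) == yb).sum().item()
            total += yb.numel()
    print('epoch %d: validation accuracy %.3f' % (epoch, correct / total))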

Related

Why is there no ability to generalize, even though the loss seems good? (ANN, Keras, high-dimensional data)

I am using tensorflow and keras for a binary classification problem.
I only have a training set of 81 samples (test size 21), but ~1900 features. I know that is too few samples and too many features, but it is a biological problem (gene-expression data), so I have to deal with it.
My model looks like this (using different numbers of neurons per layer, different numbers of hidden layers, regularization and dropout to deal with the high-dimensional data):
model = Sequential()
model.add(Input((input_shape,)))
for i in range(num_hidden):
    model.add(Dense(n_neurons, activation="relu", kernel_regularizer=keras.regularizers.l1_l2(l1_reg, l2_reg)))
    model.add(Dropout(dropout_rate))
model.add(Dense(1, activation="sigmoid"))
ann_optimizer = keras.optimizers.Adam()
model.compile(loss="binary_crossentropy",
              optimizer=ann_optimizer, metrics=['accuracy'])
I am using 10-fold nested cross-validation with a grid search in the inner fold, like this:
# fit and evaluate the model
# configure the inner cross-validation procedure (5 fold, 80 inner training dataset, 20 inner test dataset)
cv_inner = ShuffleSplit(n_splits=5, test_size=0.2, random_state=1)
# define the model
ann = KerasRegressor(build_fn=regressionModel_sequential, input_shape=X_train.shape[1],
                     batch_size=batch_size)
# use a pipeline to prevent leaky preprocessing (StandardScaler fit only on the 80% inner-training dataset)
pipe = Pipeline(steps=[('scaler', StandardScaler()), ('ann', ann)])
# define the grid search with inner CV to get good parameters
grid_search_result = GridSearchCV(
    pipe, param_grid, n_jobs=-1, cv=cv_inner, refit=True, verbose=0)
# refit=True refits a final model on the entire inner-training dataset
# execute search
grid_search_result.fit(X_train, y_train, ann__verbose=0)
logger.info('>>>>> est=%.3f, params=%s' % (grid_search_result.best_score_, grid_search_result.best_params_))
# to get loss curve
ann_val = regressionModel_sequential(input_shape=X_train.shape[1],
                                     n_neurons=grid_search_result.best_params_['ann__n_neurons'],
                                     l1_reg=grid_search_result.best_params_['ann__l1_reg'],
                                     l2_reg=grid_search_result.best_params_['ann__l2_reg'],
                                     num_hidden=grid_search_result.best_params_['ann__num_hidden'],
                                     dropout_rate=grid_search_result.best_params_['ann__dropout_rate'])
# Validation with outer 20 %
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
history = ann_val.fit(X_train, y_train, batch_size=batch_size, verbose=0,
                      validation_split=0.25, shuffle=True, epochs=grid_search_result.best_params_['ann__epochs'])
plot_history(history, directory, i)
# use best grid search reult for predicting on outer test dataset
y_predicted = ann_val.predict(X_test)
# print predicted
logger.info(y_predicted[:5])
logger.info(y_test[:5])
rmse = (np.sqrt(metrics.mean_squared_error(y_test, y_predicted)))
mae = (metrics.mean_squared_error(y_test, y_predicted))
r_squared = metrics.r2_score(y_test, y_predicted)
My loss curve seems good [loss plot], but the accuracy is very bad [accuracy plot, example from one outer fold].
Does anyone have suggestions on what I could do to improve my results?
I also know that the underlying biological question is very hard, and maybe not possible to solve.

train test split is not splitting correctly

I am still a beginner in AI and deep learning, but I wanted to test whether a neural network can learn to calculate the sum of two numbers, so I generated a dataset of 5000 samples and set the test size to 0.3, which should leave 3500 training samples. What was weird is that I found the model training on only 110 inputs instead of 3500.
The code used:
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np
from random import random
def generate_dataset(num_samples, test_size=0.33):
    """Generates train/test data for the sum operation
    :param num_samples (int): Number of total samples in the dataset
    :param test_size (float): Ratio of num_samples used as the test set
    :return x_train (ndarray): 2d array with input data for training
    :return x_test (ndarray): 2d array with input data for testing
    :return y_train (ndarray): 2d array with target data for training
    :return y_test (ndarray): 2d array with target data for testing
    """
    # build inputs/targets for the sum operation: y[0][0] = x[0][0] + x[0][1]
    x = np.array([[random()/2 for _ in range(2)] for _ in range(num_samples)])
    y = np.array([[i[0] + i[1]] for i in x])
    # split dataset into test and training sets
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size)
    return x_train, x_test, y_train, y_test


if __name__ == "__main__":
    # create a dataset with 5000 samples
    x_train, x_test, y_train, y_test = generate_dataset(5000, 0.3)
    # build model with 3 layers: 2 -> 5 -> 1
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(5, input_dim=2, activation="sigmoid"),
        tf.keras.layers.Dense(1, activation="sigmoid")
    ])
    # choose optimiser
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
    # compile model
    model.compile(optimizer=optimizer, loss='mse')
    # train model
    model.fit(x_train, y_train, epochs=100)
    # evaluate model on test set
    print("\nEvaluation on the test set:")
    model.evaluate(x_test, y_test, verbose=2)
    # get predictions
    data = np.array([[0.1, 0.2], [0.2, 0.2]])
    predictions = model.predict(data)
    # print predictions
    print("\nPredictions:")
    for d, p in zip(data, predictions):
        print("{} + {} = {}".format(d[0], d[1], p[0]))
The 110/110 you are seeing in your image is actually the batch count, not the sample count. So 110 batches * the default batch size of 32 gives you ~3500 training samples, which matches what you'd expect as 70% of 5000.
You can see by backing into it the other way that the last batch would be a partial batch, since it's not evenly divisible by 32:
>>> (.7 * 5000) / 110
31.818181818181817
In neural networks, an epoch is a full pass over the training data. Training proceeds in small batches (also called steps), and those batches are what Keras logs in the progress bar.
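To make the arithmetic explicit, here is a small check of my own (3500 training samples and the Keras default batch size of 32 are the values implied by the question):
import math

train_samples = 3500  # 70% of 5000
batch_size = 32       # Keras default when batch_size is not passed to model.fit

steps_per_epoch = math.ceil(train_samples / batch_size)
print(steps_per_epoch)  # 110 -> the "110/110" shown in the Keras progress bar
print(train_samples - (steps_per_epoch - 1) * batch_size)  # 12 samples in the final, partial batch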

LSTM/GRU TimeSeries multioutput strategy forecasts give dropped values

Currently, I'm playing with a stock prediction task which I am trying to solve using LSTM/GRU.
Problem: after training the LSTM/GRU, the predicted values show a huge drop.
[Model training process plot]
The train and test data are simply generated using pd.shift in the series_to_supervised function below.
df['Mid'] = df['Low'] + df['High'] / 2
n_lag = 1 # Lag columns back
n_seq = 1*50 # TimeSteps to predict
seq_col = 'Mid'
seq_col_t = f'{seq_col}(t)'
split_date = '2018-01-01'
def series_to_supervised(data: pd.DataFrame,
                         seq_col: str,
                         n_in: int = 1,
                         n_out: int = 1,
                         drop_seq_col: bool = True,
                         dropna: bool = True):
    """Convert a time series into a supervised learning problem
    {input sequence, forecast sequence}
    """
    # input sequence (t-n, ... t-1) -> positive shift
    for i in range(n_in, 0, -1):
        data[f'{seq_col}(t-{i})'] = data[seq_col].shift(i)
    # no sequence -> no shift
    data[f'{seq_col}(t)'] = data[seq_col]
    for i in range(1, n_out+1):
        # forecast sequence (t, t+1, ... t+n) -> negative shift
        data[f'{seq_col}(t+{i})'] = data[seq_col].shift(-i)
    if drop_seq_col:
        data = data.drop(seq_col, axis=1)
    if dropna:
        data.dropna(inplace=True)
    return data
df = series_to_supervised(df, seq_col=seq_col, n_in=n_lag, n_out=n_seq)
mask = df.index < split_date
train, test = df[mask], df[~mask]
X_cols = ['Mid(t-1)']
y_cols = train.filter(like='Mid(t+').columns
X_train, y_train, X_test, y_test = train[X_cols], train[y_cols], test[X_cols], test[y_cols]
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1, 1))
# also returns np.ndarray
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)
y_train = y_train.values
y_test = y_test.values
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, GRU
from keras.optimizers import Adam, RMSprop, Adamax
from keras.callbacks import ModelCheckpoint
def get_model(X, y, n_batch):
    num_classes = y.shape[1]
    # design network
    model = Sequential()
    # for stock predictions, LSTM stateful=True has to be used
    model.add(GRU(10, batch_input_shape=(n_batch, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dropout(0.3))
    model.add(Dense(num_classes))
    opt = Adam(learning_rate=0.01)
    # opt = RMSprop(learning_rate=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt)
    return model
def reshape_batch(X_train, y_train, X_test, y_test, n_batch):
    # reshape training data into [samples, timesteps, features]
    X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    # cut to an equal number of batches (without remainder),
    # needed for LSTM stateful=True
    train_cut = X_train.shape[0] % n_batch
    test_cut = X_test.shape[0] % n_batch
    if train_cut > 0:
        X_train = X_train[:-train_cut]
        y_train = y_train[:-train_cut]
    if test_cut > 0:
        X_test = X_test[:-test_cut]
        y_test = y_test[:-test_cut]
    return X_train, y_train, X_test, y_test
# fit an LSTM network to the training data
def fit_lstm(X_train: np.ndarray,
             y_train: np.ndarray,
             n_lag: int,
             n_seq: int,
             n_batch: int,
             nb_epoch: int,
             X_test: np.ndarray = None,
             y_test: np.ndarray = None):
    model = get_model(X_train, y_train, n_batch)
    # fit network
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test), callbacks=None,
                        epochs=nb_epoch, batch_size=n_batch, verbose=1, shuffle=False)
    print('Predict:', model.predict(X_test, batch_size=n_batch))
    model.reset_states()
    return model, history
n_batch = 32
nb_epoch = 40
X_train, y_train, X_test, y_test = reshape_batch(X_train, y_train, X_test, y_test, n_batch)
model, history = fit_lstm(X_train, y_train, n_lag, n_seq, n_batch, nb_epoch, X_test=X_test, y_test=y_test)
What I have tried:
- Different optimizers (basically all that are available in Keras)
- Different recurrent network structures (GRU/LSTM)
- Different learning rates
- Different numbers of epochs, from 1 to 1500
- Adding/removing Dropout layers with different rates (0.1-0.7)
- Different numbers of LSTM/GRU neurons (1-100)
- Different numbers of LSTM/GRU layers, via the return_sequences parameter, with more Dropout layers
- Different numbers of forecast features (t+1, t+2, ... t+n), from 1 to 365
- Different numbers of lag features (t-1, t-2, ... t-n), from 1 to 5
- Different scaling ranges for normalization, (0, 1) and (-1, 1)
- Different n_batch values: 1, 8, 16, 32
What could be causing the LSTM/GRU to behave so strangely? And what else should I try to make it work properly?

Getting gradient of a Keras model output w.r.t input, but with the last layer being an SVM

I have a CNN model built in Keras. I then took the output of its last layer as features and trained an SVM on them.
Is it possible to now find the gradient of the SVM's output w.r.t. the CNN model's input?
I know of this method (Getting gradient of model output w.r.t weights using Keras) and am able to use it to get the gradient w.r.t. the input for the layer I am pulling the features out of. I can also get the numerical gradient of the SVM w.r.t. its input, although at the moment it is a bit of a mess; I would appreciate some input here as well, actually.
But now I need to somehow combine these two to get the gradient of the SVM's output with respect to the input of the entire CNN model.
"""
Main CNN script
"""
# Imports ##
# general
import matplotlib.pyplot as plt
import numpy as np
# ML libraries
from tensorflow.keras.datasets import mnist
# ML utilities
from tensorflow.keras.utils import to_categorical
# Python scripts used
import train_CNN
import load_CNN
import train_subSVMs
import load_subSVMs
import train_finalSVM
import load_finalSVM
import joblib
def save_array(array, name):
    joblib.dump(array, name + '.pkl', compress=3)
    return


def load_array(array, name):
    array = joblib.load(array, name)
    return array


def show_data_example(i, dataset):
    # show some of the images in the dataset
    # call multiple times for multiple images
    # squeeze is necessary here to get rid of the extra dimension introduced in reshaping
    print('\nExample Image: %s from selected dataset' % i)
    plt.imshow(np.squeeze(dataset[i]), cmap=plt.get_cmap('gray'))
    plt.show()
    return
def load_and_encode(target_shape):
    # load dataset
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train, y_train = X_train[:, :, :], y_train[:]
    X_test, y_test = X_test[:, :, :], y_test[:]
    print('Loaded Mnist dataset')
    print('Train: X=%s, y=%s' % (X_train.shape, y_train.shape))
    print('Test: X=%s, y=%s' % (X_test.shape, y_test.shape))
    # encode y data
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    # normalise X data (X/255 -> [0,1])
    X_train = X_train/255.0
    X_test = X_test/255.0
    # currently the dimensions are (m x 28 x 28);
    # make them (m x 28 x 28 x 1), 3-dimensional, for convolutional networks
    X_train = X_train.reshape(X_train.shape[0], target_shape[0], target_shape[1], target_shape[2])
    X_test = X_test.reshape(X_test.shape[0], target_shape[0], target_shape[1], target_shape[2])
    # show an arbitrary example image from the training set
    show_data_example(12, X_train)
    return X_train, y_train, X_test, y_test
image_shape = (28,28,1)
# load and encode mnist data
X_train, y_train, X_test, y_test = load_and_encode(image_shape)
# hyper-parameters
learning_rate = 0.1
momentum = 0.9
dropout = 0.5
batch_size = 128
epochs = 50
decay = 1e-6
number_of_classes = 10
# store required data into a packet to send to various imports
packet = [learning_rate, momentum, dropout, batch_size, epochs, decay,
          number_of_classes, image_shape,
          X_train, y_train, X_test, y_test]
data = [X_train, y_train, X_test, y_test]
#CNN_model = train_CNN.train_model(packet, save_model = 'True')
CNN_model = load_CNN.load_model(packet) # keras sequential model
#subSVM1, subSVM2, subSVM3, features = train_subSVMs.train(CNN_model, data, c=0.1, save_model = 'True', get_accuracies= 'True')
subSVM1, subSVM2, subSVM3, features = load_subSVMs.load(CNN_model, data, c=0.1, get_accuracies='False')
subSVMs = [subSVM1, subSVM2, subSVM3]
feature1_train, feature1_test,\
feature2_train, feature2_test,\
feature3_train, feature3_test = features
final_SVM = joblib.load('saved_finalSVM.pkl') # sklearn svm trained from features
NUMBER = 48
plt.imshow(np.squeeze(X_train[NUMBER,:,:,:]), cmap=plt.get_cmap('binary'))
# gradients of features wrt to input
import tensorflow.keras.backend as K
gradients = K.gradients(CNN_model.get_layer(name='feature1').output, CNN_model.input) # K.gradients(y,x) for dy/dx
f = K.function([CNN_model.input], gradients)
x = np.expand_dims(X_train[NUMBER,:,:,:],axis=0)
a=f([x])

How to initialise the weights of an MLP using an autoencoder #2nd part - Deep autoencoder #3rd part - Stacked autoencoder

I have built an autoencoder (1 encoder 8:5, 1 decoder 5:8) which takes the Pima-Indians-Diabetes dataset (https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv) and reduces its dimension from 8 to 5. I would now like to use these reduced features to classify the data with an MLP, but I have some problems with my basic understanding of the architecture. How do I use the weights of the autoencoder and feed them into the MLP? I have checked these threads - https://github.com/keras-team/keras/issues/91 and https://www.codementor.io/nitinsurya/how-to-re-initialize-keras-model-weights-et41zre2g. The questions are: which weight matrix should I consider, the one for the encoder part or the decoder part? When I add the layers for the MLP, how do I initialise the weights with these saved weights? I am not getting the exact syntax. Also, should my MLP start with 5 neurons, since my reduced dimension is 5? What are the possible dimensions of the MLP for this binary classification problem? Could anyone elaborate, please?
The deep autoencoder code is as follows:
# from keras.models import Sequential
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy
# Data pre-processing...
# load pima indians dataset
dataset = numpy.loadtxt("C:/Users/dibsa/Python Codes/pima.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:, 0:8]
Y = dataset[:, 8]
# Split data into training and testing datasets
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42)
# scale the data within [0-1] range
scalar = MinMaxScaler()
x_train = scalar.fit_transform(x_train)
x_test = scalar.fit_transform(x_test)
# Autoencoder code begins here...
encoding_dim1 = 5 # size of encoded representations
encoding_dim2 = 3 # size of encoded representations in the bottleneck layer
# this is our input placeholder
input_data = Input(shape=(8,))
# "encoded" is the first encoded representation of the input
encoded = Dense(encoding_dim1, activation='relu', name='encoder1')(input_data)
# "enc" is the second encoded representation of the input
enc = Dense(encoding_dim2, activation='relu', name='encoder2')(encoded)
# "dec" is the lossy reconstruction of the input
dec = Dense(encoding_dim1, activation='sigmoid', name='decoder1')(enc)
# "decoded" is the final lossy reconstruction of the input
decoded = Dense(8, activation='sigmoid', name='decoder2')(dec)
# this model maps an input to its reconstruction
autoencoder = Model(inputs=input_data, outputs=decoded)
autoencoder.compile(optimizer='sgd', loss='mse')
# training
autoencoder.fit(x_train, x_train,
                epochs=300,
                batch_size=10,
                shuffle=True,
                validation_data=(x_test, x_test))  # need more tuning
# test the autoencoder by encoding and decoding the test dataset
reconstructions = autoencoder.predict(x_test)
print('Original test data')
print(x_test)
print('Reconstructed test data')
print(reconstructions)
The stacked autoencoder code is as follows:
# from keras.models import Sequential
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy
# Data pre-processing...
# load pima indians dataset
dataset = numpy.loadtxt("C:/Users/dibsa/Python Codes/pima.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:, 0:8]
Y = dataset[:, 8]
# Split data into training and testing datasets
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42)
# scale the data within [0-1] range
scalar = MinMaxScaler()
x_train = scalar.fit_transform(x_train)
x_test = scalar.fit_transform(x_test)
# Autoencoder code goes here...
encoding_dim1 = 5 # size of encoded representations
encoding_dim2 = 3 # size of encoded representations in the bottleneck layer
# this is our input placeholder
input_data1 = Input(shape=(8,))
# the first encoded representation of the input
encoded1 = Dense(encoding_dim1, activation='relu',
                 name='encoder1')(input_data1)
# the first lossy reconstruction of the input
decoded1 = Dense(8, activation='sigmoid', name='decoder1')(encoded1)
# this model maps an input to its first layer of reconstructions
autoencoder1 = Model(inputs=input_data1, outputs=decoded1)
# this is the first encoder model
enc1 = Model(inputs=input_data1, outputs=encoded1)
autoencoder1.compile(optimizer='sgd', loss='mse')
# training
autoencoder1.fit(x_train, x_train, epochs=300,
                 batch_size=10, shuffle=True,
                 validation_data=(x_test, x_test))
FirstAEoutput = autoencoder1.predict(x_train)
input_data2 = Input(shape=(encoding_dim1,))
# the second encoded representations of the input
encoded2 = Dense(encoding_dim2, activation='relu',
                 name='encoder2')(input_data2)
# the final lossy reconstruction of the input
decoded2 = Dense(encoding_dim1, activation='sigmoid',
                 name='decoder2')(encoded2)
# this model maps an input to its second layer of reconstructions
autoencoder2 = Model(inputs=input_data2, outputs=decoded2)
# this is the second encoder
enc2 = Model(inputs=input_data2, outputs=encoded2)
autoencoder2.compile(optimizer='sgd', loss='mse')
# training
autoencoder2.fit(FirstAEoutput, FirstAEoutput, epochs=300,
                 batch_size=10, shuffle=True)
# this is the overall autoencoder mapping an input to its final reconstructions
autoencoder = Model(inputs=input_data1, outputs=encoded2)
# test the autoencoder by encoding and decoding the test dataset
reconstructions = autoencoder.predict(x_test)
print('Original test data')
print(x_test)
print('Reconstructed test data')
print(reconstructions)
If your decoder is trying to reconstruct the input, then it doesn't really make sense to me to attach your classifier to its output. I mean, why not just attach it to the input in the first place? So if you are set on using an auto-encoder, I'd say it's pretty clear that you should attach your classifier to the output of the encoder pipe.
I'm not quite sure what you mean by "use the weights of the autoencoder and feed them into the mlp". You don't feed a layer with another layer's weights, but with its output signal. This is pretty easy to do in Keras. Let's say you defined your auto-encoder and trained it like this:
from keras import Input, Model
from keras import models
from keras.layers import Dense

x = Input(shape=[8])
y = Dense(5, activation='sigmoid', name='encoder')(x)
y = Dense(8, name='decoder')(y)

ae = Model(inputs=x, outputs=y)
ae.compile(loss='mse', ...)
ae.fit(x_train, x_train, ...)

models.save_model(ae, './autoencoder.h5')
Then you can attach a classifying layer to the encoder and create a classifier model with the following code:
# load the model from disk if you
# are in a different execution
ae = models.load_model('./autoencoder.h5')

y = ae.get_layer('encoder').output
y = Dense(1, activation='sigmoid', name='predictions')(y)

classifier = Model(inputs=ae.inputs, outputs=y)
classifier.compile(loss='binary_crossentropy', ...)
classifier.fit(x_train, y_train, ...)
That's it, really. The classifier model will now have the first embedding layer encoder of the ae model as its first layer, followed by a sigmoid decision layer predictions.
If what you are really trying to do is to use the weights learned by the auto-encoder to initialize the weights of the classifier (I'm not positive I recommend this approach):
You can take the weight matrices with layer#get_weights, prune them (because the encoder has 5 units and the classifier only has 1) and finally set the classifier weights. Something along the following lines:
w, b = ae.get_layer('encoder').get_weights()

# remove all units except one
neuron_to_keep = 2
w = w[:, neuron_to_keep:neuron_to_keep + 1]
b = b[neuron_to_keep:neuron_to_keep + 1]

# set_weights expects a list of arrays
classifier.get_layer('predictions').set_weights([w, b])
Idavid, this is for your reference - MLP using autoencoder-reduced features. I need to understand which figure is the correct one. Sorry, I had to upload the picture as an answer, as there was no option to upload an image via a comment. I think you are saying figure B is the correct one. Here is the code snippet for the same. Please let me know if I am going right.
# This is a mlp classification code with features reduced by an Autoencoder
# from keras.models import Sequential
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy
# Data pre-processing...
# load pima indians dataset
dataset = numpy.loadtxt("C:/Users/dibsa/Python Codes/pima.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:, 0:8]
Y = dataset[:, 8]
# Split data into training and testing datasets
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42)
# scale the data within [0-1] range
scalar = MinMaxScaler()
x_train = scalar.fit_transform(x_train)
x_test = scalar.fit_transform(x_test)
# Autoencoder code goes here...
encoding_dim = 5 # size of our encoded representations
# this is our input placeholder
input_data = Input(shape=(8,))
# "encoded" is the encoded representation of the input
encoded = Dense(encoding_dim, activation='relu', name='encoder')(input_data)
# "decoded" is the lossy reconstruction of the input
decoded = Dense(8, activation='sigmoid', name='decoder')(encoded)
# this model maps an input to its reconstruction
autoencoder = Model(inputs=input_data, outputs=decoded)
autoencoder.compile(optimizer='sgd', loss='mse')
# training
autoencoder.fit(x_train, x_train,
                epochs=300,
                batch_size=10,
                shuffle=True,
                validation_data=(x_test, x_test))  # need more tuning
# test the autoencoder by encoding and decoding the test dataset
reconstructions = autoencoder.predict(x_test)
print('Original test data')
print(x_test)
print('Reconstructed test data')
print(reconstructions)
# MLP code goes here...
# create model
x = autoencoder.get_layer('encoder').output
# h = Dense(3, activation='relu', name='hidden')(x)
y = Dense(1, activation='sigmoid', name='predictions')(x)
classifier = Model(inputs=autoencoder.inputs, outputs=y)
# Compile model
classifier.compile(loss='binary_crossentropy', optimizer='adam',
                   metrics=['accuracy'])
# Fit the model
classifier.fit(x_train, y_train, epochs=250, batch_size=10)
print('Now making predictions')
predictions = classifier.predict(x_test)
# round predictions
rounded_predicted_classes = [round(x[0]) for x in predictions]
temp = sum(y_test == rounded_predicted_classes)
acc = temp/len(y_test)
print(acc)
