I made a custom loss function for cost-sensitive learning and to set the optimal parameters for neural network, I experienced with different parameters.(layers, batch size, epochs)
Then, the result was best when layers only has input and output layers.
I'm curious about this result.
Is it acceptable? then could you please tell me the reason?
Does that result occur because the dataset is already normalized?
Here's the code.
Thank you in advance.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras.layers import Dense
from keras.callbacks import History
from keras.models import Sequential
import keras.backend as K
# Define dataset
X,y = make_classification(n_samples=150000, n_features=10, n_informative=4,
n_redundant=4, n_repeated=2, n_classes=2, n_clusters_per_class=3,
class_sep = 0.5, weights=[0.9,0.1], random_state=27)
X_train, X_true, y_train, y_true = train_test_split(X, y, test_size=0.33, random_state=42)
# Define Function
def custom_loss_wrapper(p):
def custom_loss(y_true, y_pred):
y_true = tf.cast(y_true, tf.float32)
y_pred = tf.cast(y_pred, tf.float32)
neg_y_true = 1 - y_true
neg_y_pred = 1 - y_pred
fp = K.sum(y_pred * (1 - y_true))
fn = K.sum((1 - y_pred) * y_true)
cost = tf.cast(fn * p + fp, tf.float32)
return cost
return custom_loss
def FindLayerNodesLinear(n_layers, first_layer_nodes, last_layer_nodes):
layers = []
nodes_increment = (last_layer_nodes - first_layer_nodes)/ (n_layers-1)
nodes = first_layer_nodes
for i in range(1, n_layers+1):
layers.append(math.ceil(nodes))
nodes = nodes + nodes_increment
return layers
def createmodel_for_grid_search(n_layers, first_layer_nodes, last_layer_nodes):
p =5
model = Sequential()
n_nodes = FindLayerNodesLinear(n_layers, first_layer_nodes, last_layer_nodes)
for i in range(1, n_layers):
if i==1:
model.add(Dense(first_layer_nodes, input_dim=X_train.shape[1], activation='relu'))
else:
model.add(Dense(n_nodes[i-1], activation='sigmoid'))
#Finally, the output layer should have a single node in binary classification
model.add(Dense(1, activation = 'sigmoid'))
model.compile(optimizer='adam', loss=custom_loss_wrapper(p) )
return model
# Grid search by hand
batch_sizes = [16, 32, 64, 128]
epochs = [10,20,30,50,100]
b = 0
loss_= []
for a in range(2,6):
for i in range(len(batch_sizes)):
batch_size_ = batch_sizes[i]
for j in range(len(epochs)):
epochs_ = epochs[j]
loss = np.round(hist_list[b][-1],3)
loss_.append(loss)
print('layers: {}, batch size: {}, epoch: {}, and loss: {}'.format(a, batch_size_, epochs_, loss))
b +=1
print('')
Related
I am a beginner in Keras programming. I just want to manually update the model weights manually in keras so as to get a deep understanding of gradient descent. However, when I tried it, the model either cannot get converged or the loss even gets exploded. My steps are listed as follows:
First, I use keras sequential model to fit a quadratic function y = 2*x*x - 7*x + 11
below is the code using the sequential model:
model = Sequential()
model.add(Dense(64, input_dim = 1, activation = 'relu'))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()
training loss
fitted curved and original one
Then, I use the following code to update the weight manually:
class MyModel(keras.Model):
def __init__(self):
super().__init__()
self.layer1 = Dense(64, input_shape = (1, ))
self.layer2 = Dense(32)
self.layer3 = Dense(1)
def forward(self, x):
y = keras.activations.relu(self.layer1(x))
y = keras.activations.relu(self.layer2(y))
y = self.layer3(y)
return y
def loss_fun(y_pred, y):
return keras_backend.mean(keras.losses.mean_squared_error(y, y_pred))
def compute_loss(model, x, y, loss_fun = loss_fun):
logits = model.forward(x)
mse = loss_fun(y, logits)
return mse, logits
def compute_gradients(model, x, y, loss_fun = loss_fun):
with tf.GradientTape() as tape:
loss, _ = compute_loss(model, x, y, loss_fun)
return tape.gradient(loss, model.trainable_variables), loss
def apply_gradients(optimizer, gradients, variables):
optimizer.apply_gradients(zip(gradients, variables))
def train_batch(x, y, model, optimizer):
'''
one step batch training
'''
gradients, loss = compute_gradients(model, x, y)
apply_gradients(optimizer, gradients, model.trainable_variables)
return loss
model2 = MyModel()
epochs = 200
optimizer = keras.optimizers.Adam(learning_rate = 0.01) #据查这个0.01是keras默认的learning rate
loss = []
x_train = np.expand_dims(x_train, axis = 0)
y_train = np.expand_dims(y_train, axis = 0)
for i in range(epochs):
l = train_batch(x_train, y_train, model2, optimizer)
loss.append(l)
if i % 10 == 0:
print(f'current loss = {l}')
while the loss looks like this:
I also try another way to manually update the weights:
epochs = 200
lr = 0.01
optimizer = keras.optimizers.Adam(learning_rate = 0.01)
loss = []
x_train = np.expand_dims(x_train, axis = 0)
y_train = np.expand_dims(y_train, axis = 0)
x_train = tf.cast(x_train, tf.float32)
y_train = tf.cast(y_train, tf.float32)
for i in range(epochs):
y_pred = model5.forward(x_train)
l = k.mean(keras.losses.mean_squared_error(y_train, y_pred))
gradient = k.gradients(l, model5.trainable_weights)
new_weights = model5.get_weights() - 0.001 * np.array(gradients)
model5.set_weights(new_weights)
if i % 10 == 0:
loss.append(l)
print(f'{i}th loss is: {l}')
In this case, the loss explodes like this:
where is the problem?
I have figure out where the problem is.
When getting the model through the following code:
model = MyModel()
The trainable variables in model are null.
When I try to print them using this:
print(model.trainable_variables)
it outputs
[]
I try to make the weight trainable manually by the following code:
for layers in model.layers:
layers.trainable = True
But it still doesn't work at all.
I am novice in Machine Learning (ML) and I'm trying to implement algorithm to understand basic syntax of ML frameworks etc. Now I am working on MNIST database of handwritten digits dataset.
I implemented just one layer (I mean: Input layer has 784 inputs, Hidden layer has 512 nodes, Output layer has 10 outputs) Neural Network using TensorFlow framework, no data preprocessing, 128 batch size, 10 epochs, ADAM optimizer. And the algorithm achieved about 0.95 accuracy on train set.
After that I tried to implement exactly the same architecture in Keras. However, the accuracy (train set) is about 0.3. I tried to find many different implementations founded on the internet but I still cannot find where is the issue. I believe that it is something stupid (as always is) :-/
I presume that the same architecture in Keras should give the same results as the implementation in TensorFlow, am I correct?
My Keras implementation is:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.layers import Input, Dense
from keras.models import Model
from keras.utils.np_utils import to_categorical
df_train = pd.read_csv('datasets/MNIST_train.csv', delimiter=',', header=0)
Y_train, X_train = np.split(df_train.values, [1], axis=1)
m, n_x = X_train.shape
n_y = len(np.unique(Y_train))
n_layer1 = 512
batch_size = 128
num_epochs = 10
Y_train = to_categorical(Y_train)
X_input = Input(shape=(n_x,), name='input')
X = Dense(n_layer1, activation='relu', name='hidden')(X_input)
X = Dense(n_y, activation='softmax', name='output')(X)
model = Model(inputs=X_input, outputs=X, name='Neural Network')
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=num_epochs, batch_size=batch_size)
My TensorFlow implementation is:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
def one_hot(a, num_classes):
return np.eye(num_classes)[a.reshape(-1)]
def get_minibatches(batch_size, m, X, Y):
output_batches = []
for index in range(0, m, batch_size):
index_end = index + batch_size
batch = [X[index:index_end], Y[index:index_end]]
output_batches.append(batch)
return output_batches
def dense_layer(input, channels_in, channels_out, activation=None):
initializer = tf.contrib.layers.xavier_initializer()
w = tf.Variable(initializer([channels_in, channels_out]), name="w")
b = tf.Variable(tf.zeros([1, channels_out]), name="b")
if (activation == 'relu'):
a = tf.nn.relu(tf.matmul(input, w) + b)
return a
else:
z = tf.matmul(input, w) + b
return z
df_train = pd.read_csv('datasets/MNIST_train.csv', delimiter=',', header=0)
Y_train, X_train = np.split(df_train.values, [1], axis=1)
m, n_x = X_train.shape
n_y = len(np.unique(Y_train))
n_layer1 = 512
batch_size = 128
num_epochs = 10
Y_train = one_hot(Y_train, n_y)
X = tf.placeholder(tf.float32, [None, n_x], name="X")
Y = tf.placeholder(tf.float32, [None, n_y], name="Y")
hidden = dense_layer(X, n_x, n_layer1, 'relu')
output = dense_layer(hidden, n_layer1, n_y)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=Y))
optimizer = tf.train.AdamOptimizer().minimize(loss)
predict = tf.argmax(output, 1)
correct_prediction = tf.equal(predict, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
minibatches = get_minibatches(batch_size, m, X_train, Y_train)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
current_cost = sess.run(loss, feed_dict={X: X_train, Y: Y_train})
train_accuracy = sess.run(accuracy, feed_dict={X: X_train, Y: Y_train})
print('Epoch: {:<4} - Loss: {:<8.3} Train Accuracy: {:<5.3} '.format(0, current_cost, train_accuracy))
for epoch in range(num_epochs):
for minibatch in minibatches:
minibatch_X, minibatch_Y = minibatch
sess.run(optimizer, feed_dict={ X: minibatch_X, Y: minibatch_Y })
current_cost = sess.run(loss, feed_dict={X: X_train, Y: Y_train})
train_accuracy = sess.run(accuracy, feed_dict={X: X_train, Y: Y_train})
print('Epoch: {:<4} - Loss: {:<8.3} Train Accuracy: {:<5.3} '.format(epoch + 1, current_cost, train_accuracy))
Could you help me and advice what I am doing wrong?
Thank you
Petr
I figured it out. At least partially. I standardized the input ((x - xmean) / xstd) and the Keras implementation has been started to return similar results as TensorFlow implementation…
I'm trying to do regression using a pretrained vgg16 network. As loss and also metric I have chosen the mean absolute error. I wanted to do a check if this score is actually correct and implemented the mean absolute score myself in a callback. However, the results are not the same as can be seen by the output:
Training MAE:126.649451276
Epoch 1/100
638/638 [==============================] - 406s - loss: 38.9601 - mean_absolute_error: 38.9601
Training MAE:40.7683742351
Epoch 2/100
638/638 [==============================] - 362s - loss: 19.8719 - mean_absolute_error: 19.8719
Training MAE:43.2516028945
The Training MAE should be the same (or at least almost the same), as the loss or the mean_absolute_error in the epoch above. For the first epoch this is ok. For the second epoch it is not. There the MAE is 43.24 but the loss is 19.87 and the mean_absolute_error provided by keras is 19.87.
I've cleaned up my code and tried to find a reason why but I can't find it. Why is this happening?
My code:
from keras.layers.core import Flatten, Dense, Dropout
import numpy as np
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras import optimizers
from keras.models import Model
import os
from keras.layers.core import *
from keras.callbacks import Callback, ModelCheckpoint
os.environ["CUDA_VISIBLE_DEVICES"]="2"
model_checkpoints = "/home/usr/PycharmProjects/RSNA/model_checkpoints/model2.hdf5"
data_dir = "/home/usr/PycharmProjects/RSNA/data/"
data_training = "dataset/training"
training_images = "boneage-training-dataset/"
training_gt = "training_gt/"
n_batch = 16
n_training_samples = 10213
n_validation_samples = 1136
n_testing_samples = 1262
def mae(X,y,mdl):
pred = mdl.predict(X)
gt = y
return str(np.mean(np.abs(np.array(gt)-np.array(pred))))
class LossHistory(Callback):
def on_epoch_begin(self, epoch, logs={}):
mae_score = mae(X_train,y_train,self.model)
print "Training MAE:" + mae_score
def regression_flow_from_directory(flow_from_directory_gen, rev_indices):
for x, y in flow_from_directory_gen:
yield x, [float(rev_indices[val]) for val in y]
if __name__ == '__main__':
width = 224
height = 224
X_train = []
y_train = []
train_datagen = image.ImageDataGenerator(
rescale=1./255,
width_shift_range=0.2,
height_shift_range= 0.2,
)
train_generator = train_datagen.flow_from_directory(
data_dir+data_training,
target_size=(width, height),
batch_size=n_batch,
color_mode='rgb',
class_mode='sparse',
seed=42)
indices = train_generator.class_indices
rev_indices = dict((v,k) for k, v in indices.iteritems())
train_generator = regression_flow_from_directory(train_generator,rev_indices)
i = 0
print "Epcohs: " + str(n_training_samples//n_batch)
for x,y in train_generator:
if i <= n_training_samples//n_batch:
X_train.extend(x)
y_train.extend(y)
i +=1
else:
break;
print "Maximum: " + str(np.max(y_train))
X_train = np.array(X_train)
print X_train.shape
model = VGG16(weights='imagenet', include_top=False,input_shape = (224, 224, 3))
last = model.output
x = Flatten(name='flatten')(last)
x = Dense(4096, activation='relu', name='fc1')(x)
x = Dropout(0.5, noise_shape=None, seed=None)(x)
x = Dense(4096, activation='relu', name='fc2')(x)
x = Dense(1, activation='relu', name='predictions')(x)
my_model = Model(input=model.input, output=x)
my_model.compile(loss="mae", optimizer=optimizers.SGD(lr=0.00001, momentum=0.9),
metrics=["mae"])
history = LossHistory()
print my_model.summary()
print n_validation_samples//n_batch
my_model.fit_generator(
train_generator,
steps_per_epoch=n_training_samples//n_batch,
epochs=100,
callbacks=[history],
)
I want to build 40-class LSTM classifier to analyze time series data. I have a 13 dimension real time data collected from 13 sensors. When I run the code below I keep getting this error message.
ValueError: Error when checking model input: the list of Numpy arrays
that you are passing to your model is not the size the model expected.
Expected to see 1 arrays but instead got the following list of 241458
arrays: [array([[ 0.64817517, 0.12892013, 0.01879949, 0.00946322,
0.00458952,
0.01668651, 0.04776124, 0.03301365, 0.0360659 , 0.15013408,
0.10112171, 0.05494366, 0.02620634],
RNN code
from __future__ import print_function
import keras
from keras import metrics
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Activation
from keras.utils import np_utils
from keras.layers.normalization import BatchNormalization
from sklearn.cross_validation import train_test_split
import pandas as pd
from keras.callbacks import CSVLogger
from keras.models import load_model
from keras.layers import LSTM
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
import keras
def top_k_acc(y_true, y_pred):
return metrics.top_k_categorical_accuracy(y_true, y_pred, k=5)
# train Parameters
sequence_length = 60
data_dim = 13
num_classes = 40
batch_size = 15000
epochs = 10
# tf.set_random_seed(777) # reproducibility
def MinMaxScaler(data):
''' Min Max Normalization
Parameters
----------
data : numpy.ndarray
input data to be normalized
shape: [Batch size, dimension]
Returns
----------
data : numpy.ndarry
normalized data
shape: [Batch size, dimension]
References
----------
.. [1] http://sebastianraschka.com/Articles/2014_about_feature_scaling.html
'''
numerator = data - np.min(data, 0)
denominator = np.max(data, 0) - np.min(data, 0)
# noise term prevents the zero division
return numerator / (denominator + 1e-7)
# Load data
xy = np.loadtxt('sc_total_for 60s v4.0 test.csv', delimiter=',', skiprows=1)
x = xy[:, 1:14]
x = MinMaxScaler(x)
y = xy[:,0]
# Build a dataset
x_data = []
y_data = []
for i in range(0, len(y) - sequence_length):
_x = x[i:i + sequence_length]
_y = y[i + sequence_length]
# print(_x, "->", _y)
x_data.append(_x)
y_data.append(_y)
# One-hot encoding
encoder = LabelEncoder()
encoder.fit(y_data)
encoded_Y = encoder.transform(y_data)
dummy_y = np_utils.to_categorical(encoded_Y)
#train/test split
x_train,x_test,y_train,y_test=train_test_split(x_data,dummy_y,random_state=4,test_size=0.3);
# print(x_train[0],"->",y_train[0])
# Network
model = Sequential()
model.add(LSTM(40, batch_input_shape=(batch_size, sequence_length, data_dim),return_sequences=True))
model.add(LSTM(40, return_sequences=False))
model.add(Dense(40))
model.add(Activation("linear"))
# model.add(Dense(40))
# model.add(Dense(25, init='uniform', activation='relu'))
# model.add(BatchNormalization())
# model.add(Dense(30, init='uniform', activation='relu'))
# model.add(BatchNormalization())
# model.add(Dense(40, init='uniform', activation='softmax'))
model.summary()
model.compile(loss='mean_squared_error',
optimizer='adam',
metrics=['accuracy'])
csv_logger = CSVLogger('LSTM 1111.log')
history = model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1,
validation_data=(x_test, y_test),
callbacks=[csv_logger])
score = model.evaluate(x_test, y_test, verbose=0)
predictions=model.predict(x_test)
# model.save('New Model6 save.h5')
#plot_model(model, to_file='model1.png')
# print('Test loss:', score[0])
# print('Test accuracy:', score[1])
The issue is:
# Build a dataset
x_data = []
y_data = []
for i in range(0, len(y) - sequence_length):
_x = x[i:i + sequence_length]
_y = y[i + sequence_length]
# print(_x, "->", _y)
x_data.append(_x)
y_data.append(_y)
You're building a list of 2d numpy arrays for x_data when Keras expects a single, three-dimensional array for LSTM. Do this instead:
num_samples = len(y) - sequence_length
x_data = np.zeros((num_samples, sequence_length, data_dim))
y_data = np.zeros((num_samples))
for i in range(num_samples):
x_data[i] = x[i:i + sequence_length]
y_data[i] = y[i + sequence_length]
I'm doing a comparison between Keras (with Theano) and Lasagne on a toy regression problem in order to choose one of the two for my final application. As a result of this comparison, I see that Lasagne is performing so much worse than Keras that I'm starting to doubt about my code. Since I'm quite new to both Keras and Lasagne, I would like to check this with someone more experienced than me. The network should be trained to find the mean of a 16x16 matrix. I made different try: first, tried with a 2D conv layer + dense layer (since my final application will require using CNN). Then, since Lasagne results were horrible, I tried with a standard one layer MLP. Again, awful Lasagne performance. I tried to use same specs for both cases: same batch size, same initialization, same optimizer (tested both SGD with Nesterov momentum and ADAM), and of course, same number of epochs and network architecture. Can someone tell me what is going on? Is there something wrong in my code? Why so much difference in the performance? If everything is correct, why Keras perform so much better than Lasagne?
Here the codes I am using:
Keras:
# -*- coding: utf-8 -*-
import numpy as np
np.random.seed(1337) # for reproducibility
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.layers import Convolution2D
from keras import backend as K
from keras.optimizers import SGD
import matplotlib.pyplot as plt
batch_size = 500
nb_output = 1
nb_epoch = 10
# input image dimensions
img_rows, img_cols = 16, 16
# number of convolutional filters to use
nb_filters = 20
# size of pooling area for max pooling
pool_size = (2, 2)
# convolution kernel size
kernel_size = (3, 3)
X_train = np.random.randn(10000, 16*16)
Y_train = np.mean(X_train, 1)
X_train = X_train.astype('float32')
X_test = np.random.randn(1000, 16*16)
Y_test = np.mean(X_test, 1)
if K._BACKEND == 'theano':
X_train = np.reshape(X_train, (10000, 1, 16, 16))
X_test = np.reshape(X_test, (1000, 1, 16, 16))
else:
X_train = np.reshape(X_train, (10000, 16, 16, 1))
X_test = np.reshape(X_test, (1000, 16, 16, 1))
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
model = Sequential()
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
border_mode='same',
input_shape=X_train.shape[1:], init='glorot_uniform'))
model.add(Activation('relu'))
#model.add(Flatten(input_shape=X_train.shape[1:]))
model.add(Flatten())
model.add(Dense(10, init='glorot_uniform'))
model.add(Activation('sigmoid'))
model.add(Dense(nb_output, init='glorot_uniform'))
model.add(Activation('linear'))
sgd = SGD(lr=0.1, momentum=0.9, nesterov=True)#decay=1e-6,
model.compile(loss='mse',
optimizer=sgd)
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
verbose=1, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=1)
predicts = model.predict(X_test, batch_size=1000, verbose=0)
print('Test score:', score[0])
plt.figure()
plt.scatter(Y_test, predicts)
Lasagne (adapted from mnist example):
# -*- coding: utf-8 -*-
from __future__ import print_function
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
import matplotlib.pyplot as plt
def load_dataset():
np.random.seed(1337)
X_train = np.random.randn(10000, 16*16)
X_train = X_train.astype('float32')
Y_train = np.mean(X_train, 1)
X_test = np.random.randn(1000, 16*16)
X_test = X_test.astype('float32')
Y_test = np.mean(X_test, 1)
X_train = np.reshape(X_train, (10000, 1, 16, 16))
X_test = np.reshape(X_test, (1000, 1, 16, 16))
return X_train, Y_train, X_test, Y_test
def build_cnn(input_var=None):
network = lasagne.layers.InputLayer(shape=(None, 1, 16, 16),
input_var=input_var)
network = lasagne.layers.Conv2DLayer(
network, num_filters=20, filter_size=(3, 3),
nonlinearity=lasagne.nonlinearities.rectify,
W=lasagne.init.GlorotUniform())
network = lasagne.layers.DenseLayer(
network,
num_units=10,
nonlinearity=lasagne.nonlinearities.sigmoid)
network = lasagne.layers.DenseLayer(
network,
num_units=1,
nonlinearity=lasagne.nonlinearities.linear)
return network
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
assert len(inputs) == len(targets)
if shuffle:
indices = np.arange(len(inputs))
np.random.shuffle(indices)
for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
if shuffle:
excerpt = indices[start_idx:start_idx + batchsize]
else:
excerpt = slice(start_idx, start_idx + batchsize)
yield inputs[excerpt], targets[excerpt]
def main(model='cnn', num_epochs=10):
print("Loading data...")
X_train, y_train, X_test, y_test = load_dataset()
input_var = T.tensor4('inputs')
target_var = T.vector('targets')
print("Building model and compiling functions...")
network = build_cnn(input_var)
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.squared_error(prediction, target_var)
loss = loss.mean()
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(
loss, params, learning_rate=0.1, momentum=0.9)
# updates = lasagne.updates.adam(loss, params)
test_prediction = lasagne.layers.get_output(network)
test_loss = lasagne.objectives.squared_error(test_prediction,
target_var)
test_loss = test_loss.mean()
train_fn = theano.function([input_var, target_var], loss, updates=updates)
val_fn = theano.function([input_var, target_var], test_loss)
preds = theano.function([input_var], test_prediction)
print("Starting training...")
for epoch in range(num_epochs):
train_err = 0.0
train_batches = 0
start_time = time.time()
for batch in iterate_minibatches(X_train, y_train, 500, shuffle=False):
inputs, targets = batch
train_err += train_fn(inputs, targets)
train_batches += 1
test_err = 0.0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
inputs, targets = batch
err = val_fn(inputs, targets)
test_err += err
test_batches += 1
print("Epoch {} of {} took {:.3f}s".format(
epoch + 1, num_epochs, time.time() - start_time))
print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
print(" test loss:\t\t{:.6f}".format(test_err / test_batches))
pds = preds(X_test)
plt.scatter(y_test, pds)
plt.show()
if __name__ == '__main__':
main()
Both codes are easily adaptable to a one layer MLP. If you run them, you will get this scatter plot at the end:
lasagne:
keras:
.
On x axis: true values, on y axis predicted values.