Weird decision boundary using neural net in Tensorflow - python

I have generated a balanced dataset of 4000 examples, 2000 for the negative class and 2000 for the positive one. Then I built a neural net with a single hidden layer of 3 neurons with ReLU activations and an output layer with a sigmoid. The cost function is a standard cross-entropy function and I chose Adam as the optimizer. Using minibatches of 15 examples, after 1000 epochs of training the final accuracy is 96.37%, so I am assuming that the model is doing well on the test set. But when I want to display the decision boundary, this is what I get:
I cannot figure out if the problem is a code error or if the model just needs more training. Here is the script I'm using:
# implement a neural network that finds a decision boundary under a constraint on the second hidden layer with tensorflow
import numpy as np
from sklearn.utils import shuffle
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tf_utils import random_mini_batches
import matplotlib.pyplot as plt
def generate_dataset():
    np.random.seed(2)
    # positive class samples
    d1_x = np.random.normal(5, 10, 1000)
    d1_y = np.random.normal(5, 2, 1000)
    d2_x = np.random.normal(40, 20, 1000)
    d2_y = np.random.normal(2, 1, 1000)
    # negative class samples
    d3_x = np.random.normal(60, 5, 2000)
    d3_y = np.random.normal(10, 1, 2000)
    plt.scatter(d1_x, d1_y, color='b')
    plt.scatter(d2_x, d2_y, color='b')
    plt.scatter(d3_x, d3_y, color='r')
    Y = np.zeros((4000, 1))
    d_x = np.concatenate([d1_x, d2_x, d3_x])
    d_y = np.concatenate([d1_y, d2_y, d3_y])
    d_x = d_x.reshape(d_x.shape[0], 1)
    d_y = d_y.reshape(d_y.shape[0], 1)
    X = np.concatenate([d_x, d_y], axis=1)
    Y[2000:] = 1
    return X, Y
# define a tensorflow model 2-3-1 with one hidden layer and a scalar output
costs = []
print_cost = True
learning_rate = .0009
minibatch_size = 15
num_epochs = 1000
XX, YY = generate_dataset()
XX, YY = shuffle(XX, YY)
X_norm = normalize(XX)
X_train, X_test, y_train, y_test = train_test_split(X_norm, YY, test_size=0.2, random_state=42)
X_train = np.transpose(X_train)
y_train = np.transpose(y_train)
X_test = np.transpose(X_test)
y_test = np.transpose(y_test)
# define train and test sets
m = XX.shape[1] # input dimension
n = YY.shape[1] # output dimension
X = tf.placeholder(tf.float32, shape = [m, None], name = 'X')
y = tf.placeholder(tf.float32, shape = [n, None], name = 'y')
# model parameters
n1 = 3 # output dimension of the first hidden layer
#n2 = 4 # output dimension of the second hidden layer
#n3 = 2
W1 = tf.get_variable("W1", [n1, m], initializer=tf.contrib.layers.xavier_initializer(seed=1))
b1 = tf.get_variable("b1", [n1, 1], initializer=tf.zeros_initializer)
#W2 = tf.get_variable("W2", [n2, n1], initializer=tf.contrib.layers.xavier_initializer(seed=1))
#b2 = tf.get_variable("b2", [n2, 1], initializer=tf.zeros_initializer)
#W3 = tf.get_variable("W3", [n3, n2], initializer=tf.contrib.layers.xavier_initializer(seed=1))
#b3 = tf.get_variable("b3", [n3, 1], initializer=tf.zeros_initializer)
W4 = tf.get_variable("W4", [n, n1], initializer=tf.contrib.layers.xavier_initializer(seed=1))
b4 = tf.get_variable("b4", [n, 1], initializer=tf.zeros_initializer)
# forward propagation
z1 = tf.add(tf.matmul(W1, X), b1)
a1 = tf.nn.relu(z1)
#z2 = tf.add(tf.matmul(W2, a1), b2)
#a2 = tf.nn.relu(z2)
#z3 = tf.add(tf.matmul(W3, a2), b3)
#a3 = tf.nn.relu(z3)
z4 = tf.add(tf.matmul(W4, a1), b4)
pred = tf.nn.sigmoid(z4)
# cost function
cost = tf.reduce_mean(tf.losses.log_loss(labels=y, predictions=pred))  # the loss takes the probability estimate given by the model, not the net input z
# ADAM optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# metrics
correct_prediction = tf.less_equal(tf.abs(pred - y), 0.5)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
init = tf.global_variables_initializer()
with tf.Session() as sess:
    seed = 1
    sess.run(init)
    for epoch in range(num_epochs):
        epoch_cost = 0
        seed += 1
        num_minibatches = int(X_train.shape[1] / minibatch_size)
        minibatches = random_mini_batches(X_train, y_train, minibatch_size, seed)
        for minibatch in minibatches:
            (minibatch_X, minibatch_Y) = minibatch
            _, minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, y: minibatch_Y})
            epoch_cost += minibatch_cost / num_minibatches
        # Print the cost every epoch
        if print_cost == True and epoch % 100 == 0:
            print("Cost after epoch %i: %f" % (epoch, epoch_cost))
        if print_cost == True and epoch % minibatch_size == 0:
            costs.append(epoch_cost)
    #plt.plot(costs)
    #plt.show()
    cp, val_accuracy = sess.run([correct_prediction, accuracy], feed_dict={X: X_test, y: y_test})
    # plot the cost
    # plt.plot(np.squeeze(costs))
    # plt.ylabel('cost')
    # plt.xlabel('iterations (per fives)')
    # plt.title("Learning rate =" + str(learning_rate))
    # plt.show()
    cmap = plt.get_cmap('Paired')
    # Define region of interest by data limits
    xmin, xmax = min(XX[:, 0]) - 1, max(XX[:, 0]) + 1
    ymin, ymax = min(XX[:, 1]) - 1, max(XX[:, 1]) + 1
    steps = 100
    x_span = np.linspace(xmin, xmax, steps)
    y_span = np.linspace(ymin, ymax, steps)
    xx, yy = np.meshgrid(x_span, y_span)
    A = np.concatenate([[xx.ravel()], [yy.ravel()]], axis=0)
    A = normalize(A, axis=0)
    # Make predictions across region of interest
    predictions = sess.run(pred, feed_dict={X: A})
    # Plot decision boundary in region of interest
    z = predictions.reshape(xx.shape)
    plt.contourf(xx, yy, z, cmap=cmap, alpha=.5)
    plt.show()
    # Get predicted labels on training data and plot
    #train_labels = model.predict(X)
    #ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap, lw=0)
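One thing worth double-checking here: sklearn's normalize rescales each sample to unit L2 norm, so every 2-D point is projected onto the unit circle and the model effectively only sees the angle of each point, which can make the boundary look strange in the original coordinates. A sketch of feature-wise scaling instead (this is an assumption about the intent, not something confirmed in the post; it reuses XX, xx, yy, pred and the X placeholder from above):
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(XX)        # per-feature mean/std from the data
X_norm = scaler.transform(XX)            # use this for training instead of normalize(XX)
A = np.stack([xx.ravel(), yy.ravel()], axis=1)
A_scaled = scaler.transform(A)           # grid preprocessed exactly like the training data
predictions = sess.run(pred, feed_dict={X: A_scaled.T})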

Related

TensorFlow model's cost is constantly at 0

So I've been learning TensorFlow through this computer vision project and I'm not sure if I understand it well enough. I think I got the session part right, although the graph seems to be the issue here. Here is my code:
def model_train(placeholder_dimensions, filter_dimensions, strides, learning_rate, num_epochs, minibatch_size, print_cost = True):
    # for training purposes
    tf.reset_default_graph()
    # create datasets
    train_set, test_set = load_dataset()  # custom function over a custom-made dataset
    X_train = np.array([ex[0] for ex in train_set])
    Y_train = np.array([ex[1] for ex in train_set])
    X_test = np.array([ex[0] for ex in test_set])
    Y_test = np.array([ex[1] for ex in test_set])
    # convert to one-hot encodings
    Y_train = tf.one_hot(Y_train, depth = 10)
    Y_test = tf.one_hot(Y_test, depth = 10)
    m = len(train_set)
    costs = []
    tf.reset_default_graph()
    graph = tf.get_default_graph()
    with graph.as_default():
        # create placeholders
        X, Y = create_placeholders(*placeholder_dimensions)
        # initialize parameters
        parameters = initialize_parameters(filter_dimensions)
        # forward propagate
        Z4 = forward_propagation(X, parameters, strides)
        # compute cost
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = Z4, labels = Y))
        # define optimizer for backpropagation that minimizes the cost function
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
        # initialize variables
        init = tf.global_variables_initializer()
        # start session
        with tf.Session() as sess:
            sess.run(init)
            for epoch in range(num_epochs):
                minibatch_cost = 0.
                num_minibatches = int(m / minibatch_size)
                # get random minibatch
                minibatches = random_minibatches(np.array([X_train, Y_train]), minibatch_size)
                for minibatch in minibatches:
                    minibatch_X, minibatch_Y = minibatch
                    _ , temp_cost = sess.run([optimizer, cost], {X: minibatch_X, Y: minibatch_Y})
                    minibatch_cost += temp_cost / num_minibatches
                if print_cost == True and epoch % 5 == 0:
                    print('Cost after epoch %i: %f' %(epoch, minibatch_cost))
                if print_cost == True:
                    costs.append(minibatch_cost)
            # plot the costs
            plot_cost(costs, learning_rate)
            # calculate correct predictions
            prediction = tf.argmax(Z4, 1)
            correct_prediction = tf.equal(prediction, tf.argmax(Y, 1))
            # calculate accuracy on test set
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
            train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
            test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
            print('Training set accuracy:', train_accuracy)
            print('Test set accuracy:', test_accuracy)
    return parameters
where the initialize_parameters and forward_propagation functions are as follows:
def initialize_parameters(filter_dimensions):
    # initialize weight parameters for convolution layers
    W1 = tf.get_variable('W1', shape = filter_dimensions['W1'])
    W2 = tf.get_variable('W2', shape = filter_dimensions['W2'])
    parameters = {'W1': W1, 'W2': W2}
    return parameters
def forward_propagation(X, parameters, strides):
    with tf.variable_scope('model1'):
        # first block
        Z1 = tf.nn.conv2d(X, parameters['W1'], strides['conv1'], padding = 'VALID')
        A1 = tf.nn.relu(Z1)
        P1 = tf.nn.max_pool(A1, ksize = strides['pool1'], strides = strides['pool1'], padding = 'VALID')
        # second block
        Z2 = tf.nn.conv2d(P1, parameters['W2'], strides['conv2'], padding = 'VALID')
        A2 = tf.nn.relu(Z2)
        P2 = tf.nn.max_pool(A2, ksize = strides['pool2'], strides = strides['pool2'], padding = 'VALID')
        # flatten
        F = tf.contrib.layers.flatten(P2)
        # dense block
        Z3 = tf.contrib.layers.fully_connected(F, 50)
        A3 = tf.nn.relu(Z3)
        # output
        Z4 = tf.contrib.layers.fully_connected(A3, 10, activation_fn = None)
    return Z4
I have previous experience with Keras, yet I can't find the problem here.
I would check two things first:
#convert to one-hot encodings
Y_train = tf.one_hot(Y_train, depth = 10)
Y_test = tf.one_hot(Y_test, depth = 10)
Check whether this code outputs what you expect.
Second, check the model initialization, again verifying that it looks like you expect.
Just my 2 cents.
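On the first point: tf.one_hot returns a symbolic tensor, not a NumPy array, so assigning its result to Y_train means the later feed_dict receives a tensor rather than data. A minimal sketch of materializing the encodings up front (the label values here are hypothetical):
import numpy as np
import tensorflow as tf
labels = np.array([0, 3, 1, 2])  # hypothetical integer class labels
# option 1: evaluate the one-hot op once, before building the training graph
with tf.Session() as sess:
    labels_oh = sess.run(tf.one_hot(labels, depth=10))
# option 2: build the same matrix directly in NumPy, no session needed
labels_oh_np = np.eye(10)[labels]
print(labels_oh.shape)  # (4, 10)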

How to use outputs from previous time steps as input along with other inputs in RNN using tensorflow?

In the following example there are three time series, and I want to predict another time series y which is a function of the three. How can I use four inputs to predict the time series, where the fourth input is the output at the previous time step?
import tensorflow as tf
import numpy as np
import pandas as pd
#clean computation graph
tf.reset_default_graph()
tf.set_random_seed(777) # reproducibility
np.random.seed(0)
import matplotlib.pyplot as plt
def MinMaxScaler(data):
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    # noise term prevents the zero division
    return numerator / (denominator + 1e-7)
class generate_data(object):
    def __init__(self, data_len, in_series, y_pred, seq_lengths, method='sum'):
        self.data_len = data_len
        self.data = None
        self.in_series = in_series  # number of input series
        self.y_pred = y_pred  # number of final outputs from model
        self.seq_lengths = seq_lengths
        self.method = method
    def _f(self, x):
        y = 0
        result = []
        for _ in x:
            result.append(y)
            y += np.random.normal(scale=1)
        return np.array(result)
    def _runningMean(self, x, N):
        return np.convolve(x, np.ones((N,))/N)[(N-1):]
    def sine(self):
        DATA = np.zeros((self.data_len, self.in_series))
        xx = [None]
        data_0 = np.sin(np.arange(self.data_len*self.in_series))
        xx = data_0.reshape(self.data_len, self.in_series)
        DATA[:, 0:self.in_series] = xx
        y = self._get_y(DATA)
        return xx, y, DATA
    def _get_y(self, xx):
        if self.method == 'sum':
            yy = np.array([np.sum(xx[i, :]) for i in range(np.shape(xx)[0])])
        elif self.method == 'mean':
            yy = np.array([np.mean(xx[i, :]) for i in range(np.shape(xx)[0])])
        elif self.method == 'self_mul':
            yy = np.array([np.prod(xx[i, :]) for i in range(np.shape(xx)[0])])
        elif self.method == 'mean_mirror':
            yy = np.array([np.mean(xx[i, :]) for i in range(np.shape(xx)[0])])
        return yy
    def normalize(self, xx1, yy1):
        yy = [None]*len(yy1)
        YMinMax = {}
        xx = MinMaxScaler(xx1)
        for i in range(self.y_pred):
            YMinMax['ymin_' + str(i)] = np.min(yy1[0])
            YMinMax['ymax_' + str(i)] = np.max(yy1[0])
            yy[i] = MinMaxScaler(yy1[0])
        setattr(self, 'YMinMax', YMinMax)
        return xx, yy
    def create_dataset(self, xx, yy):
        '''creates a dataset consisting of windows for x and y data'''
        dataX = self._build_input_windows(xx, self.seq_lengths)
        if self.y_pred > 1:
            pass
        elif self.y_pred > 1 and self.seq_lengths != any(self.seq_lengths):
            pass
        else:
            dataY = self._build_y_windows(yy[0], self.seq_lengths)
        return dataX, dataY
    def _build_input_windows(self, time_series, seq_length):
        dataX = []
        for i in range(0, len(time_series) - seq_length):
            _x = time_series[i:i + seq_length, :]
            dataX.append(_x)
        return np.array(dataX)
    def _build_y_windows(self, iny, seq_length):
        dataY = []
        for i in range(0, len(iny) - seq_length):
            _y = iny[i + seq_length, ]  # Next close price
            dataY.append(_y)
        return np.array(dataY)
    def TrainTestSplit(self, dataX, dataY, train_frac):
        train_size = int(len(dataY) * train_frac)
        trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
        trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
        trainY = trainY.reshape(len(trainY), 1)
        testY = testY.reshape(len(testY), 1)
        return trainX, trainY, testX, testY, train_size
#training/hyper parameters
tot_epochs = 500
batch_size = 32
learning_rate = 0.01
seq_lengths = 5 #sequence lengths/window size for RNN
rnn_inputs = 3 # no of inputs for RNN
y_pred = 1
data_length = 105 #this can be overwritten or useless
gen_data = generate_data(data_length, rnn_inputs, y_pred, seq_lengths, 'sum')
xx,yy,data_1 = gen_data.sine()
train_frac = 0.8
xx1,yy1 = gen_data.normalize(xx,[yy])
dataX, dataY = gen_data.create_dataset(xx1,yy1)
trainX, trainY, testX, testY, train_size = gen_data.TrainTestSplit( dataX, dataY, train_frac)
keep_prob = tf.placeholder(tf.float32)
x_placeholders = tf.placeholder(tf.float32, [None, 5, 3])
Y = tf.placeholder(tf.float32, [None, 1])
with tf.variable_scope('scope0'):  # defining RNN
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=7, state_is_tuple=True, activation=tf.tanh)
    outputs1, _states = tf.nn.dynamic_rnn(cell, x_placeholders, dtype=tf.float32)
    Y_pred1 = tf.contrib.layers.fully_connected(outputs1[:, -1], 1, activation_fn=None)
Y_pred = Y_pred1
## cost/loss
loss = tf.reduce_sum(tf.square(Y_pred - Y)) # sum of the squares
## optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)
#
## RMSE
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
with tf.Session() as sess:
    saver = tf.train.Saver(max_to_keep=41)
    writer = tf.summary.FileWriter('./laos_2out/cnntest', sess.graph)
    init = tf.global_variables_initializer()
    sess.run(init)
    # Training step
    for epoch in range(tot_epochs):
        total_batches = int(train_size / batch_size)  # total batches / no. of steps in an epoch
        #for batch in range(total_batches):
        _, step_loss = sess.run([train, loss], feed_dict={x_placeholders: trainX, Y: trainY, keep_prob: 0.5})
    # evaluating on test data
    test_predict = sess.run(Y_pred, feed_dict={x_placeholders: testX, Y: trainY, keep_prob: 0.5})
    # evaluating on training data
    train_predict = sess.run(Y_pred, feed_dict={x_placeholders: trainX, Y: trainY, keep_prob: 0.5})
    rmse_val = sess.run(rmse, feed_dict={targets: testY, predictions: test_predict})
    print("RMSE: {}".format(rmse_val))
    # Plot predictions
    fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
    fig.set_figwidth(14)
    fig.set_figheight(5)
    ax2.plot(testY, 'b', label='observed')
    ax2.plot(test_predict, 'k', label='predicted')
    ax2.legend(loc="best")
    ax2.set_xlabel("Time Period")
    ax2.set_title('Testing')
    ax1.plot(trainY, 'b', label='observed')
    ax1.plot(train_predict, 'k', label='predicted')
    ax1.legend(loc="best")
    ax1.set_xlabel("Time Period")
    ax1.set_ylabel("discharge (cms)")
    ax1.set_title('Training')
    plt.show()
I have looked at answers here and here which make use of tf.nn.raw_rnn, but in those cases only the predicted output from the previous time step is fed back in; I want the previous prediction plus the other three inputs to be used for the next prediction.
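For what it's worth, one way to express this is to unroll the LSTM manually and concatenate the previous step's prediction with the current three inputs at every step. The sketch below only assumes the shapes from the question (5-step windows, 3 input series, 7 LSTM units); it is not a drop-in replacement for the dynamic_rnn code above:
import tensorflow as tf
T, n_in, n_units = 5, 3, 7
x_in = tf.placeholder(tf.float32, [None, T, n_in])
cell = tf.nn.rnn_cell.LSTMCell(n_units)
proj = tf.layers.Dense(1)                        # shared projection to one output
state = cell.zero_state(tf.shape(x_in)[0], tf.float32)
y_prev = tf.zeros([tf.shape(x_in)[0], 1])        # previous prediction; zero at t=0
preds = []
for t in range(T):
    # four inputs per step: the three series plus the previous prediction
    step_input = tf.concat([x_in[:, t, :], y_prev], axis=1)
    h, state = cell(step_input, state)
    y_prev = proj(h)                             # fed back at the next step
    preds.append(y_prev)
Y_pred = preds[-1]                               # one-step-ahead prediction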

loss: nan using Keras vs non-nan (working) output using tensorflow

I am trying to replicate in Keras old code that I had in TensorFlow. For some reason my loss is always nan. I think the error is in the loss I am using ('categorical_crossentropy' in Keras vs 'tf.nn.softmax_cross_entropy_with_logits' in TensorFlow).
Keras code:
import keras
from keras.models import Sequential
from keras.layers import Dropout, Dense, Activation
from keras.regularizers import l2
from keras.layers.normalization import BatchNormalization
# Keras items
from keras.optimizers import Adam, Nadam
from keras.activations import relu, elu
from keras.losses import binary_crossentropy, categorical_crossentropy
from keras import metrics
import pandas as pd
import numpy as np
x_main = pd.read_csv("glioma DB X.csv")
y_main = pd.read_csv("glioma DB Y.csv")
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_main, y_main, test_size=0.3)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5)
# train shape
np.shape(x_train), np.shape(y_train)
# ((132, 47), (132, 1))
# Normalize training data; will want to have the same mu and sigma for test
def normalize_features(dataset):
    mu = np.mean(dataset, axis = 0)  # columns
    sigma = np.std(dataset, axis = 0)
    norm_parameters = {'mu': mu, 'sigma': sigma}
    return (dataset-mu)/(sigma+1e-10), norm_parameters
# Normal X data; using same mu and sigma from test set;
x_train, norm_parameters = normalize_features(x_train)
x_val = (x_val-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
x_test = (x_test-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
params = {'lr': 0.001,
          'batch_size': 30,
          'epochs': 8000,
          'dropout': 0.5,
          'weight_regulizer': ['l2'],
          'optimizer': 'adam',
          'losses': 'categorical_crossentropy',
          'activation': 'relu',
          'last_activation': 'softmax'}
from keras.utils.np_utils import to_categorical
#categorical_labels = to_categorical(int_labels, num_classes=None)
if params['losses'] == 'categorical_crossentropy':
    y_train = to_categorical(y_train, num_classes=4)
    y_val = to_categorical(y_val, num_classes=4)
    y_test = to_categorical(y_test, num_classes=4)
model = Sequential()
# layer 1
model.add(Dense(30, input_dim=x_train.shape[1],
                W_regularizer=l2(0.01),
                kernel_initializer='he_uniform'))
model.add(BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True))
model.add(Activation(params['activation']))
model.add(Dropout(params['dropout']))
# layer 2
model.add(Dense(20, W_regularizer=l2(0.01),
                kernel_initializer='he_uniform'))
model.add(BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True))
model.add(Activation(params['activation']))
model.add(Dropout(params['dropout']))
# if we want to also test for number of layers and shapes, that's possible
#hidden_layers(model, params, 1)
# Last layer
model.add(Dense(4, activation=params['last_activation'],
                kernel_initializer='he_uniform'))
model.compile(loss=params['losses'],
              optimizer=keras.optimizers.adam(lr=params['lr']),
              metrics=['categorical_accuracy'])
history = model.fit(x_train, y_train,
                    validation_data=[x_val, y_val],
                    batch_size=params['batch_size'],
                    epochs=params['epochs'],
                    verbose=1)
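One difference worth keeping in mind when comparing the two losses: tf.nn.softmax_cross_entropy_with_logits takes raw logits and applies the softmax internally, while Keras' categorical_crossentropy by default expects probabilities coming out of a softmax activation. A sketch of making the Keras side match the TensorFlow loss exactly (this assumes the last Dense layer is changed to have no activation):
from keras import backend as K
def crossentropy_from_logits(y_true, y_pred):
    # softmax is applied inside, mirroring tf.nn.softmax_cross_entropy_with_logits
    return K.categorical_crossentropy(y_true, y_pred, from_logits=True)
# model.add(Dense(4, activation=None, kernel_initializer='he_uniform'))
# model.compile(loss=crossentropy_from_logits, optimizer=keras.optimizers.adam(lr=params['lr']), metrics=['categorical_accuracy'])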
Working code using tensorflow which gives me a pretty loss graph haha:
x_train, x_test, y_train, y_test = train_test_split(X_main, Y_main, test_size=0.3)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5)
# ANOTHER OPTION IS TO USE SKLEARN sklearn.model_selection.ShuffleSplit
# look into stratification
# Normalize training data; will want to have the same mu and sigma for test
def normalize_features(dataset):
    mu = np.mean(dataset, axis = 0)  # columns
    sigma = np.std(dataset, axis = 0)
    norm_parameters = {'mu': mu, 'sigma': sigma}
    return (dataset-mu)/(sigma+1e-10), norm_parameters
# TRY LOG TRANSFORMATION LOG(1+X) to deal with outliers
# change ordinal to one hot vector
# to make label encoder
# for c in x_train.columns[x_train.dtype == 'object']:
# X[c] (which was copy of xtrain) X[c].factorize()[0]
# able to plot feature importance in random forest
# Normal X data; using same mu and sigma from test set; then transposed
x_train, norm_parameters = normalize_features(x_train)
x_val = (x_val-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
x_test = (x_test-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
x_train = np.transpose(x_train)
x_val = np.transpose(x_val)
x_test = np.transpose(x_test)
y_train = np.transpose(y_train)
y_val = np.transpose(y_val)
y_test = np.transpose(y_test)
# converting values from database to matrix
x_train = x_train.as_matrix()
x_val = x_val.as_matrix()
x_test = x_test.as_matrix()
y_train = y_train.as_matrix()
y_val = y_val.as_matrix()
y_test = y_test.as_matrix()
# testing shape
#print(y_train.shape)
#print(y_val.shape)
#print(y_test.shape)
#
#print(x_train.shape)
#print(x_val.shape)
#print(x_test.shape)
# convert y to a one-hot array per value, so 3 -> [0 0 0 1]
def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)].T
    return Y
y_train = convert_to_one_hot(y_train, 4)
y_val = convert_to_one_hot(y_val, 4)
y_test = convert_to_one_hot(y_test, 4)
print ("number of training examples = " + str(x_train.shape[1]))
print ("number of test examples = " + str(x_test.shape[1]))
print ("X_train shape: " + str(x_train.shape))
print ("Y_train shape: " + str(y_train.shape))
print ("X_test shape: " + str(x_test.shape))
print ("Y_test shape: " + str(y_test.shape))
# minibatches for later
def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):
    """
    Creates a list of random minibatches from (X, Y)
    Arguments:
    X -- input data, of shape (input size, number of examples)
    Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples)
    mini_batch_size -- size of the mini-batches, integer
    seed -- this is only for the purpose of grading, so that your "random" minibatches are the same as ours
    Returns:
    mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y)
    """
    m = X.shape[1]  # number of training examples
    mini_batches = []
    # Step 1: Shuffle (X, Y)
    permutation = list(np.random.permutation(m))
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation].reshape((Y.shape[0], m))
    # Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
    num_complete_minibatches = math.floor(m/mini_batch_size)  # number of mini batches of size mini_batch_size in your partitioning
    for k in range(0, num_complete_minibatches):
        mini_batch_X = shuffled_X[:, k * mini_batch_size : k * mini_batch_size + mini_batch_size]
        mini_batch_Y = shuffled_Y[:, k * mini_batch_size : k * mini_batch_size + mini_batch_size]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = shuffled_X[:, num_complete_minibatches * mini_batch_size : m]
        mini_batch_Y = shuffled_Y[:, num_complete_minibatches * mini_batch_size : m]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    return mini_batches
# starting TF graph
# Create X and Y placeholders
def create_xy_placeholder(n_x, n_y):
    X = tf.placeholder(tf.float32, shape = [n_x, None], name = 'X')
    Y = tf.placeholder(tf.float32, shape = [n_y, None], name = 'Y')
    return X, Y
# initialize parameters for hidden layers
def initialize_parameters(n_x, scale, hidden_units):
    hidden_units = [n_x] + hidden_units
    parameters = {}
    regularizer = tf.contrib.layers.l2_regularizer(scale)
    for i in range(0, len(hidden_units[1:])):
        with tf.variable_scope('hidden_parameters_'+str(i+1)):
            w = tf.get_variable("W"+str(i+1), [hidden_units[i+1], hidden_units[i]],
                                initializer=tf.contrib.layers.xavier_initializer(),
                                regularizer=regularizer)
            b = tf.get_variable("b"+str(i+1), [hidden_units[i+1], 1],
                                initializer=tf.constant_initializer(0.1))
            parameters.update({"W"+str(i+1): w})
            parameters.update({"b"+str(i+1): b})
    return parameters
# forward propagation with batch norm and dropout
def forward_propagation(X, parameters, batch_norm=False, keep_prob=1):
    a_new = X
    for i in range(0, int(len(parameters)/2)-1):
        with tf.name_scope('forward_pass_'+str(i+1)):
            w = parameters['W'+str(i+1)]
            b = parameters['b'+str(i+1)]
            z = tf.matmul(w, a_new) + b
            if batch_norm == True:
                z = tf.layers.batch_normalization(z, momentum=0.99, axis=0)
            a = tf.nn.relu(z)
            if keep_prob < 1:
                a = tf.nn.dropout(a, keep_prob)
            a_new = a
            tf.summary.histogram('act_'+str(i+1), a_new)
    # calculating final Z before input into cost as logit
    with tf.name_scope('forward_pass_'+str(int(len(parameters)/2))):
        w = parameters['W'+str(int(len(parameters)/2))]
        b = parameters['b'+str(int(len(parameters)/2))]
        z = tf.matmul(w, a_new) + b
        if batch_norm == True:
            z = tf.layers.batch_normalization(z, momentum=0.99, axis=0)
    return z
# compute cost with option for l2 regularization
def compute_cost(z, Y, parameters, l2_reg=False):
    with tf.name_scope('cost'):
        logits = tf.transpose(z)
        labels = tf.transpose(Y)
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
        if l2_reg == True:
            reg = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            cost = cost + tf.reduce_sum(reg)
    with tf.name_scope('Pred/Accuracy'):
        prediction = tf.argmax(z)
        correct_prediction = tf.equal(tf.argmax(z), tf.argmax(Y))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    return cost, prediction, accuracy
# defining the model (need to add keep_prob for dropout)
def model(X_train, Y_train, X_test, Y_test,
          hidden_units=[30, 20, 4],                 # hidden units/layers
          learning_rate = 0.0001,                   # learning rate
          num_epochs = 10000, minibatch_size = 30,  # number of epochs / minibatch size
          keep_prob=0.5,                            # dropout
          batch_norm=True,                          # batch normalization
          l2_reg=True, scale = 0.01,                # L2 regularization / scale is lambda
          print_cost = True):
    ops.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)      # to keep consistent results
    seed = 3                   # to keep consistent results
    (n_x, m) = X_train.shape   # (n_x: input size, m: number of examples in the train set)
    n_y = Y_train.shape[0]     # n_y: output size
    costs = []                 # to keep track of the cost
    # Create placeholders of shape (n_x, n_y)
    X, Y = create_xy_placeholder(n_x, n_y)
    # Initialize parameters
    parameters = initialize_parameters(n_x, scale, hidden_units)
    # Forward propagation: build the forward propagation in the tensorflow graph
    z = forward_propagation(X, parameters, keep_prob, batch_norm)
    # Cost function: add cost function to tensorflow graph
    cost, prediction, accuracy = compute_cost(z, Y, parameters, l2_reg)
    # Backpropagation: define the tensorflow optimizer
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
    # Initialize all the variables
    init = tf.global_variables_initializer()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # Start the session to compute the tensorflow graph
    with tf.Session(config=config) as sess:
        # Run the initialization
        sess.run(init)
        # Do the training loop
        for epoch in range(num_epochs):
            epoch_cost = 0.  # defines a cost related to an epoch
            num_minibatches = int(m / minibatch_size)  # number of minibatches of size minibatch_size in the train set
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)
            for minibatch in minibatches:
                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch
                # IMPORTANT: the line that runs the graph on a minibatch.
                # Run the session to execute the "optimizer" and the "cost"; the feed dict should contain a minibatch for (X, Y).
                _ , minibatch_cost = sess.run([optimizer, cost],
                                              feed_dict = {X: minibatch_X, Y: minibatch_Y})
                epoch_cost += minibatch_cost / num_minibatches
            # Print the cost every epoch
            if print_cost == True and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
                prediction1 = tf.argmax(z)
                # print('Z5: ', Z5.eval(feed_dict={X: minibatch_X, Y: minibatch_Y}))
                print('prediction: ', prediction1.eval(feed_dict={X: minibatch_X, Y: minibatch_Y}))
                correct1 = tf.argmax(Y)
                # print('Y: ', Y.eval(feed_dict={X: minibatch_X, Y: minibatch_Y}))
                print('correct: ', correct1.eval(feed_dict={X: minibatch_X, Y: minibatch_Y}))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)
        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per tens)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()
        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print("Parameters have been trained!")
        # Calculate the correct predictions
        correct_prediction = tf.equal(tf.argmax(z), tf.argmax(Y))
        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
    return parameters
# run model on test data
parameters = model(x_train, y_train, x_test, y_test, keep_prob=1)

MLP unable to learn a system of linear equations

I am using a simple MLP to solve a system of linear equations of the form
y = Ax
Here A is a matrix that maps x to y. I generate synthetic data by fixing A and randomly generating x. In my view, an MLP should be able to learn this simple mapping, but my network fails to perform well.
This is how I generate my data:
import numpy as np
import scipy.sparse as sps
import tensorflow as tf
import matplotlib as mp
import matplotlib.pyplot as plt
N_x = 100
N_y = 50
d = 0.1
m_tr = 10000
m_val = 10000
m_te = 120
A = np.random.randn(N_y, N_x)
A = A / np.linalg.norm(A, 2, axis=0)
# Generate x_train
x_train = sps.hstack([sps.random(N_x, 1, density=d, format='coo', dtype=None, random_state=None) for _ in range(m_tr)]).toarray()
x_train = np.random.rand(1, m_tr) * x_train / np.linalg.norm(x_train, 2, axis=0)
x_test = sps.hstack([sps.random(N_x, 1, density=d, format='coo', dtype=None, random_state=None) for _ in range(m_te)]).toarray()
x_test = np.random.rand(1, m_te) * x_test / np.linalg.norm(x_test, 2, axis=0)
y_train = np.matmul(A, x_train)
y_test = np.matmul(A, x_test)
train_data = ((y_train.T, x_train.T))
test_data = ((y_test.T, x_test.T))
And this is my MLP:
# Parameters
learning_rate = 0.001
training_epochs = 20
Batch_Size = 100
batch_size = tf.placeholder(tf.int64)
n_batches = m_tr//Batch_Size
def fc_layer(input_, channels_in, channels_out, name="fc"):
    with tf.name_scope(name):
        W = tf.Variable(tf.random_normal([channels_in, channels_out], stddev=0.1), name="weights")
        b = tf.Variable(tf.constant(0.1, shape=[channels_out]), name="biases")
        act = tf.matmul(input_, W) + b
        return act
# Setup placeholders, and reshape the data
y = tf.placeholder(tf.float32, shape=[None,N_y], name = 'y')
x = tf.placeholder(tf.float32, shape=[None,N_x], name = 'x')
dataset = tf.data.Dataset.from_tensor_slices((y, x)).batch(batch_size).repeat()
iterator = dataset.make_initializable_iterator()
input_features, output_features = iterator.get_next()
fc_1_linear = fc_layer(input_features, N_y,256, name = "fc1")
fc_1 = tf.nn.relu(fc_1_linear)
fc_2_linear= fc_layer(fc_1, 256,512, name = "fc2")
fc_2 = tf.nn.relu(fc_2_linear)
out_layer= fc_layer(fc_2, 512,N_x, name = "fc3")
with tf.name_scope('loss'):
    loss_op = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.squared_difference(out_layer, output_features), 1)))
with tf.name_scope('train'):
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_op)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# initialise iterator with train data
sess.run(iterator.initializer, feed_dict={ y: train_data[0], x: train_data[1], batch_size: Batch_Size})
print('Training...')
end_loss = 0
for i in range(training_epochs):
    tot_loss = 0
    for _ in range(n_batches):
        temp, loss_value = sess.run([train_op, loss_op])
        tot_loss += loss_value
        step = i*n_batches + _
        if step > 10:
            train_summary = tf.Summary()
            train_summary.ParseFromString(sess.run(merged_op))
            writer.add_summary(train_summary, step)
    end_loss = tot_loss/n_batches
    print("Iter: {}, Loss: {:.4f}".format(i, end_loss))
# initialise iterator with test data
sess.run(iterator.initializer, feed_dict={y: test_data[0], x: test_data[1], batch_size: test_data[0].shape[0]})
print('Test Loss: {:4f}'.format(sess.run(loss_op)))
print('Generalization Error: {:4f}'.format(sess.run(loss_op)-end_loss))
I am not sure what the problem is. The loss seems to decrease with each epoch, but on plotting the actual vector against the reconstructed vector there is a lot of discrepancy.
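As a baseline on what is achievable (a sketch separate from the network above): with N_y = 50 < N_x = 100 the system y = Ax is underdetermined, so a generic x is not uniquely recoverable from y. The pseudo-inverse gives the minimum-norm least-squares reconstruction, which is the best any purely linear model can do; a network can only beat it by exploiting structure in x, here its sparsity:
import numpy as np
np.random.seed(0)
A = np.random.randn(50, 100)      # same shape as in the question
x = np.random.randn(100, 120)     # hypothetical test vectors
y = A @ x
x_hat = np.linalg.pinv(A) @ y     # minimum-norm least-squares reconstruction
rel_err = np.linalg.norm(x - x_hat) / np.linalg.norm(x)
print(rel_err)                    # well above zero for dense x: recovery needs structure such as sparsity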

Plotting decision boundary Line for a binary classifier

I have trained a logistic model whose decision boundary looks like this:
using the following code that I got online:
x_min, x_max = xbatch[:, 0].min() - .5, xbatch[:, 0].max() + .5
y_min, y_max = xbatch[:, 1].min() - .5, xbatch[:, 1].max() + .5
h = 0.05
# Generate a grid of points with distance h between them
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
X = np.vstack( ( xx.reshape(1, np.product(xx.shape)), yy.reshape(1, np.product(yy.shape)) ) ).T
# Predict the function value for the whole grid
z1 = np.dot(X, w1_pred)+b1_pred
h1 = 1 / (1 + np.exp(-z1))
z2 = np.dot(h1, w2_pred)+b2_pred
y_hat = 1 / (1 + np.exp(-z2))
pred = np.round(y_hat)
Z = pred.reshape(xx.shape)
# Plot the contour and training examples
plt.contourf(xx, yy, Z)
plt.scatter(xbatch[:, 0], xbatch[:, 1], c=ybatch, s=40, edgecolors="grey", alpha=0.9)
My question is this: is there a way to plot the decision line without meshgrid or contour? I would like to just plot the sigmoid's decision curve on the graph, without the colours or filled contours, so it looks like this:
Using contour with levels=[0.5] on the sigmoid output should work.
A synthetic training set:
train_X = np.random.multivariate_normal([2.2, 2.2], [[0.1,0],[0,0.1]], 150)
train_Y = np.zeros(150)
train_X = np.concatenate((train_X, np.random.multivariate_normal([1.4, 1.3], [[0.05,0],[0,0.3]], 50)), axis=0)
train_Y = np.concatenate((train_Y, np.ones(50)))
train_X = np.concatenate((train_X, np.random.multivariate_normal([1.3, 2.9], [[0.05,0],[0,0.05]], 50)), axis=0)
train_Y = np.concatenate((train_Y, np.ones(50)))
train_X = np.concatenate((train_X, np.random.multivariate_normal([2.5, 0.95], [[0.1,0],[0,0.1]], 50)), axis=0)
train_Y = np.concatenate((train_Y, np.ones(50)))
An example model:
x = tf.placeholder(tf.float32, [None, 2])
y = tf.placeholder(tf.float32, [None,1])
#Input to hidden units
w_i_h = tf.Variable(tf.truncated_normal([2, 2],mean=0, stddev=0.1))
b_i_h = tf.Variable(tf.zeros([2]))
hidden = tf.sigmoid(tf.matmul(x, w_i_h) + b_i_h)
#hidden to output
w_h_o = tf.Variable(tf.truncated_normal([2, 1],mean=0, stddev=0.1))
b_h_o = tf.Variable(tf.zeros([1]))
logits = tf.sigmoid(tf.matmul(hidden, w_h_o) + b_h_o)
cost = tf.reduce_mean(tf.square(logits-y))
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(cost)
correct_prediction = tf.equal(tf.sign(logits-0.5), tf.sign(y-0.5))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#Initialize all variables
init = tf.global_variables_initializer()
#Launch the graph
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(3000):
        _, c = sess.run([optimizer, cost], feed_dict={x: train_X, y: np.reshape(train_Y, (train_Y.shape[0], 1))})
        if epoch % 1000 == 0:
            print('Epoch: %d' % (epoch+1), 'cost = {:0.4f}'.format(c), end='\r')
    acc = sess.run([accuracy], feed_dict={x: train_X, y: np.reshape(train_Y, (train_Y.shape[0], 1))})
    print('\n Accuracy:', acc)
    xx, yy = np.mgrid[0:3.5:0.1, 0:3.5:0.1]
    grid = np.c_[xx.ravel(), yy.ravel()]
    pred_1 = sess.run([logits], feed_dict={x: grid})
The output:
Z = np.array(pred_1).reshape(xx.shape)
plt.contour(xx, yy, Z, levels=[0.5], cmap='gray')
plt.scatter(train_X[:,0], train_X[:,1], s=20, c=train_Y, cmap='jet', vmin=0, vmax=1)
plt.show()
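If you want the raw boundary line rather than a contour artist, one option (a sketch reusing xx, yy and Z from above) is to pull the vertices out of the contour set and draw them as an ordinary line:
cs = plt.contour(xx, yy, Z, levels=[0.5])
for path in cs.collections[0].get_paths():
    vx, vy = path.vertices[:, 0], path.vertices[:, 1]
    plt.plot(vx, vy, 'k')  # just the 0.5 iso-line, no fills
plt.show()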