I have implemented an RBF (radial basis function) neural network classifier.
I use my implementation to classify the MNIST dataset, but it is not learning: it always predicts a single class. I would be grateful if someone could help me identify the problem with my implementation.
Note that the implementation is quite slow because it processes one example at a time; I don't know how to make it work batch by batch. (I am new to TensorFlow and Python in general.)
My implementation is as follows:
import tensorflow as tf

class RBF_NN:
    def __init__(self, M, K, L, lr):
        # Layer sizes
        self.M = M  # input layer size - number of features
        self.K = K  # RBF layer size
        self.L = L  # output layer size - number of classes

        x = tf.placeholder(tf.float32, shape=[M])
        matrix = tf.reshape(tf.tile(x, multiples=[K]), shape=[K, M])
        prototypes_input = tf.placeholder(tf.float32, shape=[K, M])
        prototypes = tf.Variable(prototypes_input)  # prototypes - representatives of the data
        r = tf.reduce_sum(tf.square(prototypes - matrix), 1)
        s = tf.Variable(tf.random.uniform(shape=[K], maxval=1))  # scaling factors
        h = tf.exp(-r / (2 * tf.pow(s, 2)))
        W = tf.Variable(tf.random.uniform(shape=[K, L], maxval=1))
        b = tf.Variable(tf.constant(0.1, shape=[L]))
        o = tf.matmul(tf.transpose(tf.expand_dims(h, 1)), W) + b
        pred_class = tf.argmax(o, 1)

        y = tf.placeholder(shape=[L], dtype=tf.float32)
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=o, labels=y))
        optimizer = tf.train.AdamOptimizer(lr).minimize(loss)

        self.x = x
        self.prototypes_input = prototypes_input
        self.prototypes = prototypes
        self.r = r
        self.s = s
        self.h = h
        self.W = W
        self.b = b
        self.o = o
        self.y = y
        self.loss = loss
        self.optimizer = optimizer
        self.pred_class = pred_class

    def fit(self, X, y, prototypes, epoch_count, print_step, sess):
        for epoch in range(epoch_count):
            epoch_loss = 0
            for xi, yi in zip(X, y):
                iter_loss, _ = sess.run((self.loss, self.optimizer),
                                        feed_dict={self.x: xi, self.y: yi, self.prototypes_input: prototypes})
                epoch_loss = epoch_loss + iter_loss
            epoch_loss = epoch_loss / len(X)
            if epoch % print_step == 0:
                print("Epoch loss", (epoch + 1), ":", epoch_loss)

    def predict(self, x, sess):
        return sess.run(self.pred_class, feed_dict={self.x: x})[0]

    def get_prototypes(self, sess):
        return sess.run(self.prototypes)
Usage:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
y_train = to_one_hot(y_train,10)
y_test = to_one_hot(y_test,10)
x_train = np.asarray([np.asarray(x).reshape(-1) for x in x_train])
x_test = np.asarray([np.asarray(x).reshape(-1) for x in x_test])
M = 784
K = 1000
L = 10
lr = 0.01
rbfnn = RBF_NN(M,K,L,lr)
#Selecting prototypes from the train set
idx = np.random.randint(len(x_train), size=K)
prototypes = x_train[idx,:]
init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init,feed_dict={rbfnn.prototypes_input:prototypes})
rbfnn.fit(x_train,y_train,prototypes,epoch_count=1, print_step=1,sess=sess)
y_test_p = []
for xi, yi in zip(x_test, y_test):
    yp = rbfnn.predict(xi, sess=sess)
    y_test_p.append(yp)
y_test_t = [np.argmax(yi) for yi in y_test]
acc = accuracy_score(y_test_t,y_test_p,)
precc = precision_score(y_test_t,y_test_p, average='macro')
recall = recall_score(y_test_t,y_test_p, average = 'macro')
f1 = f1_score(y_test_t,y_test_p,average='macro')
print("Accuracy:",acc)
print("Precision:",precc)
print("Recall:",recall)
print("F1 score:",f1)
sess.close()
The implementation itself is fine; it just turns out to be very sensitive to the scale of the input data.
It will start learning just fine if the following lines are added:
x_train = (x_train-x_train.min())/(x_train.max()-x_train.min())
x_test = (x_test-x_test.min())/(x_test.max()-x_test.min())
This normalizes the data so that every feature lies in the interval from 0 to 1.
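On the side question about speed: the graph can also be written to accept a whole batch at once by giving the placeholders a leading batch dimension and computing all example-to-prototype distances in one matrix expression. A rough, untested sketch, reusing the prototypes, s, W and b variables from __init__:

X = tf.placeholder(tf.float32, shape=[None, M])   # a batch of examples
Y = tf.placeholder(tf.float32, shape=[None, L])   # the matching one-hot labels
# squared distances between every example and every prototype -> [batch, K]
r = (tf.reduce_sum(tf.square(X), axis=1, keepdims=True)
     - 2.0 * tf.matmul(X, prototypes, transpose_b=True)
     + tf.reduce_sum(tf.square(prototypes), axis=1))
h = tf.exp(-r / (2.0 * tf.square(s)))              # [batch, K]; s broadcasts over the batch
o = tf.matmul(h, W) + b                            # [batch, L]
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=o, labels=Y))
pred_class = tf.argmax(o, 1)

fit() would then feed slices of x_train and y_train instead of single examples.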
I'm new to neural networks, and I'm trying to write my first network.
Here's the data I'm trying to train it on.
import numpy as np
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=1024, n_features=2, n_informative=2,
                           n_redundant=0, n_classes=2, random_state=2)
[plot: generated data]
Here is my network:
def f(x):
    return 1 / (1 + np.exp(-x))

def df(x):
    return f(x) * (1 - f(x))

def init():
    np.random.seed(42)
    W1 = np.random.rand(2, 2)
    W2 = np.random.rand(1, 2)
    return W1, W2

def go_forward(inp, W1, W2):
    sum = np.dot(W1, inp)
    out = np.array([f(x) for x in sum])
    sum_1 = np.dot(W2, out)
    out_1 = f(sum_1)
    y = 1 if out_1 > 0.5 else 0
    return (y, out)

def train_one_epoch(X_train, y_train, W2, W1, lr):
    err = []
    for k in range(len(X_train)):
        ind = np.random.randint(0, len(X_train))
        x, y = X_train[ind], y_train[ind]
        pred, out = go_forward(x, W1, W2)
        e = pred - y
        delta = e * df(pred)
        W2 = W2 - lr * delta * out
        delta2 = W2 * delta * df(out)
        W1 = W1 - np.array(x) * delta2 * lr
        err.append(e)
    return W1, W2, np.mean(np.array(err))

epoch = 100
lr = 0.01
err = []
W1, W2 = init()
for e in range(epoch):
    W1, W2, e = train_one_epoch(X_train, y_train, W2, W1, lr)
    err.append(e)
The error graph is bothering me:
[error graph]
Here is an accuracy comparison:
My Network - 0.751953125
sklearn.linear_model.LogisticRegression - 0.974609375
[plots: test data, my network's results, LogisticRegression's results]
I'm at a loss as to why the network can't solve such a simple problem. I have tried changing the number of epochs, but it doesn't help.
Hello,
I'm new to machine learning and Python, and I want to predict the Kaggle House Sales in King County dataset with my own gradient descent implementation.
I split the data into 70% training (15k rows) and 30% testing (6k rows) and choose 5 features out of 19, but there is a performance problem: the algorithm ran for more than 11 hours, used 100% of memory, and failed to finish.
This is my Gradient Descent class:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score

class GradientDescent:
    X_train = []
    Y_train = []
    X_test = []
    Y_test = []
    lr = 0
    max_iter = 0
    theta = 0

    def __init__(self, X_train, Y_train, X_test, Y_test, lr=0.01, max_iter=100):
        self.X_train = X_train
        self.Y_train = Y_train
        self.X_test = X_test
        self.Y_test = Y_test
        self.lr = lr
        self.max_iter = max_iter
        self.theta = np.random.randn(X_train.shape[1], 1)
        print(self.theta)

    def costFunction(self, theta, X, y):
        "1/2m * E(h0-y)**2"
        m = len(y)
        y_pred = X.dot(theta)
        cost = (1 / 2 * m) * np.sum(np.square(y_pred - y))
        return cost

    def estimate(self):
        m = len(self.Y_train)
        mse_hist = np.zeros(self.max_iter)
        # theta_hist = np.zeros(max_iter)
        i = 0
        while i < self.max_iter or mse_hist[i] > 0.01:
            y_pred = np.dot(self.X_train, self.theta)
            error = y_pred - self.Y_train
            self.theta = self.theta - (1 / m) * self.lr * (self.X_train.T.dot(error))
            mse_hist[i] = self.costFunction(self.theta, self.X_train, self.Y_train)
            # print(mse_hist[i])
            i += 1
        return (self.theta, mse_hist)

    def test(self):
        res = pd.DataFrame()
        for i, row in self.X_test.iterrows():
            price_pred = np.dot(row.values, self.theta)
            res = row
            res['price_actual'] = self.Y_test[i]
            res['price_predict'] = price_pred
            res['r2_score'] = r2_score(res['price_actual'].values, res['price_predict'])
        res.to_csv('output.csv')
Any advice to make it better?
In general the code seems fine, though I haven't tested it. The only error I could find is the exit condition of your while loop: because it uses an "or", once i reaches max_iter the condition falls through to mse_hist[i], which is out of range (mse_hist only has max_iter entries), so the loop never exits cleanly.
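For example, the loop in estimate() could be written like this (a sketch; I'm assuming you want to stop either at max_iter or as soon as the cost drops below 0.01):

i = 0
while i < self.max_iter:
    y_pred = np.dot(self.X_train, self.theta)
    error = y_pred - self.Y_train
    self.theta = self.theta - (1 / m) * self.lr * self.X_train.T.dot(error)
    mse_hist[i] = self.costFunction(self.theta, self.X_train, self.Y_train)
    if mse_hist[i] < 0.01:  # early stop once the cost is small enough
        break
    i += 1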
In the following example there are three time series, and I want to predict another time series y which is a function of the three. How can I use four inputs to predict the time series, where the fourth input is the output at the previous time step?
import tensorflow as tf
import numpy as np
import pandas as pd
#clean computation graph
tf.reset_default_graph()
tf.set_random_seed(777) # reproducibility
np.random.seed(0)
import matplotlib.pyplot as plt
def MinMaxScaler(data):
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    # noise term prevents the zero division
    return numerator / (denominator + 1e-7)
class generate_data(object):

    def __init__(self, data_len, in_series, y_pred, seq_lengths, method='sum'):
        self.data_len = data_len
        self.data = None
        self.in_series = in_series  # number of input series
        self.y_pred = y_pred  # number of final outputs from model
        self.seq_lengths = seq_lengths
        self.method = method

    def _f(self, x):
        y = 0
        result = []
        for _ in x:
            result.append(y)
            y += np.random.normal(scale=1)
        return np.array(result)

    def _runningMean(self, x, N):
        return np.convolve(x, np.ones((N,)) / N)[(N - 1):]

    def sine(self):
        DATA = np.zeros((self.data_len, self.in_series))
        xx = [None]
        data_0 = np.sin(np.arange(self.data_len * self.in_series))
        xx = data_0.reshape(self.data_len, self.in_series)
        DATA[:, 0: self.in_series] = xx
        y = self._get_y(DATA)
        return xx, y, DATA

    def _get_y(self, xx):
        if self.method == 'sum':
            yy = np.array([np.sum(xx[i, :]) for i in range(np.shape(xx)[0])])
        elif self.method == 'mean':
            yy = np.array([np.mean(xx[i, :]) for i in range(np.shape(xx)[0])])
        elif self.method == 'self_mul':
            yy = np.array([np.prod(xx[i, :]) for i in range(np.shape(xx)[0])])
        elif self.method == 'mean_mirror':
            yy = np.array([np.mean(xx[i, :]) for i in range(np.shape(xx)[0])])
        return yy

    def normalize(self, xx1, yy1):
        yy = [None] * len(yy1)
        YMinMax = {}
        xx = MinMaxScaler(xx1)
        for i in range(self.y_pred):
            YMinMax['ymin_' + str(i)] = np.min(yy1[0])
            YMinMax['ymax_' + str(i)] = np.max(yy1[0])
            yy[i] = MinMaxScaler(yy1[0])
        setattr(self, 'YMinMax', YMinMax)
        return xx, yy

    def create_dataset(self, xx, yy):
        '''creates a dataset consisting of windows for x and y data'''
        dataX = self._build_input_windows(xx, self.seq_lengths)
        if self.y_pred > 1:
            pass
        elif self.y_pred > 1 and self.seq_lengths != any(self.seq_lengths):
            pass
        else:
            dataY = self._build_y_windows(yy[0], self.seq_lengths)
        return dataX, dataY

    def _build_input_windows(self, time_series, seq_length):
        dataX = []
        for i in range(0, len(time_series) - seq_length):
            _x = time_series[i:i + seq_length, :]
            dataX.append(_x)
        return np.array(dataX)

    def _build_y_windows(self, iny, seq_length):
        dataY = []
        for i in range(0, len(iny) - seq_length):
            _y = iny[i + seq_length, ]  # Next close price
            dataY.append(_y)
        return np.array(dataY)

    def TrainTestSplit(self, dataX, dataY, train_frac):
        train_size = int(len(dataY) * train_frac)
        trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])
        trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
        trainY = trainY.reshape(len(trainY), 1)
        testY = testY.reshape(len(testY), 1)
        return trainX, trainY, testX, testY, train_size
#training/hyper parameters
tot_epochs = 500
batch_size = 32
learning_rate = 0.01
seq_lengths = 5 #sequence lengths/window size for RNN
rnn_inputs = 3 # no of inputs for RNN
y_pred = 1
data_length = 105 #this can be overwritten or useless
gen_data = generate_data(data_length, rnn_inputs, y_pred, seq_lengths, 'sum')
xx,yy,data_1 = gen_data.sine()
train_frac = 0.8
xx1,yy1 = gen_data.normalize(xx,[yy])
dataX, dataY = gen_data.create_dataset(xx1,yy1)
trainX, trainY, testX, testY, train_size = gen_data.TrainTestSplit( dataX, dataY, train_frac)
keep_prob = tf.placeholder(tf.float32)
x_placeholders = tf.placeholder(tf.float32, [None, 5, 3])
Y = tf.placeholder(tf.float32, [None, 1])
with tf.variable_scope('scope0'):  # defining RNN
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=7, state_is_tuple=True, activation=tf.tanh)
    outputs1, _states = tf.nn.dynamic_rnn(cell, x_placeholders, dtype=tf.float32)
    Y_pred1 = tf.contrib.layers.fully_connected(outputs1[:, -1], 1, activation_fn=None)
    Y_pred = Y_pred1
## cost/loss
loss = tf.reduce_sum(tf.square(Y_pred - Y)) # sum of the squares
## optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)
#
## RMSE
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
with tf.Session() as sess:
    saver = tf.train.Saver(max_to_keep=41)
    writer = tf.summary.FileWriter('./laos_2out/cnntest', sess.graph)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Training step
    for epoch in range(tot_epochs):
        total_batches = int(train_size / batch_size)  # total batches / no. of steps in an epoch
        # for batch in range(total_batches):
        _, step_loss = sess.run([train, loss], feed_dict={x_placeholders: trainX, Y: trainY, keep_prob: 0.5})

    # evaluating on test data
    test_predict = sess.run(Y_pred, feed_dict={x_placeholders: testX, Y: trainY, keep_prob: 0.5})

    # evaluating on training data
    train_predict = sess.run(Y_pred, feed_dict={x_placeholders: trainX, Y: trainY, keep_prob: 0.5})

    rmse_val = sess.run(rmse, feed_dict={targets: testY, predictions: test_predict})
    print("RMSE: {}".format(rmse_val))

    # Plot predictions
    fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
    fig.set_figwidth(14)
    fig.set_figheight(5)

    ax2.plot(testY, 'b', label='observed')
    ax2.plot(test_predict, 'k', label='predicted')
    ax2.legend(loc="best")
    ax2.set_xlabel("Time Period")
    ax2.set_title('Testing')

    ax1.plot(trainY, 'b', label='observed')
    ax1.plot(train_predict, 'k', label='predicted')
    ax1.legend(loc="best")
    ax1.set_xlabel("Time Period")
    ax1.set_ylabel("discharge (cms)")
    ax1.set_title('Training')

    plt.show()
I have looked at answers here and here which make use of tf.nn.raw_rnn, but in those cases only the predicted output at previous time steps is used; I want the predicted output at previous time steps plus the other three inputs to be used for prediction.
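To make the intended input concrete, this is roughly the augmented input I have in mind during training (a sketch only, using the xx1 and yy1 produced by normalize() above; at prediction time the fourth channel would have to come from the model's own previous output, which is exactly the part I am asking about):

# append the previous step's y as a fourth input channel
y_prev = np.roll(yy1[0], 1)      # y shifted by one time step
y_prev[0] = 0.0                  # no previous value exists for the very first step
xx_aug = np.concatenate([xx1, y_prev[:, None]], axis=1)   # shape [T, 4]
# windows built from xx_aug would feed a placeholder of shape [None, seq_lengths, 4]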
I am using a simple MLP to solve a system of linear equations of the form
y = Ax
Here A is a matrix that maps x to y. I generate synthetic data by fixing A and randomly generating x. In my view, an MLP should be able to learn this simple mapping, but my network fails to perform well.
This is how I generate my data:
import numpy as np
import scipy.sparse as sps
import tensorflow as tf
import matplotlib as mp
import matplotlib.pyplot as plt
N_x = 100
N_y =50
d = 0.1
m_tr = 10000
m_val = 10000
m_te = 120
A = np.random.randn(N_y,N_x)
A = A/np.linalg.norm(A,2, axis = 0)
#Generate x_train
x_train = sps.hstack([sps.random(N_x, 1, density=d, format='coo', dtype=None,
random_state=None) for _ in range(m_tr)]).toarray()
x_train = np.random.rand(1, m_tr)*x_train/np.linalg.norm(x_train,2, axis = 0)
x_test = sps.hstack([sps.random(N_x, 1, density=d, format='coo', dtype=None,
random_state=None) for _ in range(m_te)]).toarray()
x_test = np.random.rand(1, m_te)*x_test/np.linalg.norm(x_test,2, axis = 0)
y_train = np.matmul(A,x_train)
y_test = np.matmul(A,x_test)
train_data = ((y_train.T,x_train.T))
test_data = ((y_test.T,x_test.T))
and this is my MLP
# Parameters
learning_rate = 0.001
training_epochs = 20
Batch_Size = 100
batch_size = tf.placeholder(tf.int64)
n_batches = m_tr//Batch_Size
def fc_layer(input_, channels_in, channels_out, name="fc"):
    with tf.name_scope(name):
        W = tf.Variable(tf.random_normal([channels_in, channels_out], stddev=0.1), name="weights")
        b = tf.Variable(tf.constant(0.1, shape=[channels_out]), name="biases")
        act = tf.matmul(input_, W) + b
        return act
# Setup placeholders, and reshape the data
y = tf.placeholder(tf.float32, shape=[None,N_y], name = 'y')
x = tf.placeholder(tf.float32, shape=[None,N_x], name = 'x')
dataset = tf.data.Dataset.from_tensor_slices((y, x)).batch(batch_size).repeat()
iterator = dataset.make_initializable_iterator()
input_features, output_features = iterator.get_next()
fc_1_linear = fc_layer(input_features, N_y,256, name = "fc1")
fc_1 = tf.nn.relu(fc_1_linear)
fc_2_linear= fc_layer(fc_1, 256,512, name = "fc2")
fc_2 = tf.nn.relu(fc_2_linear)
out_layer= fc_layer(fc_2, 512,N_x, name = "fc3")
with tf.name_scope('loss'):
    loss_op = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.squared_difference(out_layer, output_features), 1)))

with tf.name_scope('train'):
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_op)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# initialise iterator with train data
sess.run(iterator.initializer, feed_dict={ y: train_data[0], x: train_data[1], batch_size: Batch_Size})
print('Training...')
end_loss = 0
for i in range(training_epochs):
    tot_loss = 0
    for _ in range(n_batches):
        temp, loss_value = sess.run([train_op, loss_op])
        tot_loss += loss_value
        step = i * n_batches + _
        if step > 10:
            train_summary = tf.Summary()
            train_summary.ParseFromString(sess.run(merged_op))
            writer.add_summary(train_summary, step)
    end_loss = tot_loss / n_batches
    print("Iter: {}, Loss: {:.4f}".format(i, end_loss))
# initialise iterator with test data
sess.run(iterator.initializer, feed_dict={ y: test_data[0], x: test_data[1], batch_size: test_data[0].shape[0]})
print('Test Loss: {:4f}',sess.run(loss_op))
print('Generalization Error: {:4f}'.format(sess.run(loss_op)-end_loss))
I am not sure what the problem is. The loss decreases with each epoch, but when I plot the actual vector against the reconstructed vector there is a lot of discrepancy.
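For reference, this is roughly how I produce that comparison (a sketch of my plotting code, assuming the iterator is still initialised with the test set as above):

x_hat, x_true = sess.run([out_layer, output_features])   # one full test batch
plt.plot(x_true[0], label='true x')
plt.plot(x_hat[0], label='reconstructed x')
plt.legend()
plt.show()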
I am trying to reproduce a deep learning regression result in Tensorflow. If I train a neural network with the MLPRegressor class from sklearn, I get very good results of about 98% on validation.
The MLPRegressor:
http://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor
I am trying to reproduce the model in Tensorflow by copying the default values of the MLPRegressor class into a Tensorflow model. However, I cannot get the same result; I only get about 75% most of the time.
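For reference, the sklearn baseline I am comparing against is roughly the following (a sketch; the variable names for my data are the ones used further down):

from sklearn.neural_network import MLPRegressor

reg = MLPRegressor()  # defaults: hidden_layer_sizes=(100,), relu, adam, alpha=1e-4, learning_rate_init=1e-3
reg.fit(X, Y)                              # training features and labels
print(reg.score(test_feat, test_label))    # R^2 on the held-out set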
My TF model:
tf.reset_default_graph()
graph = tf.Graph()
n_input = 3 # n variables
n_hidden_1 = 100
n_hidden_2 = 1
n_output = 1
beta = 0.001
learning_rate = 0.001
with graph.as_default():
    tf_train_feat = tf.placeholder(tf.float32, shape=(None, n_input))
    tf_train_label = tf.placeholder(tf.float32, shape=(None))
    tf_test_feat = tf.constant(test_feat, tf.float32)

    """
    Weights and biases. The weight matrix's columns will be the output vector.
    * ndarray([rows, columns])
    * ndarray([in, out])
    tf.placeholder(None) and tf.placeholder([None, 3]) mean that the row size is not set. In the second
    placeholder the columns are fixed at 3.
    """
    W = {
        "layer_1": tf.Variable(tf.truncated_normal([n_input, n_hidden_1])),
        "layer_2": tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2])),
        "layer_3": tf.Variable(tf.truncated_normal([n_hidden_2, n_output])),
    }

    b = {
        "layer_1": tf.Variable(tf.zeros([n_hidden_1])),
        "layer_2": tf.Variable(tf.zeros([n_hidden_2])),
    }

    def computation(X):
        layer_1 = tf.nn.relu(tf.matmul(X, W["layer_1"]) + b["layer_1"])
        layer_2 = tf.nn.relu(tf.matmul(layer_1, W["layer_2"]) + b["layer_2"])
        return layer_2

    tf_prediction = computation(tf_train_feat)
    tf_test_prediction = computation(tf_test_feat)

    tf_loss = tf.reduce_mean(tf.pow(tf_train_label - tf_prediction, 2))
    tf_loss = tf.reduce_mean(tf_loss + beta * tf.nn.l2_loss(W["layer_2"]))
    tf_optimizer = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
    # tf_optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(tf_loss)

    init = tf.global_variables_initializer()
My TF session:
def accuracy(y_pred, y):
    a = 0
    for i in range(y.shape[0]):
        a += abs(1 - y_pred[i][0] / y[i])
    return round((1 - a / y.shape[0]) * 100, 3)

def accuracy_tensor(y_pred, y):
    a = 0
    for i in range(y.shape[0]):
        a += abs(1 - y_pred[i][0] / y[i])
    return round((1 - a / y.shape[0]) * 100, 3)

# Shuffles two arrays.
def shuffle_in_unison(a, b):
    assert len(a) == len(b)
    shuffled_a = np.empty(a.shape, dtype=a.dtype)
    shuffled_b = np.empty(b.shape, dtype=b.dtype)
    permutation = np.random.permutation(len(a))
    for old_index, new_index in enumerate(permutation):
        shuffled_a[new_index] = a[old_index]
        shuffled_b[new_index] = b[old_index]
    return shuffled_a, shuffled_b
train_epoch = int(5e4)
batch = int(200)
n_batch = int(X.shape[0] // batch)
prev_acc = 0
stable_count = 0
session = tf.InteractiveSession(graph=graph)
session.run(init)
print("Initialized.\n No. of epochs: %d.\n No. of batches: %d." % (train_epoch, n_batch))
for epoch in range(train_epoch):
    offset = (epoch * n_batch) % (Y.shape[0] - n_batch)

    for i in range(n_batch):
        x = X[offset:(offset + n_batch)]
        y = Y[offset:(offset + n_batch)]
        x, y = shuffle_in_unison(x, y)
        feed_dict = {tf_train_feat: x, tf_train_label: y}
        _, l, pred, pred_label = session.run([tf_optimizer, tf_loss, tf_prediction, tf_train_label],
                                             feed_dict=feed_dict)

    if epoch % 1 == 0:
        print("Epoch: %d. Batch' loss: %f" % (epoch, l))
        test_pred = tf_test_prediction.eval(session=session)
        acc_test = accuracy(test_pred, test_label)
        acc_train = accuracy_tensor(pred, pred_label)
        print("Accuracy train set %s%%" % acc_train)
        print("Accuracy test set: %s%%" % acc_test)
Am I missing something in the Tensorflow code? Thanks!
Unless you have a very good reason not to use them, regression should have linear output units. I ran into a similar problem a while back and ended up using linear outputs and linear hidden units, which seemed to mirror MLPRegressor in my case.
There is a great section on activation functions in chapter 6 of Goodfellow's Deep Learning book, starting at page 181.
At the very least, try this for your output layer:
layer_2 = tf.matmul(layer_1, W["layer_2"]) + b["layer_2"]
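In context, computation() would then look something like this (only the output activation changes from the code in the question):

def computation(X):
    layer_1 = tf.nn.relu(tf.matmul(X, W["layer_1"]) + b["layer_1"])
    layer_2 = tf.matmul(layer_1, W["layer_2"]) + b["layer_2"]   # linear output, no ReLU
    return layer_2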