I built a tensorflow graph according to a few different resources online and am trying to fit a curve to y=x^2. I am using two hidden layers with 25 and 10 neurons respectively. The code completes, but the cost function never reduces and the ultimate chart of x_test to preds is clearly wrong. I've looked at lots of different resources and some O'reilly books, and I can't for the life of me figure out where I'm going wrong. Any help would be greatly appreciated.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from sklearn.model_selection import train_test_split
tf.reset_default_graph()
# Create Data
X_range = np.arange(-50,50,0.1)
Y_range = (X_range ** 2)
data = pd.DataFrame({"x": X_range, "y":Y_range})
x = data['x'].values.reshape(1000,1)
y = data['y'].values.reshape(1000,)
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = .30, random_state = 42)
c_t = []
# Create NNModel function
def neural_net_model(x_data, input_dim):
# Build the First Layer
W_1 = tf.Variable(tf.random_uniform([input_dim, 25]))
b_1 = tf.Variable(tf.zeros([25]))
layer_1 = tf.add(tf.matmul(x_data, W_1), b_1)
layer_1 = tf.nn.relu(layer_1)
# Build the Second Layer
W_2 = tf.Variable(tf.random_uniform([25, 10]))
b_2 = tf.Variable(tf.zeros([10]))
layer_2 = tf.add(tf.matmul(layer_1, W_2), b_2)
layer_2 = tf.nn.relu(layer_2)
# Build output
W_output = tf.Variable(tf.random_uniform([10, 1]))
b_output = tf.Variable(tf.zeros([1]))
output = tf.add(tf.matmul(layer_2, W_output), b_output)
return output
xs = tf.placeholder(dtype=tf.float32, name="xs")
ys = tf.placeholder(dtype=tf.float32, name='ys')
output = neural_net_model(xs, 1)
cost = tf.reduce_mean(tf.square(output-ys))
train = tf.train.GradientDescentOptimizer(.005).minimize(cost)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# Run for 100 Epochs
for i in range(100):
for j in range(x_train.shape[0]):
sess.run(train, feed_dict={xs: x_train[j].reshape(1,1), ys: y_train[j]})
if i % 10 == 0:
c_t.append(sess.run(cost, feed_dict={xs:x_train, ys:y_train}))
print('Epoch :',i,'Cost :',c_t[-1])
pred = sess.run(output, feed_dict={xs: x_test})
print("cost: ", sess.run(cost, feed_dict={xs: x_train, ys: y_train}))
plt.scatter(x_test,y_test,label="Original Data")
plt.scatter(x_test,pred,label="Predicted Data")
plt.legend(loc='best')
plt.ylabel('value')
plt.xlabel('x_data')
plt.title('model_fit')
plt.show()```
I was just playing with a model then i remembered this problem
the issue was Y_range is so big in some point, so for tensorflow in order to run some calculation the memory will exceed and the value will be inf
for that you need to normalize the data with mean equal to 0 and std equal to one
add
# Create Data
X_range = np.arange(-50,50,0.1)
Y_range = (X_range ** 2)
def normalizor(x):
mean=x.mean()
std=x.std()
return (x-mean)/std
X_range=normalizor(X_range)
Y_range=normalizor(Y_range)
plus change the optimizer from GradientDiscent to Adam
train = tf.train.AdamOptimizer(.005).minimize(cost)
Related
i'm trying to create a neural network model for a kaggle competition using mnist dataset. currently, my code looks like this since i am trying to capture certain metrics. however, i can't seem to figure out how to turn this into an output to submit.
current:
import time
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import pandas as pd
import numpy as np
from tensorflow.python.framework import ops
ops.reset_default_graph()
#requests.packages.urllib3.disable_warnings()
import ssl
try:
_create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
# Legacy Python that doesn't verify HTTPS certificates by default
pass
else:
# Handle target environment that doesn't support HTTPS verification
ssl._create_default_https_context = _create_unverified_https_context
# Load training and testing data directly from TensorFlow
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
# Initialize metrics
metrics = {}
# Initialize metric names
names = ['Number of Hidden Layers', 'Nodes per Layer', 'Time in Seconds',
'Training Set Accuracy', 'Test Set Accuracy']
# Set fixed parameters
n_epochs = 20
batch_size = 50
learning_rate = 0.01
# Function that creates batch generator used in training
def shuffle_batch(X, y, batch_size):
rnd_idx = np.random.permutation(len(X))
n_batches = len(X) // batch_size
for batch_idx in np.array_split(rnd_idx, n_batches):
X_batch, y_batch = X[batch_idx], y[batch_idx]
yield X_batch, y_batch
# Start timer
start = time.process_time()
n_hidden = 300
# Reset the session
tf.reset_default_graph()
#ops.reset_default_graph()
tf.set_random_seed(2141)
#tf.random.set_seed(2141)
np.random.seed(9347)
# Set X and y placeholders
X = tf.placeholder(tf.float32, shape=(None, 784), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
with tf.name_scope("dnn"):
hidden1 = tf.layers.dense(X, n_hidden, name="hidden1",
activation=tf.nn.relu)
hidden2 = tf.layers.dense(hidden1, n_hidden, name="hidden2",
activation=tf.nn.relu)
logits = tf.layers.dense(hidden2, 10, name="outputs")
y_proba = tf.nn.softmax(logits)
with tf.name_scope("loss"):
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")
with tf.name_scope("train"):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
with tf.Session() as sess:
tf.global_variables_initializer().run()
for epoch in range(n_epochs):
for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
acc_train = accuracy.eval(feed_dict={X: X_train, y: y_train})
acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
# Record the clock time it takes
duration = time.process_time() - start
metrics['Model 1'] = [2, n_hidden, duration, acc_train, acc_test]
# Convert metrics dictionary to dataframe for display
results_summary = pd.DataFrame.from_dict(metrics, orient='index')
results_summary.columns = names
# Sort by model number
results_summary.reset_index(inplace=True)
results_summary.sort_values(by=['index'], axis=0, inplace=True)
results_summary.set_index(['index'], inplace=True)
results_summary.index.name = None
# Export to csv
results_summary.to_csv('results_summary.csv')
results_summary
i need to create an output that looks something like this in csv file:
ImageId Label
0 1 2
1 2 0
2 3 9
3 4 0
4 5 3
would i have to recreate the whole thing in order to actually create "y_pred" when doing something like model.predict(X_test), or can i just reshape the existing code in some way to do this? ideally, i would like to capture predicted values and compare them to true values using a confusion matrix.
Since you're using old tensorflow style, predict using old tensorflow style at the end:
feed_dict = {X: X_test}
classification = tf.run(y_proba, feed_dict)
label = numpy.argmax(classification, axis=-1)
Maybe you need to create a new session for this, or use the existing sess.run, I'm not sure how this works, try to see which option gives out reasonable results.
I am new to tensorflow and i am tasked to design a feedforward neural network which consists of: an input layer, one hidden perceptron layer of 10 neurons and an output softmax layer. Assume a learning rate of 0.01, L2 regularization with weight decay parameter of 0.000001, and batch size of 32.
I would like to know if there is anyway to know if the network that I have created is what intend to create. Like a graph showing the nodes?
The following is attempt on the task but I am not sure if it is correct.
import math
import tensorflow as tf
import numpy as np
import pylab as plt
# scale data
def scale(X, X_min, X_max):
return (X - X_min)/(X_max-X_min)
def tfvariables(start_nodes, end_nodes):
W = tf.Variable(tf.truncated_normal([start_nodes, end_nodes], stddev=1.0/math.sqrt(float(start_nodes))))
b = tf.Variable(tf.zeros([end_nodes]))
return W, b
NUM_FEATURES = 36
NUM_CLASSES = 6
learning_rate = 0.01
beta = 10 ** -6
epochs = 10000
batch_size = 32
num_neurons = 10
seed = 10
np.random.seed(seed)
#read train data
train_input = np.loadtxt('sat_train.txt',delimiter=' ')
trainX, train_Y = train_input[:, :36], train_input[:, -1].astype(int)
trainX = scale(trainX, np.min(trainX, axis=0), np.max(trainX, axis=0))
# There are 6 class-labels 1,2,3,4,5,7
train_Y[train_Y == 7] = 6
trainY = np.zeros((train_Y.shape[0], NUM_CLASSES))
trainY[np.arange(train_Y.shape[0]), train_Y-1] = 1 #one matrix
# experiment with small datasets
trainX = trainX[:1000]
trainY = trainY[:1000]
n = trainX.shape[0]
# Create the model
x = tf.placeholder(tf.float32, [None, NUM_FEATURES])
y_ = tf.placeholder(tf.float32, [None, NUM_CLASSES])
# Build the graph for the deep net
W1, b1 = tfvariables(NUM_FEATURES, num_neurons)
W2, b2 = tfvariables(num_neurons, NUM_CLASSES)
logits_1 = tf.matmul(x, W1) + b1
perceptron_layer = tf.nn.sigmoid(logits_1)
logits_2 = tf.matmul(perceptron_layer, W2) + b2
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=logits_2)
# Standard Loss
loss = tf.reduce_mean(cross_entropy)
# Loss function with L2 Regularization with beta
regularizers = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2)
loss = tf.reduce_mean(loss + beta * regularizers)
# Create the gradient descent optimizer with the given learning rate.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(cross_entropy)
correct_prediction = tf.cast(tf.equal(tf.argmax(logits_2, 1), tf.argmax(y_, 1)), tf.float32)
accuracy = tf.reduce_mean(correct_prediction)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
sess.run(tf.global_variables_initializer())
train_acc = []
train_loss = []
for i in range(epochs):
train_op.run(feed_dict={x: trainX, y_: trainY})
train_acc.append(accuracy.eval(feed_dict={x: trainX, y_: trainY}))
train_loss.append(loss.eval(feed_dict={x: trainX, y_: trainY}))
if i % 500 == 0:
print('iter %d: accuracy %g loss %g'%(i, train_acc[i], train_loss[i]))
# plot learning curves
plt.figure(1)
plt.plot(range(epochs), train_acc)
plt.xlabel(str(epochs) + ' iterations')
plt.ylabel('Train accuracy')
# plot learning curves
plt.figure(1)
plt.plot(range(epochs), train_loss)
plt.xlabel(str(epochs) + ' iterations')
plt.ylabel('Train loss')
plt.show()
plt.show()
You can utitilize Tensorboard to visualize the graph you created. Basically, you have to follow a few steps to do this:
declare a writer as writer = tf.summary.FileWriter('PATH/TO/A/LOGDIR')
add the graph to the writer with writer.add_graph(sess.graph) with sess being your current tf.Session() in which you execute the graph
possibly you have to use writer.flush() to write it to disk immediately
Note that you have to add these lines AFTER building your graph.
You can view the graph by executing this command in your shell:
tensorboard --logdir=PATH/TO/A/LOGDIR
Then you are presented an address (usually something like localhost:6006) on which you can view the graph with your browser (Chrome and Firefox are guaranteed to work).
Tensorboard (in TensorFlow) is useful tool.
Use tf.summary.FileWriter for writing the graph into a folder and run tensorboard from the corresponding directory.
Check the following links:
https://www.tensorflow.org/guide/graphs
https://www.tensorflow.org/guide/summaries_and_tensorboard
I am using tensorflow to do a linear regression. Here I am facing a problem:
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (8,6)
data = pd.read_csv('./data.csv')
xs = data["A"][:100]
ys = data["B"][:100]
X = tf.placeholder(tf.float32, name='X')
Y = tf.placeholder(tf.float32, name='Y')
W = tf.Variable(tf.random_normal([1]),name = 'weight')
b = tf.Variable(tf.random_normal([1]),name = 'bias')
Y_pred = tf.add(tf.multiply(X,W), b)
sample_num = xs.shape[0]
loss = tf.reduce_sum(tf.pow(Y_pred - Y,2))/sample_num
learning_rate = 0.0001
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
n_samples = xs.shape[0]
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
for i in range(100):
for x,y in zip(xs,ys):
_, l = sess.run([optimizer, loss], feed_dict={X: x, Y:y})
W, b = sess.run([W, b])
plt.plot(xs, ys, 'bo', label='Real data')
plt.plot(xs, xs*W + b, 'r', label='Predicted data')
plt.legend()
plt.show()
The data.csv is here.
The plot is diametrically opposed to what I expected:
So, what is the problem? I am a beginner of python and tensorflow, and just can't reach the points.
As Nipun mentioned, try AdamOptimizer instead of GradientDescentOptimizer.
You will often find that AdamOptimizer is generally a better optimizer than GradientDescentOptimizer and reaches the minima much faster.
It does so by adapting the learning rate instead of keeping it constant (0.0001 in your case).
Also, more the number of epochs, better the model (not considering over-fitting here).
Since your learning rate and the number of epochs are too small, your regression models haven't converged. Therefore, you may need to increase the learning rate and use the tf.train.AdamOptimizer.
Here I set the learning rate to 2, epochs=10000 and got the following graph.
Here I have given the code with the comments where necessary.
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (8, 6)
data = pd.read_csv('./data.csv')
xs = data["A"][:100]
ys = data["B"][:100]
X = tf.placeholder(tf.float32, name='X')
Y = tf.placeholder(tf.float32, name='Y')
W = tf.Variable(tf.random_normal([1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')
Y_pred = tf.add(tf.multiply(X, W), b)
loss = tf.reduce_mean(tf.pow(Y_pred - Y, 2))
learning_rate = 2 #increase the learning rate
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)#use the AdamOptimizer
BATCH_SIZE = 8 #Batch Size define here
n_samples = xs.shape[0]
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
for i in range(10000): #increase the num of epoches
for start, end in zip(range(0, n_samples, BATCH_SIZE), # mini batch Gradientdecent
range(BATCH_SIZE, n_samples + 1, BATCH_SIZE)):
_, l = sess.run([optimizer, loss], feed_dict={X: xs[start:end], Y: ys[start:end]})
prediction = sess.run(Y_pred, feed_dict={X: xs})
#W, b = sess.run([W, b])
plt.plot(xs, ys, 'bo', label='Real data')
plt.plot(xs, prediction, 'r', label='Predicted data')
plt.legend()
plt.show()
Also, you can use the mini batch gradientdescent method to accelerate the convergence as the code above.
Moreover, you can increase the number of epochs and learning rate further to get the optimal result.
Hope this helps.
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
#reproducible random seed
seed = 1
np.random.seed(seed)
#Import and normalize the data
df = pd.read_csv('creditcard.csv')
#Exploring the data
# print df.head()
# print df.describe()
# print df.isnull().sum()
# count_class = pd.value_counts(df['Class'])
# count_class.plot(kind = 'bar')
# plt.title('Fraud class histogram')
# plt.xlabel('class')
# plt.ylabel('Frequency')
# plt.show()
# print('Clearly the data is totally unbalanced!')
#to normalize the amount column
# data['normAmount'] = StandardScaler().fit_transform(data['Amount'].reshape(-1, 1))
df['normAmount'] = StandardScaler().fit_transform(df['Amount'].values.reshape(-1, 1))
df = df.drop(['Time','V28','V27','V26','V25','V24','V23','V22','V20','V15','V13','V8','Amount'], axis =1)
X = df.iloc[:,df.columns!='Class']
Y = df.iloc[:,df.columns=='Class']
# number of records in the minority class
number_record_fraud = len(df[df.Class==1])
fraud_indices = np.array(df[df.Class==1].index)
#picking normal class
normal_indices = np.array(df[df.Class==0].index)
#select random x(number_record_fraud) numbers from normal_indices
random_normal_indices = np.random.choice(normal_indices,number_record_fraud,replace=False)
random_normal_indices = np.array(random_normal_indices)
#under sample data
under_sample_indices = np.concatenate([fraud_indices,random_normal_indices])
under_sample_data = df.iloc[under_sample_indices,:]
X_undersample = under_sample_data.iloc[:,under_sample_data.columns!='Class']
Y_undersample = under_sample_data.iloc[:,under_sample_data.columns=='Class']
# split data into train and test dataset
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size = 0.3)
X_train_undersample,X_test_undersample,Y_train_undersample,Y_test_undersample = train_test_split(X_undersample,Y_undersample,test_size=0.3)
#parameters
learning_rate = 0.05
training_epoch = 10
batch_size = 43
display_step = 1
#tf graph input
x = tf.placeholder(tf.float32,[None,18])
y = tf.placeholder(tf.float32,[None,1])
#set model weights
w = tf.Variable(tf.zeros([18,1]))
b = tf.Variable(tf.zeros([1]))
#construct model
pred = tf.nn.softmax(tf.matmul(x,w) + b) #softmax activation
#minimize error using cross entropy
cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred),reduction_indices=1))
#Gradient descent
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
#initializing variables
init = tf.global_variables_initializer()
#launch the graph
with tf.Session() as sess:
sess.run(init)
#training cycle
for epoch in range(training_epoch):
total_batch = len(X_train_undersample)/batch_size
avg_cost = 0
#loop over all the batches
for batch in range(total_batch):
batch_xs = X_train.iloc[(batch)*batch_size:(batch+1) *batch_size]
batch_ys = Y_train.iloc[(batch)*batch_size:(batch+1) *batch_size]
# run optimizer and cost operation
_,c= sess.run([optimizer,cost],feed_dict={x:batch_xs,y:batch_ys})
avg_cost += c/total_batch
correct_prediction = tf.equal(tf.argmax(pred,1),tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
#disply log per epoch step
if (epoch+1) % display_step == 0:
train_accuracy, newCost = sess.run([accuracy, cost], feed_dict={x: X_test,y: Y_test})
print "test_set_accuracy:",accuracy.eval({x:X_test_undersample,y:Y_test_undersample})*100
print "whole_set_accuracy:",accuracy.eval({x:X,y:Y})*100
# print train_accuracy
# print "cost",newCost
print
print 'optimization finished.'
Things I've tried to figure out what's causing it:
Tried changing train dataset length.
Dropped some not needed fields.
Tried putting validation blocks.
Dataset :link
There can be multiple reasons of why it is overfitting , and as well there can be multiple ways to debug it and to fix it. Its hard to tell just from the code, because it also depends on the data, but here are some common reaons as well as fixes:
Too small dataset, adding more data its a common overfitting fix
Too complex model, if you have many features, or complex polonomial features, try to reducing complexity using feature selection
Add regularization: i dont see regularization in your code, try to add it.
I am trying to train a sparse data with an MLP to predict a forecast. However, the forecast on the test data is giving the same value for all observations. Once I omit the activation function from each layer, the outcome starts being different.
my code is below:
# imports
import numpy as np
import tensorflow as tf
import random
import json
from scipy.sparse import rand
# Parameters
learning_rate= 0.1
training_epochs = 50
batch_size = 100
# Network Parameters
m= 1000 #number of features
n= 5000 # number of observations
hidden_layers = [5,2,4,1,6]
n_layers = len(hidden_layers)
n_input = m
n_classes = 1 # it's a regression problem
X_train = rand(n, m, density=0.2,format = 'csr').todense().astype(np.float32)
Y_train = np.random.randint(4, size=n)
X_test = rand(200, m, density=0.2,format = 'csr').todense().astype(np.float32)
Y_test = np.random.randint(4, size=200)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None])
# Store layers weight & bias
weights = {}
biases = {}
weights['h1']=tf.Variable(tf.random_normal([n_input, hidden_layers[0]])) #first matrice
biases['b1'] = tf.Variable(tf.random_normal([hidden_layers[0]]))
for i in xrange(2,n_layers+1):
weights['h'+str(i)]= tf.Variable(tf.random_normal([hidden_layers[i-2], hidden_layers[i-1]]))
biases['b'+str(i)] = tf.Variable(tf.random_normal([hidden_layers[i-1]]))
weights['out']=tf.Variable(tf.random_normal([hidden_layers[-1], 1])) #matrice between last layer and output
biases['out']= tf.Variable(tf.random_normal([1]))
# Create model
def multilayer_perceptron(_X, _weights, _biases):
layer_begin = tf.nn.relu(tf.add(tf.matmul(_X, _weights['h1'],a_is_sparse=True), _biases['b1']))
for layer in xrange(2,n_layers+1):
layer_begin = tf.nn.relu(tf.add(tf.matmul(layer_begin, _weights['h'+str(layer)]), _biases['b'+str(layer)]))
#layer_end = tf.nn.dropout(layer_begin, 0.3)
return tf.matmul(layer_begin, _weights['out'])+ _biases['out']
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
rmse = tf.reduce_sum(tf.abs(y-pred))/tf.reduce_sum(tf.abs(y)) # rmse loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(rmse) # Adam Optimizer
# Initializing the variables
init = tf.initialize_all_variables()
with tf.Session() as sess:
sess.run(init)
#training
for step in xrange(training_epochs):
# Generate a minibatch.
start = random.randrange(1, n - batch_size)
#print start
batch_xs=X_train[start:start+batch_size,:]
batch_ys =Y_train[start:start+batch_size]
#printing
_,rmseRes = sess.run([optimizer, rmse] , feed_dict={x: batch_xs, y: batch_ys} )
if step % 20 == 0:
print "rmse [%s] = %s" % (step, rmseRes)
#testing
pred_test = multilayer_perceptron(X_test, weights, biases)
print "prediction", pred_test.eval()[:20]
print "actual = ", Y_test[:20]
PS: I am generating randomly my data just to reproduce the error. My data is sparse in fact, pretty similar to the one generated randomly. The problem I want to solve is: MLP is giving the same prediction for all observations in the test data.
That's a sign that your training failed. With GoogeLeNet Imagenet training I've seen it label everything as "nematode" when started with a bad choice of hyper-parameters. Things to check -- does your training loss decrease? If it doesn't decrease, try different learning rates/architectures. If it decreases to zero maybe your loss is wrong like was case here