What causes overfitting in the algorithm - python

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
#reproducible random seed
seed = 1
np.random.seed(seed)
#Import and normalize the data
df = pd.read_csv('creditcard.csv')
#Exploring the data
# print df.head()
# print df.describe()
# print df.isnull().sum()
# count_class = pd.value_counts(df['Class'])
# count_class.plot(kind = 'bar')
# plt.title('Fraud class histogram')
# plt.xlabel('class')
# plt.ylabel('Frequency')
# plt.show()
# print('Clearly the data is totally unbalanced!')
#to normalize the amount column
# data['normAmount'] = StandardScaler().fit_transform(data['Amount'].reshape(-1, 1))
df['normAmount'] = StandardScaler().fit_transform(df['Amount'].values.reshape(-1, 1))
df = df.drop(['Time','V28','V27','V26','V25','V24','V23','V22','V20','V15','V13','V8','Amount'], axis =1)
X = df.iloc[:,df.columns!='Class']
Y = df.iloc[:,df.columns=='Class']
# number of records in the minority class
number_record_fraud = len(df[df.Class==1])
fraud_indices = np.array(df[df.Class==1].index)
#picking normal class
normal_indices = np.array(df[df.Class==0].index)
#select random x(number_record_fraud) numbers from normal_indices
random_normal_indices = np.random.choice(normal_indices,number_record_fraud,replace=False)
random_normal_indices = np.array(random_normal_indices)
#under sample data
under_sample_indices = np.concatenate([fraud_indices,random_normal_indices])
under_sample_data = df.iloc[under_sample_indices,:]
X_undersample = under_sample_data.iloc[:,under_sample_data.columns!='Class']
Y_undersample = under_sample_data.iloc[:,under_sample_data.columns=='Class']
# split data into train and test dataset
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size = 0.3)
X_train_undersample,X_test_undersample,Y_train_undersample,Y_test_undersample = train_test_split(X_undersample,Y_undersample,test_size=0.3)
#parameters
learning_rate = 0.05
training_epoch = 10
batch_size = 43
display_step = 1
#tf graph input
x = tf.placeholder(tf.float32,[None,18])
y = tf.placeholder(tf.float32,[None,1])
#set model weights
w = tf.Variable(tf.zeros([18,1]))
b = tf.Variable(tf.zeros([1]))
#construct model
pred = tf.nn.softmax(tf.matmul(x,w) + b) #softmax activation
#minimize error using cross entropy
cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred),reduction_indices=1))
#Gradient descent
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
#initializing variables
init = tf.global_variables_initializer()
#launch the graph
with tf.Session() as sess:
sess.run(init)
#training cycle
for epoch in range(training_epoch):
total_batch = len(X_train_undersample)/batch_size
avg_cost = 0
#loop over all the batches
for batch in range(total_batch):
batch_xs = X_train.iloc[(batch)*batch_size:(batch+1) *batch_size]
batch_ys = Y_train.iloc[(batch)*batch_size:(batch+1) *batch_size]
# run optimizer and cost operation
_,c= sess.run([optimizer,cost],feed_dict={x:batch_xs,y:batch_ys})
avg_cost += c/total_batch
correct_prediction = tf.equal(tf.argmax(pred,1),tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
#disply log per epoch step
if (epoch+1) % display_step == 0:
train_accuracy, newCost = sess.run([accuracy, cost], feed_dict={x: X_test,y: Y_test})
print "test_set_accuracy:",accuracy.eval({x:X_test_undersample,y:Y_test_undersample})*100
print "whole_set_accuracy:",accuracy.eval({x:X,y:Y})*100
# print train_accuracy
# print "cost",newCost
print
print 'optimization finished.'
Things I've tried to figure out what's causing it:
Tried changing train dataset length.
Dropped some not needed fields.
Tried putting validation blocks.
Dataset :link

There can be multiple reasons of why it is overfitting , and as well there can be multiple ways to debug it and to fix it. Its hard to tell just from the code, because it also depends on the data, but here are some common reaons as well as fixes:
Too small dataset, adding more data its a common overfitting fix
Too complex model, if you have many features, or complex polonomial features, try to reducing complexity using feature selection
Add regularization: i dont see regularization in your code, try to add it.

Related

Why are model predictions not updating in tensorflow (python)?

I built a tensorflow graph according to a few different resources online and am trying to fit a curve to y=x^2. I am using two hidden layers with 25 and 10 neurons respectively. The code completes, but the cost function never reduces and the ultimate chart of x_test to preds is clearly wrong. I've looked at lots of different resources and some O'reilly books, and I can't for the life of me figure out where I'm going wrong. Any help would be greatly appreciated.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from sklearn.model_selection import train_test_split
tf.reset_default_graph()
# Create Data
X_range = np.arange(-50,50,0.1)
Y_range = (X_range ** 2)
data = pd.DataFrame({"x": X_range, "y":Y_range})
x = data['x'].values.reshape(1000,1)
y = data['y'].values.reshape(1000,)
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = .30, random_state = 42)
c_t = []
# Create NNModel function
def neural_net_model(x_data, input_dim):
# Build the First Layer
W_1 = tf.Variable(tf.random_uniform([input_dim, 25]))
b_1 = tf.Variable(tf.zeros([25]))
layer_1 = tf.add(tf.matmul(x_data, W_1), b_1)
layer_1 = tf.nn.relu(layer_1)
# Build the Second Layer
W_2 = tf.Variable(tf.random_uniform([25, 10]))
b_2 = tf.Variable(tf.zeros([10]))
layer_2 = tf.add(tf.matmul(layer_1, W_2), b_2)
layer_2 = tf.nn.relu(layer_2)
# Build output
W_output = tf.Variable(tf.random_uniform([10, 1]))
b_output = tf.Variable(tf.zeros([1]))
output = tf.add(tf.matmul(layer_2, W_output), b_output)
return output
xs = tf.placeholder(dtype=tf.float32, name="xs")
ys = tf.placeholder(dtype=tf.float32, name='ys')
output = neural_net_model(xs, 1)
cost = tf.reduce_mean(tf.square(output-ys))
train = tf.train.GradientDescentOptimizer(.005).minimize(cost)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# Run for 100 Epochs
for i in range(100):
for j in range(x_train.shape[0]):
sess.run(train, feed_dict={xs: x_train[j].reshape(1,1), ys: y_train[j]})
if i % 10 == 0:
c_t.append(sess.run(cost, feed_dict={xs:x_train, ys:y_train}))
print('Epoch :',i,'Cost :',c_t[-1])
pred = sess.run(output, feed_dict={xs: x_test})
print("cost: ", sess.run(cost, feed_dict={xs: x_train, ys: y_train}))
plt.scatter(x_test,y_test,label="Original Data")
plt.scatter(x_test,pred,label="Predicted Data")
plt.legend(loc='best')
plt.ylabel('value')
plt.xlabel('x_data')
plt.title('model_fit')
plt.show()```
I was just playing with a model then i remembered this problem
the issue was Y_range is so big in some point, so for tensorflow in order to run some calculation the memory will exceed and the value will be inf
for that you need to normalize the data with mean equal to 0 and std equal to one
add
# Create Data
X_range = np.arange(-50,50,0.1)
Y_range = (X_range ** 2)
def normalizor(x):
mean=x.mean()
std=x.std()
return (x-mean)/std
X_range=normalizor(X_range)
Y_range=normalizor(Y_range)
plus change the optimizer from GradientDiscent to Adam
train = tf.train.AdamOptimizer(.005).minimize(cost)

Can't interpret prediction with neural network use

I'm trying to use TensorFlow in python, to make some prediction with cryptocurrency data. The problem is that the output of the prediction is like a 0.1-0.9 number whereas the cryptocurrency data should be a 10000-10100 format, and I don't find a solution to convert the 0.* number to the real one.
I've try to create a ratio, with substrat max - min from predicted values, and max-min from tested data, and divide to have a ratio but when I multiply this ratio with prediction there is a big rate of error ( found a 14000 number instead of a 10000 one )
Here some code :
train_start = 0
train_end = int(np.floor(0.7*n))
test_start = train_end
test_end = n
data_train = data[np.arange(train_start, train_end), :]
data_test = data[np.arange(test_start, test_end), :]
Scale data:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
data_train = scaler.fit_transform(data_train)
data_test = scaler.transform(data_test)
Build X and y:
X_train = data_train[:, 1:]
y_train = data_train[:, 0]
X_test = data_test[:, 1:]
y_test = data_test[:, 0]
.
.
.
n_data = 10
n_neurons_1 = 1024
n_neurons_2 = 512
n_neurons_3 = 256
n_neurons_4 = 128
n_target = 1
X = tf.compat.v1.placeholder(dtype=tf.compat.v1.float32, shape=[None, n_data])
Y = tf.compat.v1.placeholder(dtype=tf.compat.v1.float32, shape=[None])
Hidden layer
..
Output layer (must be transposed)
..
Cost function
..
Optimizer
..
Make Session:
sess = tf.compat.v1.Session()
Run initializer:
sess.run(tf.compat.v1.global_variables_initializer())
Setup interactive plot:
plt.ion()
fig = plt.figure()
ax1 = fig.add_subplot(111)
line1, = ax1.plot(y_test)
line2, = ax1.plot(y_test*0.5)
plt.show()
epochs = 10
batch_size = 256
for e in range(epochs):
# Shuffle training data
shuffle_indices = np.random.permutation(np.arange(len(y_train)))
X_train = X_train[shuffle_indices]
y_train = y_train[shuffle_indices]
# Minibatch training
for i in range(0, len(y_train) // batch_size):
start = i * batch_size
batch_x = X_train[start:start + batch_size]
batch_y = y_train[start:start + batch_size]
# Run optimizer with batch
sess.run(opt, feed_dict={X: batch_x, Y: batch_y})
# Show progress
if np.mod(i, 5) == 0:
# Prediction
pred = sess.run(out, feed_dict={X: X_test})
#This pred var is the output of the prediction
I persiste my result in a file and this is what its looks like :
2019-08-21 06-AM;15310.444858356934;0.50021994;
2019-08-21 12-PM;14287.717187390663;0.46680558;
2019-08-21 06-PM;14104.63871795706;0.46082407;
For example, the last prediction is 0,46 but when I try to convert it I found 14104 whereas it should be nearer a 10000 value
Does anyone have an idea how to convert those predictions?
Thanks!
You will have to make use of inverse_transform of MinMaxScaler to convert back the output you are getting in range of 0-1.
You have not given your model, but I believe you are making use of regression task with few dense layers. You will have to keep minimizing your loss. If you are using mean squared error, the larger the loss, more is the likelihood your output will be far away from the desired set of results.
Even after your loss is a small number and the result is coming good for train samples, but the prediction is bad for test dataset, you may have to consider increasing your train dataset so that more possibilities are covered. If that is not possible, consider reducing the number of neurons in your neural network so that it stops over-fitting.
You can do some postprocessing to restrict the output to some desired range.

How to use dataset in TensorFlow session for training

I like to perform image classification on our own large image libary (millions of labeled images) with tensorflow. I´m new to stackoverflow, python and tensorflow and worked myself through a few tutorials (mnist etc.) and got to the point, where i was able to prepare a TensorFlow datset from a dictionary including the absolute path to the images and the according labels. However, i´m stuck at the point using the dataset in a TensorFlow session. Here is my (example) code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import time
import mymodule # I build my module to read the images and labels
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
from tensorflow.contrib.data import Iterator
beginTime = time.time()
batch_size = 100
learning_rate = 0.005
max_steps = 2
NUM_CLASSES = 25
def input_parser(img_path, label):
one_hot = tf.one_hot(label, NUM_CLASSES)
img_file = tf.read_file(img_path)
img_decoded = tf.image.decode_jpeg(img_file, channels = 3)
return img_decoded, one_hot
#Import Training data (returns the dicitonary with paths and labels)
train_dict = mymodule.getFileMap(labelList, imageList)
#Import Test data
test_dict = mymodule.getFileMap(labelList, imageList)
#Get train data
train_file_list, train_label_list = get_file_label_list(train_dict)
train_images_tensor = ops.convert_to_tensor(train_file_list, dtype=dtypes.string)
train_labels_tensor = ops.convert_to_tensor(train_label_list, dtype=dtypes.int64)
#Get test data
test_file_list, test_label_list = get_file_label_list(test_dict)
test_images_tensor = ops.convert_to_tensor(test_file_list, dtype=dtypes.string)
test_labels_tensor = ops.convert_to_tensor(test_label_list, dtype=dtypes.int64)
#Create TensorFlow Datset object
train_data = tf.data.Dataset.from_tensor_slices((train_images_tensor, train_labels_tensor))
test_data = tf.data.Dataset.from_tensor_slices((test_images_tensor, test_labels_tensor))
# Transform the datset so that it contains decoded images
# and one-hot vector labels
train_data = train_data.map(input_parser)
test_data = test_data.map(input_parser)
# Batching --> How to do it right?
#train_data = train_data.batch(batch_size = 100)
#test_data = train_data.batch(batch_size = 100)
#Define input placeholders
image_size = 990*990*3
images_placeholder = tf.placeholder(tf.float32, shape=[None, image_size])
labels_placeholder = tf.placeholder(tf.int64, shape=[None])
# Define variables (these afe the values we want to optimize)
weigths = tf.Variable(tf.zeros([image_size, NUM_CLASSES]))
biases = tf.Variable(tf.zeros([NUM_CLASSES]))
# Define the classifier´s result
logits = tf.matmul(images_placeholder, weigths) + biases
# Define the loss function
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = logits, labels = labels_placeholder))
# Define the training operation
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
# Operation comparing prediciton with true label
correct_prediciton = tf.equal(tf.argmax(logits, 1), labels_placeholder)
# Operation calculating the accuracy of our predicitons
accuracy = tf.reduce_mean(tf.cast(correct_prediciton, tf.float32))
#Create TensorFlow Iterator object
iterator = Iterator.from_structure(train_data.output_types,
train_data.output_shapes)
next_element = iterator.get_next()
#Create two initialization ops to switch between the datasets
train_init_op = iterator.make_initializer(train_data)
test_init_op = iterator.make_initializer(test_data)
with tf.Session() as sess:
#Initialize variables
sess.run(tf.global_variables_initializer())
sess.run(train_init_op)
for _ in range(10):
try:
elem = sess.run(next_element)
print(elem)
except tf.errors.OutOfRangeError:
print("End of training datset.")
break
Following this and this tutorial i could not solve the problem of how to use the (image and label) dataset in a tensorflow session for training. I was able to print out the datset by iterating through it, but wasn´t able to use it for learning.
I don´t understand how to access the images and labels seperately after they have been merged in the train_data = tf.data.Dataset.from_tensor_slices((train_images_tensor, train_labels_tensor)) operation, as requried by the 2nd tutorial. Also i don´t know how to implement batching correctly.
What i want to do in the session is basically this (from the 2nd tutorial):
# Generate input data batch
indices = np.random.choice(data_sets['images_train'].shape[0], batch_size)
images_batch = data_sets['images_train'][indices]
labels_batch = data_sets['labels_train'][indices]
# Periodically print out the model's current accuracy
if i % 100 == 0:
train_accuracy = sess.run(accuracy, feed_dict={
images_placeholder: images_batch, labels_placeholder: labels_batch})
print('Step {:5d}: training accuracy {:g}'.format(i, train_accuracy))
# Perform a single training step
sess.run(train_step, feed_dict={images_placeholder: images_batch,
labels_placeholder: labels_batch})
# After finishing the training, evaluate on the test set
test_accuracy = sess.run(accuracy, feed_dict={
images_placeholder: data_sets['images_test'],
labels_placeholder: data_sets['labels_test']})
print('Test accuracy {:g}'.format(test_accuracy))
endTime = time.time()
print('Total time: {:5.2f}s'.format(endTime - beginTime))
If anyone can tell me, how to access images and labels in the dataset sepearately and use it for training, i would be really thankful. Also a tip where and how to do the batching would be appreciated.
Thank you.
In your code, next_element is a tuple of two tensors, matching the structure of your datasets: i.e. it is a tuple whose first element is an image, and second element is a label. To access the individual tensors, you can do the following:
next_element = iterator.get_next()
next_image = next_element[0]
next_label = next_element[1]
# Or, in a single line:
next_image, next_label = iterator.get_next()
To batch a tf.data.Dataset, you can use the Dataset.batch() transformation. Your commented out code for this should simply work:
train_data = train_data.batch(batch_size = 100)
test_data = train_data.batch(batch_size = 100)

MLP on TensorFlow is giving the same prediction for all observations after the training

I am trying to train a sparse data with an MLP to predict a forecast. However, the forecast on the test data is giving the same value for all observations. Once I omit the activation function from each layer, the outcome starts being different.
my code is below:
# imports
import numpy as np
import tensorflow as tf
import random
import json
from scipy.sparse import rand
# Parameters
learning_rate= 0.1
training_epochs = 50
batch_size = 100
# Network Parameters
m= 1000 #number of features
n= 5000 # number of observations
hidden_layers = [5,2,4,1,6]
n_layers = len(hidden_layers)
n_input = m
n_classes = 1 # it's a regression problem
X_train = rand(n, m, density=0.2,format = 'csr').todense().astype(np.float32)
Y_train = np.random.randint(4, size=n)
X_test = rand(200, m, density=0.2,format = 'csr').todense().astype(np.float32)
Y_test = np.random.randint(4, size=200)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None])
# Store layers weight & bias
weights = {}
biases = {}
weights['h1']=tf.Variable(tf.random_normal([n_input, hidden_layers[0]])) #first matrice
biases['b1'] = tf.Variable(tf.random_normal([hidden_layers[0]]))
for i in xrange(2,n_layers+1):
weights['h'+str(i)]= tf.Variable(tf.random_normal([hidden_layers[i-2], hidden_layers[i-1]]))
biases['b'+str(i)] = tf.Variable(tf.random_normal([hidden_layers[i-1]]))
weights['out']=tf.Variable(tf.random_normal([hidden_layers[-1], 1])) #matrice between last layer and output
biases['out']= tf.Variable(tf.random_normal([1]))
# Create model
def multilayer_perceptron(_X, _weights, _biases):
layer_begin = tf.nn.relu(tf.add(tf.matmul(_X, _weights['h1'],a_is_sparse=True), _biases['b1']))
for layer in xrange(2,n_layers+1):
layer_begin = tf.nn.relu(tf.add(tf.matmul(layer_begin, _weights['h'+str(layer)]), _biases['b'+str(layer)]))
#layer_end = tf.nn.dropout(layer_begin, 0.3)
return tf.matmul(layer_begin, _weights['out'])+ _biases['out']
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
rmse = tf.reduce_sum(tf.abs(y-pred))/tf.reduce_sum(tf.abs(y)) # rmse loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(rmse) # Adam Optimizer
# Initializing the variables
init = tf.initialize_all_variables()
with tf.Session() as sess:
sess.run(init)
#training
for step in xrange(training_epochs):
# Generate a minibatch.
start = random.randrange(1, n - batch_size)
#print start
batch_xs=X_train[start:start+batch_size,:]
batch_ys =Y_train[start:start+batch_size]
#printing
_,rmseRes = sess.run([optimizer, rmse] , feed_dict={x: batch_xs, y: batch_ys} )
if step % 20 == 0:
print "rmse [%s] = %s" % (step, rmseRes)
#testing
pred_test = multilayer_perceptron(X_test, weights, biases)
print "prediction", pred_test.eval()[:20]
print "actual = ", Y_test[:20]
PS: I am generating randomly my data just to reproduce the error. My data is sparse in fact, pretty similar to the one generated randomly. The problem I want to solve is: MLP is giving the same prediction for all observations in the test data.
That's a sign that your training failed. With GoogeLeNet Imagenet training I've seen it label everything as "nematode" when started with a bad choice of hyper-parameters. Things to check -- does your training loss decrease? If it doesn't decrease, try different learning rates/architectures. If it decreases to zero maybe your loss is wrong like was case here

How do I use TensorFlow to add Predicted Value to an empty column in a CSV file?

So I have this great bit of code that comes out with approximately a 93% accuracy rate on its predictions. What I'm wondering how to do now is to take the trained program, make it look at actual test data without the answer on it, and make it fill in the answer regardless of the accuracy. Here's the code that I have that predicts with a ~93% accuracy rate.
import tensorflow as tf
import numpy as np
from numpy import genfromtxt
import sklearn
# Convert to one hot
def convertOneHot(data):
y=np.array([int(i[0]) for i in data])
y_onehot=[0]*len(y)
for i,j in enumerate(y):
y_onehot[i]=[0]*(y.max() + 1)
y_onehot[i][j]=1
return (y,y_onehot)
data = genfromtxt('cs-training.csv',delimiter=',') # Training data
test_data = genfromtxt('cs-test.csv',delimiter=',') # Test data
x_train=np.array([ i[1::] for i in data])
y_train,y_train_onehot = convertOneHot(data)
x_test=np.array([ i[1::] for i in test_data])
y_test,y_test_onehot = convertOneHot(test_data)
A=data.shape[1]-1 # Number of features, Note first is y
B=len(y_train_onehot[0])
tf_in = tf.placeholder("float", [None, A]) # Features
tf_weight = tf.Variable(tf.zeros([A,B]))
tf_bias = tf.Variable(tf.zeros([B]))
tf_softmax = tf.nn.softmax(tf.matmul(tf_in,tf_weight) + tf_bias)
# Training via backpropagation
tf_softmax_correct = tf.placeholder("float", [None,B])
tf_cross_entropy = -tf.reduce_sum(tf_softmax_correct*tf.log(tf_softmax))
# Train using tf.train.GradientDescentOptimizer
tf_train_step = tf.train.GradientDescentOptimizer(0.01).minimize(tf_cross_entropy)
# Add accuracy checking nodes
tf_correct_prediction = tf.equal(tf.argmax(tf_softmax,1), tf.argmax(tf_softmax_correct,1))
tf_accuracy = tf.reduce_mean(tf.cast(tf_correct_prediction, "float"))
saver = tf.train.Saver([tf_weight,tf_bias])
# Initialize and run
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
print("...")
# Run the training
for i in range(100):
sess.run(tf_train_step, feed_dict={tf_in: x_train, tf_softmax_correct: y_train_onehot})
#Print accuracy
result = sess.run(tf_accuracy, feed_dict={tf_in: x_test, tf_softmax_correct: y_test_onehot})
print result
Now I have the actual test set cs-test-actual.csv where the first column is entirely empty and I need to fill it in with a predicted 1 or 0. How do I go about doing that?
The program above doesn't appear to be saving the trained session. I think you want to do this in two steps.
Train and save the session
Restore the save session, and run test data through it.
Step 1:
#!/usr/bin/env python
import tensorflow as tf
import numpy as np
from numpy import genfromtxt
import sklearn
# Convert to one hot
def convertOneHot(data):
y=np.array([int(i[0]) for i in data])
y_onehot=[0]*len(y)
for i,j in enumerate(y):
y_onehot[i]=[0]*(y.max() + 1)
y_onehot[i][j]=1
return (y,y_onehot)
# Build Example Data is CSV format, but use Iris data
from sklearn import datasets
from sklearn.model_selection import train_test_split
def buildDataFromIris():
iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.33, random_state=42)
f=open('cs-training.csv','w')
for i,j in enumerate(X_train):
k=np.append(np.array(y_train[i]),j )
f.write(",".join([str(s) for s in k]) + '\n')
f.close()
f=open('cs-test.csv','w')
for i,j in enumerate(X_test):
k=np.append(np.array(y_test[i]),j )
f.write(",".join([str(s) for s in k]) + '\n')
f.close()
# Recreate logging and save dir
# Seems the tensorflow won't always overwrite
import shutil, os, sys
TMPDir='./tensorTMP'
try:
shutil.rmtree(TMPDir)
except:
print "Tmp Dir did not exist...that's okay"
os.mkdir(TMPDir, 0755 )
# Populate the data
buildDataFromIris()
data = genfromtxt('cs-training.csv',delimiter=',') # Training data
test_data = genfromtxt('cs-test.csv',delimiter=',') # Test data
x_train=np.array([ i[1::] for i in data])
y_train,y_train_onehot = convertOneHot(data)
x_test=np.array([ i[1::] for i in test_data])
y_test,y_test_onehot = convertOneHot(test_data)
A=data.shape[1]-1 # Number of features, Note first is y
B=len(y_train_onehot[0])
tf_in = tf.placeholder("float", [None, A]) # Features
tf_weight = tf.Variable(tf.zeros([A,B]))
tf_bias = tf.Variable(tf.zeros([B]))
tf_softmax = tf.nn.softmax(tf.matmul(tf_in,tf_weight) + tf_bias)
# Training via backpropagation
tf_softmax_correct = tf.placeholder("float", [None,B])
tf_cross_entropy = -tf.reduce_sum(tf_softmax_correct*tf.log(tf_softmax))
# Train using tf.train.GradientDescentOptimizer
tf_train_step = tf.train.GradientDescentOptimizer(0.01).minimize(tf_cross_entropy)
# Add accuracy checking nodes
tf_correct_prediction = tf.equal(tf.argmax(tf_softmax,1), tf.argmax(tf_softmax_correct,1))
tf_accuracy = tf.reduce_mean(tf.cast(tf_correct_prediction, "float"))
saver = tf.train.Saver([tf_weight,tf_bias])
# Initialize and run
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
THRESHOLD = 0.98
saved = False
print("...")
# Run the training
for i in range(100):
sess.run(tf_train_step, feed_dict={tf_in: x_train, tf_softmax_correct: y_train_onehot})
result = sess.run(tf_accuracy, feed_dict={tf_in: x_test, tf_softmax_correct: y_test_onehot})
# If it's well trained on this iteration, save it. We just need one save.
if result > THRESHOLD and saved == False:
saved = True
print "saving result {}".format(result)
saver.save(sess,TMPDir +"/savedSess")
The only modifications made were generating sample data using Iris, establishing a THRESHOLD or confidence interval for the session. If it's over that THRESHOLD, then, save the session. After running step one, the model should be trained and saved.
Step 2:
Restore the saved session, and run the training data through it.
#!/usr/bin/env python
import tensorflow as tf
import numpy as np
from numpy import genfromtxt
import sklearn
# Convert to one hot
def convertOneHot(data):
y=np.array([int(i[0]) for i in data])
y_onehot=[0]*len(y)
for i,j in enumerate(y):
y_onehot[i]=[0]*(y.max() + 1)
y_onehot[i][j]=1
return (y,y_onehot)
data = genfromtxt('cs-training.csv',delimiter=',') # Training data
test_data = genfromtxt('cs-test.csv',delimiter=',') # Test data
x_train=np.array([ i[1::] for i in data])
y_train,y_train_onehot = convertOneHot(data)
x_test=np.array([ i[1::] for i in test_data])
y_test,y_test_onehot = convertOneHot(test_data)
A=data.shape[1]-1 # Number of features, Note first is y
B=len(y_train_onehot[0])
tf_in = tf.placeholder("float", [None, A]) # Features
tf_weight = tf.Variable(tf.zeros([A,B]))
tf_bias = tf.Variable(tf.zeros([B]))
tf_softmax = tf.nn.softmax(tf.matmul(tf_in,tf_weight) + tf_bias)
# Training via backpropagation
tf_softmax_correct = tf.placeholder("float", [None,B])
tf_cross_entropy = -tf.reduce_sum(tf_softmax_correct*tf.log(tf_softmax))
# Train using tf.train.GradientDescentOptimizer
tf_train_step = tf.train.GradientDescentOptimizer(0.01).minimize(tf_cross_entropy)
# Add accuracy checking nodes
tf_correct_prediction = tf.equal(tf.argmax(tf_softmax,1), tf.argmax(tf_softmax_correct,1))
tf_accuracy = tf.reduce_mean(tf.cast(tf_correct_prediction, "float"))
saver = tf.train.Saver([tf_weight,tf_bias])
# Initialize and run
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
TMPDir='./tensorTMP'
saver.restore(sess, TMPDir + '/savedSess')
ans = sess.run(tf_softmax, feed_dict={tf_in: x_test, tf_softmax_correct: y_test_onehot})
print ans
Note, your output will look like the following...
[[ 6.17585704e-02 8.63590300e-01 7.46511072e-02]
[ 9.98804331e-01 1.19561062e-03 3.25832108e-13]
[ 1.52018686e-07 4.49650863e-04 9.99550164e-01]

Categories