I've just finished writing a neural net with TensorFlow.
Attached code:
import tensorflow as tensorFlow
import csv

# read data from csv
file = open('stub.csv')
reader = csv.reader(file)
temp = list(reader)
del temp[0]

# convert the data from string to float (for TensorFlow)
# create data & goal lists
data = []
goal = []
for i in range(len(temp)):
    data.append(list(map(float, temp[i])))
    goal.append([data[i][6], 0.0])
    del data[i][6]

# change lists to tuples
data = tuple(tuple(x) for x in data)
goal = tuple(goal)

# split into training, validation and test data (60-20-20)
a = int(len(data) * 0.6)  # training set: 60%
b = int(len(data) * 0.8)  # validation & test: 20% each
trainData = data[0:a]  # 60%
validationData = data[b:len(data)]
testData = data[a:b]  # 20%
trainGoal = goal[0:a]
validationGoal = goal[b:len(data)]
testGoal = goal[a:b]

numberOfLayers = 500
nodesLayer = []
# define the number of nodes in each hidden layer
for i in range(numberOfLayers):
    nodesLayer.append(500)

# define our goal classes
classes = 2
batchSize = 2000

# x for input, y for output
sizeOfRow = len(data[0])
x = tensorFlow.placeholder(dtype=tensorFlow.float32, shape=[None, sizeOfRow])
y = tensorFlow.placeholder(dtype=tensorFlow.float32, shape=[None, classes])

hiddenLayers = []
layers = []


def neuralNetworkModel(x):
    # first step: (input * weights) + bias, a linear operation like y = ax + b
    # each connection between two layers is represented by a nodes(i) x nodes(i+1) weight matrix
    for i in range(0, numberOfLayers):
        if i == 0:
            hiddenLayers.append({"weights": tensorFlow.Variable(tensorFlow.random_normal([sizeOfRow, nodesLayer[i]])),
                                 "biases": tensorFlow.Variable(tensorFlow.random_normal([nodesLayer[i]]))})
        elif i > 0 and i < numberOfLayers - 1:
            hiddenLayers.append({"weights": tensorFlow.Variable(tensorFlow.random_normal([nodesLayer[i], nodesLayer[i + 1]])),
                                 "biases": tensorFlow.Variable(tensorFlow.random_normal([nodesLayer[i + 1]]))})
        else:
            outputLayer = {"weights": tensorFlow.Variable(tensorFlow.random_normal([nodesLayer[i], classes])),
                           "biases": tensorFlow.Variable(tensorFlow.random_normal([classes]))}

    # create the layers
    for i in range(numberOfLayers):
        if i == 0:
            layers.append(tensorFlow.add(tensorFlow.matmul(x, hiddenLayers[i]["weights"]), hiddenLayers[i]["biases"]))
            layers.append(tensorFlow.nn.relu(layers[i]))  # pass the values through an activation function (ReLU here) and append the result
        elif i > 0 and i < numberOfLayers - 1:
            layers.append(tensorFlow.add(tensorFlow.matmul(layers[i - 1], hiddenLayers[i]["weights"]), hiddenLayers[i]["biases"]))
            layers.append(tensorFlow.nn.relu(layers[i]))

    output = tensorFlow.matmul(layers[numberOfLayers - 1], outputLayer["weights"]) + outputLayer["biases"]
    return output


def neuralNetworkTrain(data, x, y):
    prediction = neuralNetworkModel(x)
    # softmax cross-entropy normalizes the logits to the range (0, 1)
    cost = tensorFlow.reduce_mean(tensorFlow.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    # minimize the cost function using the Adadelta optimizer
    optimizer = tensorFlow.train.AdadeltaOptimizer().minimize(cost)
    epochs = 2  # one forward pass + one backpropagation pass over the data = one epoch

    # build the session and train the model
    with tensorFlow.Session() as sess:
        sess.run(tensorFlow.initialize_all_variables())
        for epoch in range(epochs):
            epochLoss = 0
            i = 0
            for _ in range(int(len(data) / batchSize)):
                ex, ey = nextBatch(i)  # takes batchSize examples
                i += 1
                feedDict = {x: ex, y: ey}
                _, cos = sess.run([optimizer, cost], feed_dict=feedDict)  # run the session to optimize the cost function
                epochLoss += cos
            print("Epoch", epoch + 1, "completed out of", epochs, "loss:", epochLoss)

        correct = tensorFlow.equal(tensorFlow.argmax(prediction, 1), tensorFlow.argmax(y, 1))
        accuracy = tensorFlow.reduce_mean(tensorFlow.cast(correct, "float"))
        print("Accuracy:", accuracy.eval({x: trainData, y: trainGoal}))


# takes batchSize examples each iteration
def nextBatch(num):
    # Return the next `batchSize` examples from this data set.
    num *= batchSize
    if num < (len(data) - batchSize):
        return data[num: num + batchSize], goal[num: num + batchSize]


neuralNetworkTrain(trainData, x, y)
After each epoch (iteration) I get the value of the loss function, and everything looks good.
Now I want to try the model on my validation/test set.
Does anyone know what exactly I should do?
Thanks
If you want to get predictions on the trained data you can simply put something like:
tf_p = tf.nn.softmax(prediction)
...in your graph, having loaded your test data into x_test. Then evaluate the predictions with:
[p] = session.run([tf_p], feed_dict={
    x: x_test,
    y: y_test
})
at the end of your neuralNetworkTrain method, and you should end up with them in p.
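For instance, applied directly to the code in the question (a rough sketch only, reusing the accuracy op and the validation/test splits already defined there), the end of neuralNetworkTrain could look something like:
# still inside the `with tensorFlow.Session() as sess:` block, after the epoch loop
tf_p = tensorFlow.nn.softmax(prediction)  # class probabilities, if you want to inspect them
validationProbs = sess.run(tf_p, feed_dict={x: validationData})
print("Validation accuracy:", accuracy.eval({x: validationData, y: validationGoal}))
print("Test accuracy:", accuracy.eval({x: testData, y: testGoal}))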
...Or using tf.train.Saver:
Alternatively, you could use a tf.train.Saver object to save and restore (and optionally persist) your model. To do that, you create a saver after you initialise all variables:
...
tf.initialize_all_variables().run()
saver = tf.train.Saver()
...
And then save it once you're done training, at the end of your neuralNetworkTrain method:
...
model_path = saver.save(sess, "./model.ckpt")  # save_path is a required argument; any checkpoint path will do
You then build a new graph for evaluation, and restore the model before running it on your test data:
# Load test dataset into X_test
...
tf_x = tf.constant(X_test)
tf_p = tf.nn.softmax(neuralNetworkModel(tf_x))
with tf.Session() as session:
    tf.initialize_all_variables().run()
    saver.restore(session, model_path)
    p = tf_p.eval()
And, once again, p should contain softmax activations for your test dataset.
(I haven't actually run this code I'm afraid, but it should give you an idea of how to implement it.)
Related
I am learning TensorFlow by implementing a simple logistic regression classifier that outputs whether a digit is a 7 or not when fed an MNIST image. I am using stochastic gradient descent. The crux of the TensorFlow code is:
# Maximum number of epochs
MaxEpochs = 1
# Learning rate
eta = 1e-2

ops.reset_default_graph()

n_x = 784
n_y = 1

x_tf = tf.placeholder(tf.float32, shape=[n_x, 1], name='x_tf')
y_tf = tf.placeholder(tf.float32, shape=[n_y, 1], name='y_tf')

w_tf = tf.get_variable(name="w_tf", shape=[n_x, 1], initializer=tf.initializers.random_uniform())
b_tf = tf.get_variable(name="b_tf", shape=[n_y, 1], initializer=tf.initializers.random_uniform())

z_tf = tf.add(tf.matmul(w_tf, x_tf, transpose_a=True), b_tf, name='z_tf')
yPred_tf = tf.sigmoid(z_tf, name='yPred_tf')

Loss_tf = tf.nn.sigmoid_cross_entropy_with_logits(logits=yPred_tf, labels=y_tf, name='Loss_tf')

with tf.name_scope('Training'):
    optimizer_tf = tf.train.GradientDescentOptimizer(learning_rate=eta)
    train_step = optimizer_tf.minimize(Loss_tf)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for Epoch in range(MaxEpochs):
        for Sample in range(len(XTrain)):
            x = XTrain[Sample]
            y = YTrain[Sample].reshape([-1, 1])
            Train_sample = {x_tf: x, y_tf: y}
            sess.run(train_step, feed_dict=Train_sample)

toc = time.time()
print('\nElapsed time is: ', toc - tic, 's')
It builds the following graph (tensorboard related code has been removed for convenience):
The problem is that even though the weights and biases are initialised randomly (non-zero), the neuron isn't being trained. The weight histogram is as follows.
I didn't want to post something so trivial, but I am at my wit's end. Sorry for the long post. Thank you very much in advance for any guidance. A little side note: it takes 93.35 s to run, whereas it only took 10 or so seconds when I did this with numpy (same stochastic implementation). Why would that be?
EDIT:
The bias plot over the course of the training is as follows.
EDIT: The entire code, in case the issue stems from something outside what I previously thought.
import tensorflow as tf
import numpy as np
import h5py
from tensorflow.python.framework import ops
import time

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

def Flatten(Im):
    FlatImArray = Im.reshape([Im.shape[0], -1, 1])
    return FlatImArray

DigitTested = 7

# Separating the images with 7s from the rest
TrainIdxs = []
for i in range(len(y_train)):
    if(y_train[i] == DigitTested):
        TrainIdxs.append(i)

TestIdxs = []
for i in range(len(y_test)):
    if(y_test[i] == DigitTested):
        TestIdxs.append(i)

# Preparing the datasets for training and testing
XTrain = Flatten(x_train)
YTrain = np.zeros([len(x_train), 1])
YTrain[TrainIdxs] = 1

XTest = Flatten(x_test)
YTest = np.zeros([len(x_test), 1])
YTest[TestIdxs] = 1

tic = time.time()

# Maximum number of epochs
MaxEpochs = 1
# Learning rate
eta = 1e-2
# Number of epochs after which the neuron is validated
ValidationInterval = 1

ops.reset_default_graph()  # to be able to rerun the model without overwriting tf variables

n_x = 784
n_y = 1

x_tf = tf.placeholder(tf.float32, shape=[n_x, 1], name='x_tf')
y_tf = tf.placeholder(tf.float32, shape=[n_y, 1], name='y_tf')

w_tf = tf.get_variable(name="w_tf", shape=[n_x, 1], initializer=tf.initializers.random_uniform())
b_tf = tf.get_variable(name="b_tf", shape=[n_y, 1], initializer=tf.initializers.random_uniform())

z_tf = tf.add(tf.matmul(w_tf, x_tf, transpose_a=True), b_tf, name='z_tf')
yPred_tf = tf.sigmoid(z_tf, name='yPred_tf')

Loss_tf = tf.nn.sigmoid_cross_entropy_with_logits(logits=yPred_tf, labels=y_tf, name='Loss_tf')

with tf.name_scope('Training'):
    optimizer_tf = tf.train.GradientDescentOptimizer(learning_rate=eta)
    train_step = optimizer_tf.minimize(Loss_tf)

writer = tf.summary.FileWriter(r"C:\Users\braja\Documents\TBSummaries\MNIST1NTF\2")
tf.summary.histogram('Weights', w_tf)
tf.summary.scalar('Loss', tf.reshape(Loss_tf, []))
tf.summary.scalar('Bias', tf.reshape(b_tf, []))
merged_summary = tf.summary.merge_all()

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for Epoch in range(MaxEpochs):
        for Sample in range(len(XTrain)):
            x = XTrain[Sample]
            y = YTrain[Sample].reshape([-1, 1])
            Train_sample = {x_tf: x, y_tf: y}
            MergedSumm, _ = sess.run([merged_summary, train_step], feed_dict=Train_sample)
            writer.add_summary(summary=MergedSumm, global_step=Sample)
        if((Epoch + 1) % ValidationInterval == 0):
            ValidationError = 0
            for Sample in range(len(XTest)):
                x = XTest[Sample]
                y = YTest[Sample].reshape([-1, 1])
                Test_sample = {x_tf: x, y_tf: y}
                yPred = sess.run(yPred_tf, feed_dict=Test_sample)
                ValidationError += abs(yPred - YTest[Sample])
            print('Validation Error at', Epoch + 1, 'Epoch:', ValidationError)
    writer.add_graph(tf.Session().graph)
    writer.close()

toc = time.time()
print('\nElapsed time is: ', toc - tic, 's')
Looking at the bias value it looks like you are seeing saturation of the sigmoid function.
This happens when you push the sigmoid input (z_tf) to the extreme ends of the sigmoid function. When that happens, the gradient returned is so low that training stagnates. The probable cause is that you have doubled up on sigmoid functions: sigmoid_cross_entropy_with_logits applies a sigmoid to its input, but you have already applied one yourself. Try removing one of them.
In addition, by default tf.initializers.random_uniform() produces random values between 0 and 1. You probably want to initialise your weights and biases symmetrically about 0 and with really small values to start with. This can be done by passing the minval and maxval arguments to tf.initializers.random_uniform().
Small initial weights can then grow during training, which again helps prevent sigmoid saturation.
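As a rough sketch (untested, with illustrative minval/maxval values), the two changes applied to your graph would look something like:
w_tf = tf.get_variable(name="w_tf", shape=[n_x, 1],
                       initializer=tf.initializers.random_uniform(minval=-0.01, maxval=0.01))
b_tf = tf.get_variable(name="b_tf", shape=[n_y, 1],
                       initializer=tf.initializers.random_uniform(minval=-0.01, maxval=0.01))
z_tf = tf.add(tf.matmul(w_tf, x_tf, transpose_a=True), b_tf, name='z_tf')
yPred_tf = tf.sigmoid(z_tf, name='yPred_tf')  # keep this only for reporting predictions
# feed the raw logits (z_tf), not yPred_tf, to the loss: the op applies the sigmoid internally
Loss_tf = tf.nn.sigmoid_cross_entropy_with_logits(logits=z_tf, labels=y_tf, name='Loss_tf')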
I'm having trouble training a seq2seq model using Tensorflow on an Nvidia P100 GPU.
Here are the versions I'm using: TensorFlow 1.10.0, Keras 2.2.2, Python 3.6.3, CUDA 9.2.148.1, cuDNN 7.2.1
I currently get an OOM error well into training (after about 18 minutes).
I've been doing a little digging and tried setting allow_growth = True (the flag is not set in the code below), but did not manage to see any memory growth; it all gets allocated at the start.
I also tried setting the graph to read-only with tf.Graph.finalize(), but the program still runs, which suggests either that no nodes are being added or that I haven't placed the call in the correct place in the code.
Since the graph trains and can be saved and all, it doesn't seem to be too large at the start.
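For reference, this is roughly how I set allow_growth when I tried it (it is not present in the code below; this is the standard TF 1.x session config):
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
train_sess = tf.Session(graph=train_graph, config=config)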
Here are some of the hyper parameters I'm using:
batch_size = 10
rnn_size = 1024
src/tgt_vocab_size = 8000
epochs = 20
display_steps = 10
The dataset consists of docstrings and the associated function code, so the sequences are not incredibly long.
One of my original thoughts was that since the sentence sizes are dynamic, one really long one could be too big. But I shuffled the dataset to see if the crash happened at a different time, and it still crashes at 18 minutes with the same parameters.
Here is the code including the graphs and the training/testing loop.
def train(...):
    ...
    ...
    def source_generator():
        for el in train_source_sents:
            yield el

    def target_generator():
        for el in train_target_sents:
            yield el

    train_graph = tf.Graph()
    with train_graph.as_default():
        # Dataset and batch preparation
        with tf.name_scope("Dataset-prep"):
            source_dataset = tf.data.Dataset.from_generator(source_generator, output_types=tf.int32, output_shapes=(tf.TensorShape([None])))  # converting the sentence array to a dataset
            target_dataset = tf.data.Dataset.from_generator(target_generator, output_types=tf.int32, output_shapes=(tf.TensorShape([None])))  # converting the sentence array to a dataset
            target_dataset = target_dataset.map(lambda x: tf.concat([x, [target_tok2ID['<EOS>']]], 0))  # adding the <EOS> token to the end of all the target sentences
            target_input_dataset = target_dataset.map(lambda x: tf.concat([[target_tok2ID['<GO>']], x], 0))  # creating training inputs for the decoder; this requires adding <GO> to the start of the sequence
            target_sequence_length = target_dataset.map(lambda x: tf.shape(x)[0])  # adding the sizes of all the sequences, to be paired up with the rest of the dataset
            dataset = tf.data.Dataset.zip((source_dataset, target_dataset, target_input_dataset, target_sequence_length))  # create the collection of all the individual datasets
            dataset = dataset.shuffle(buffer_size=10000)

        with tf.name_scope("Dataset-batching"):
            dataset = dataset.repeat(epochs)
            pad_id = target_tok2ID['<PAD>']
            batched_dataset = dataset.padded_batch(batch_size, padded_shapes=([None], [None], [None], []), padding_values=(pad_id, pad_id, pad_id, pad_id))
            batched_dataset = batched_dataset.prefetch(buffer_size=batch_size)  # could be removed, perhaps yields improvements
            iterator = batched_dataset.make_one_shot_iterator()
            source_batch, target_batch, target_input_batch, batch_target_sequence_length = iterator.get_next()

        with tf.name_scope("Encoding-layer"):
            source_vocab_size = len(source_tok2ID)
            embed = tf.contrib.layers.embed_sequence(source_batch, vocab_size=source_vocab_size, embed_dim=encoding_embedding_size)
            stacked_cells = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.LSTMCell(rnn_size), keep_prob) for _ in range(num_layers)])
            outputs, encoder_state = tf.nn.dynamic_rnn(stacked_cells, embed, dtype=tf.float32)

        with tf.name_scope("Decoding-layer"):
            target_vocab_size = len(target_tok2ID)
            dec_embeddings = tf.Variable(tf.random_uniform([target_vocab_size, decoding_embedding_size]))
            dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, target_input_batch)
            cells = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.LSTMCell(rnn_size) for _ in range(num_layers)])
            output_layer = tf.layers.Dense(target_vocab_size)
            dec_cell = tf.contrib.rnn.DropoutWrapper(cells, output_keep_prob=keep_prob)
            helper = tf.contrib.seq2seq.TrainingHelper(dec_embed_input, batch_target_sequence_length)
            decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, helper, encoder_state, output_layer)
            max_target_sequence_length = tf.reduce_max(batch_target_sequence_length, axis=0)
            dec_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, impute_finished=True, maximum_iterations=max_target_sequence_length)
            dec_outputs = tf.identity(dec_outputs.rnn_output, name='logits')  # this step might seem a little mysterious, but it takes the output of the dynamic decoder and exposes it as a named tensor (documentation is scarce here)
            masks = tf.sequence_mask(batch_target_sequence_length, max_target_sequence_length, dtype=tf.float32, name='masks')

        cost = tf.contrib.seq2seq.sequence_loss(dec_outputs, target_batch, masks)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

        tf.summary.scalar('cost', cost)
        merged = tf.summary.merge_all()

        train_saver = tf.train.Saver()
        init_op = tf.global_variables_initializer()

    train_sess = tf.Session(graph=train_graph)
    if(model_path):
        print(model_path)
        train_saver.restore(train_sess, model_path)
    else:
        train_sess.run(init_op)

    store_path = "./visualisations/"
    writer = tf.summary.FileWriter(store_path, train_sess.graph)

    step = 0
    start_time = datetime.now()
    while True:
        try:
            step += 1
            model_cost, _, summary = train_sess.run((cost, train_op, merged))
            writer.add_summary(summary, step)

            if(step % display_step == 0):
                save_path = train_saver.save(train_sess, "./checkpoints/NMT")
                print("Model saved in path: %s" % save_path)

                test_sess = tf.Session(graph=test_graph)
                test_saver.restore(test_sess, save_path)
                # print(test_sess.run(foo))

                scores = []
                while True:
                    try:
                        predictions, refrence = test_sess.run([dec_predictions, test_target_batch])
                        for i in range(len(refrence)):
                            BLEU_score = nltk.translate.bleu_score.sentence_bleu([np.trim_zeros(refrence[i])], np.trim_zeros(predictions[i]), weights=(1, 0))
                            scores.append(BLEU_score)
                            print("########################################")
                            print("ref:", list(map(lambda x: target_ID2tok[x], np.trim_zeros(refrence[i]))))
                            print("")
                            print("pred:", list(map(lambda x: target_ID2tok[x], np.trim_zeros(predictions[i]))))
                    except tf.errors.OutOfRangeError:
                        print("Exhausted test data")
                        break

                delta_time = datetime.now() - start_time
                total_exp_time = delta_time * (total_steps / step)
                remaining_time = total_exp_time - delta_time

                print("")
                print("Test set BLEU Score:", np.mean(scores))
                print("Model cost:", model_cost)
                print("Step {} from {}".format(step, total_steps))
                print("Current time:", datetime.now())
                print("Total Experiment time (Hours:Minutes:Seconds):", str(total_exp_time))
                print("Time Elapsed (Hours:Minutes:Seconds):", str(delta_time))
                print("Time remaining (Hours:Minutes:Seconds):", str(remaining_time))
                print("")

        except tf.errors.OutOfRangeError:
            print("Model finished training")
            break

    return save_path
Here is an output of a training run:
command line output
Is there something wrong with the way I'm executing the graph, or am I repeating some step that leads to the memory filling up?
Thanks for all your help!
I'm trying to build a recommender system with a SkipGram model. I built a variant of Word2vec adapted to my data (anime), where I use each user's watched-anime list like this:
data = []
for key, chunk in ratings.groupby('user_id'):
    data.append(chunk.sort_values('user_rating').anime_id.values)
My ratings dataframe has 30k unique users, 12k unique anime, the user ratings, and some metadata (anime_tags, year, number of episodes, synopsis, etc.).
I'm trying to find a way to add that metadata to my "Anime2Vec" model.
Do I need to build separate models and then combine (concatenate/blend/sum?) the embeddings?
As for my baseline model, it is built, as I said previously, from the users' watched sessions; then I use these functions:
def get_batch(seq, rel_window_size=.2):
    """Get a batch of data for a single user sequence.

    Data will be chosen randomly from within an anime window
    relative to the size of the entire sequence, with the
    initial point chosen at random.
    """
    window = math.floor(len(seq) * rel_window_size) + 1
    x = []
    y = []
    i = random.randint(0, len(seq) - 1)
    for _ in range(batch_size):
        x.append(seq[i])
        moves = list(range(max(i - window, 0), i)) + list(range(i + 1, min((i + window + 1, len(seq)))))
        j = random.choice(moves)
        y.append(seq[j])
        i = j
    return x, y

def gen_batch(data, batch_size, rel_window_size=.2, shuffle=True):
    """Make a generator for data using each 'user' as a document."""
    while True:
        for doc_index, doc in enumerate(data):
            x, y = get_batch(doc, rel_window_size)
            yield np.array(x, 'int32'), np.expand_dims(np.array(y, 'int32'), 1)
        if shuffle:
            random.shuffle(data)
Then I took a few samples to check whether my model works fine:
val_anime = ['Naruto',
'Bleach',
"Clannad",
"Air",
"Code Geass Hangyaku no Lelouch",
"Overlord",
"Mononoke Hime",
"Hajime no Ippo",
"Fullmetal Alchemist"]
val_ids = [reverse_dictionary[s] for s in val_anime]
val_set = np.array(val_ids, 'int32')
Then I define the model and data:
# valid_examples = np.array(random.sample(range(1, valid_window+1), valid_size))# valid_e
vocab_size = len(dictionary)
batch_size = 128
embedding_size = 50 # Dimension of the embedding vector.
val_size =len(val_set)
lr = 1.
model = SkipGramModel(vocab_size, val_set, embedding_dims=embedding_size,
batch_size=batch_size, sample_factor=1., lr=lr, optimizer= tf.train.AdagradOptimizer)
# create the data generator
data_gen = gen_batch(data, batch_size, rel_window_size = .2, shuffle=True)
Parameters for training.
steps_per_cycle = (len(data) // batch_size)
n_iter = 300 # number of full cycles through data
num_steps = int(n_iter * steps_per_cycle)
lstep = steps_per_cycle # steps to show average loss
vstep = steps_per_cycle * 20 # steps to show val data
I define a function to show the nearest items:
def show_n_similar(item, sim, k=5):
    item_name = dictionary[item]
    nearest = (-sim).argsort()[1:k + 1]
    log_str = '\nNearest to - {}:\n'.format(item_name)
    for k in range(k):
        log_str += '\t{},\n'.format(dictionary[nearest[k]])
    print(log_str)

losses = []
Finally, I train the model to find the nearest neighbours of my previously defined list:
with tf.Session() as sess:
    sess.run(model.init_op())
    print('initialized ...')
    # set so we can watch average loss during training
    for step in tqdm(range(num_steps)):
        batch_inputs, batch_labels = next(data_gen)
        feed_dict = {model.x: batch_inputs,
                     model.y: batch_labels}
        _, loss_ = sess.run([model.optimize, model.loss], feed_dict=feed_dict)
        losses.append(loss_)  # for plotting

    sim = model.similarity.eval()
    for i in range(len(val_ids)):
        show_n_similar(model.val_data[i], sim[i], k=5)
    out_embeddings = model.normalized_embeddings.eval()
I would like some advice on how to add my metadata to the model; as a first try, just the anime_tags.
My anime_tags are represented by a list of keywords, like this:
anime_id | tags
8        | action, historical, sci-fi, comedy
12       | comedy, romance
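To make the concat/sum idea concrete, here is a rough sketch of what I have in mind (the names n_tags, anime_ids and anime_tag_ids are hypothetical, not from my current code):
n_anime = 12000
n_tags = 500          # hypothetical number of distinct tags
embedding_size = 50
anime_ids = tf.placeholder(tf.int32, [None])            # one anime per example
anime_tag_ids = tf.placeholder(tf.int32, [None, None])  # padded tag ids per anime
anime_embeddings = tf.get_variable("anime_emb", [n_anime, embedding_size])
tag_embeddings = tf.get_variable("tag_emb", [n_tags, embedding_size])
anime_vec = tf.nn.embedding_lookup(anime_embeddings, anime_ids)    # [batch, 50]
tag_vecs = tf.nn.embedding_lookup(tag_embeddings, anime_tag_ids)   # [batch, max_tags, 50]
tag_vec = tf.reduce_mean(tag_vecs, axis=1)                         # average the tag embeddings
combined_concat = tf.concat([anime_vec, tag_vec], axis=1)          # concat variant -> [batch, 100]
combined_sum = anime_vec + tag_vec                                 # sum variant -> [batch, 50]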
I've modified some really simple tutorial code (below) to train a linear regression model (deliberately simple, because I'm trying to learn how it works). The training works fine, as I get the predicted result. I am, however, finding it impossible to figure out the syntax to feed in a piece of test data (x) and get the model to spit out the predicted y value. The error I get is "cannot feed value of shape (1,) for Tensor which has shape (?, 1)".
Can somebody take a look at the last section (#rebuild the graph so we can make a prediction from test data....) and point me in the right direction, please? Thank you in advance.
sample data for x is in column 0 of each row in the CSV
import numpy as np
import tensorflow as tf
import csv

# Model linear regression y = Wx + b
x = tf.placeholder(tf.float32, [None, 1], name='varx')
W = tf.Variable(tf.zeros([1, 1]), name='W')
b = tf.Variable(tf.zeros([1]), name='b')
product = tf.matmul(x, W)
y = tf.placeholder(tf.float32, [None, 1], name='vary')
y = product + b
y_ = tf.placeholder(tf.float32, [None, 1], name='varouty')

# Cost function sum((y_-y)**2)
cost = tf.reduce_mean(tf.square(y_ - y))

# Training using Gradient Descent to minimize cost
train_step = tf.train.GradientDescentOptimizer(0.0000001).minimize(cost)

sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)
epochs = 500

list1 = []
with open(r'c:\temp\book1.csv', 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        i2 = int(row[0])
        list1.append(i2)

# now loop through the list and add the items
for i in range(epochs):
    # Create fake data for y = W.x + b where W = 2, b = 0
    i0 = list1[i]
    xs = np.array([[i0]])
    ys = np.array([[2 * i0]])
    # Train
    feed = {x: xs, y_: ys}
    sess.run(train_step, feed_dict=feed)

print("xs=" + str(xs))
print("ys=" + str(ys))
print("After %d iterations:" % i)
print("W: %f" % sess.run(W))
print("b: %f" % sess.run(b))

# NOTE: W should be close to 2, and b should be close to 0

# rebuild the graph so we can make a prediction from test data....
xg = tf.get_default_graph()
x_input = xg.get_tensor_by_name('varx:0')
y_output = xg.get_tensor_by_name('vary:0')
W1 = xg.get_tensor_by_name('W:0')
b1 = xg.get_tensor_by_name('b:0')
with tf.Session(graph=xg) as sess1:
    x_example = [3]
    y_prediction = sess1.run(y_output, feed_dict={x_input: x_example})
    print(y_prediction)
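(For comparison, a minimal sketch of a feed that matches the placeholder's [None, 1] shape; illustrative only, reusing the sess, x and y defined above:)
x_example = np.array([[3.0]])                     # shape (1, 1), not (1,)
y_prediction = sess.run(y, feed_dict={x: x_example})
print(y_prediction)                               # should be close to 2 * 3 = 6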
I'm trying to fit an exponentially decaying model (y = A*x^B + C) to some data, but have yet to get a value other than 0 for B. I have two "working" sets of code right now: one steps through each X,Y pair, and the other attempts to use the entire [X,Y] array, but I'm not sure I have implemented the latter correctly. For now I'd just like it to fit a curve correctly. The linear model works fine, so I'm not sure where this is going south.
Data is here - PASTEBIN
#!/usr/bin/python
import numpy as np
import tensorflow as tf
import sys
import matplotlib.pyplot as plt

k = 0
xdata = []
ydata = []
# Open the data and read it in, ignore the header.
with open('curvedata_full_formatted.csv') as f:
    for line in f:
        k += 1
        if k == 1: continue
        items = line.split(',')
        xdata.append(float(items[0]))
        ydata.append(float(items[1]))

# Model regression y = A*x^B + C
# x - data to be fed into the model - 1 feature
x = tf.placeholder(tf.float32, [None, 1])
# A - training variable - 1 feature, 1 output
A = tf.Variable(tf.zeros([1, 1]))
# B - training variable - 1 output
B = tf.Variable(tf.zeros([1, 1]))
# C - training variable - 1 output
C = tf.Variable(tf.zeros([1]))
# x^B
xb = tf.exp(B)
# A*x^b
product = tf.mul(A, xb)
# Prediction
y = tf.add(product, C)
# Actual value ybar
y_ = tf.placeholder(tf.float32)
# Cost function sum((y_-y)**2)
cost = tf.reduce_mean(tf.square(y_ - y))
# Training using Gradient Descent to minimize cost
train_step = tf.train.GradientDescentOptimizer(1 * 10 ** -9).minimize(cost)

sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)
steps = 150

for i in range(steps):
    # Read in data from log file and use as x,y
    for (X, Y) in zip(xdata, ydata):
        #xs = np.array([[xdata]])
        #ys = np.array([[ydata]])
        # Train
        # Feed dict x placeholder xs, y_ placeholder ys
        X = np.array([[X]])
        Y = np.array([[Y]])
        feed = {x: X, y_: Y}
        sess.run(train_step, feed_dict=feed)
    sys.stdout.write("\rIteration %i " % i + "cost %.15f" % sess.run(cost, feed_dict=feed))
    sys.stdout.flush()

print ''
print 'A: %f' % sess.run(A)
print 'B: %f' % sess.run(B)
print 'C: %f' % sess.run(C)
As a test, try starting the optimizer with initial values close to the expected final parameters. This test will tell you whether or not the problem is in the selection of initial parameter values.
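A minimal sketch of that test, assuming (purely for illustration) that the fit is expected to land somewhere near A ≈ 50, B ≈ -0.5, C ≈ 5; only the variable initialisation changes:
A = tf.Variable(tf.constant([[50.0]]))   # expected amplitude
B = tf.Variable(tf.constant([[-0.5]]))   # expected exponent
C = tf.Variable(tf.constant([5.0]))      # expected offset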