I am using tensorflow to do a linear regression. Here I am facing a problem:
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
# Per-sample gradient-descent fit of y = W * x + b on the first 100 rows of data.csv.
plt.rcParams["figure.figsize"] = (8, 6)

data = pd.read_csv('./data.csv')
xs = data["A"][:100]
ys = data["B"][:100]

# Unshaped placeholders: one (x, y) pair is fed per training step.
X = tf.placeholder(tf.float32, name='X')
Y = tf.placeholder(tf.float32, name='Y')
W = tf.Variable(tf.random_normal([1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')
Y_pred = tf.add(tf.multiply(X, W), b)

sample_num = xs.shape[0]
# NOTE(review): each step feeds a single sample, so dividing by sample_num
# scales every per-step gradient down by that factor.
loss = tf.reduce_sum(tf.pow(Y_pred - Y, 2)) / sample_num
learning_rate = 0.0001
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
n_samples = xs.shape[0]
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Variables must be initialised before any op reads them.
    sess.run(init)
    for i in range(100):
        for x, y in zip(xs, ys):
            _, l = sess.run([optimizer, loss], feed_dict={X: x, Y: y})
    # Fetch into new names so the graph variables W and b are not shadowed
    # by their NumPy values.
    W_value, b_value = sess.run([W, b])

plt.plot(xs, ys, 'bo', label='Real data')
plt.plot(xs, xs * W_value + b_value, 'r', label='Predicted data')
plt.legend()
plt.show()
The data.csv is here.
The plot is diametrically opposed to what I expected:
So, what is the problem? I am a beginner of python and tensorflow, and just can't reach the points.
As Nipun mentioned, try AdamOptimizer instead of GradientDescentOptimizer.
You will often find that AdamOptimizer is generally a better optimizer than GradientDescentOptimizer and reaches the minima much faster.
It does so by adapting the learning rate instead of keeping it constant (0.0001 in your case).
Also, the more epochs you train for, the better the model fits the training data (ignoring over-fitting here).
Since your learning rate and the number of epochs are too small, your regression models haven't converged. Therefore, you may need to increase the learning rate and use the tf.train.AdamOptimizer.
Here I set the learning rate to 2, epochs=10000 and got the following graph.
Here I have given the code with the comments where necessary.
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
# Mini-batch Adam fit of y = W * x + b on the first 100 rows of data.csv.
plt.rcParams["figure.figsize"] = (8, 6)

data = pd.read_csv('./data.csv')
xs = data["A"][:100]
ys = data["B"][:100]

X = tf.placeholder(tf.float32, name='X')
Y = tf.placeholder(tf.float32, name='Y')
W = tf.Variable(tf.random_normal([1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')
Y_pred = tf.add(tf.multiply(X, W), b)
loss = tf.reduce_mean(tf.pow(Y_pred - Y, 2))

learning_rate = 2  # increase the learning rate
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)  # use the AdamOptimizer
BATCH_SIZE = 8  # mini-batch size
n_samples = xs.shape[0]
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Variables must be initialised before the first training step.
    sess.run(init)
    for i in range(10000):  # increase the number of epochs
        # Mini-batch gradient descent. Iterating over start offsets (instead of
        # zipping two ranges) keeps the final, shorter batch when n_samples is
        # not a multiple of BATCH_SIZE.
        for start in range(0, n_samples, BATCH_SIZE):
            end = start + BATCH_SIZE
            _, l = sess.run([optimizer, loss],
                            feed_dict={X: xs[start:end], Y: ys[start:end]})
    prediction = sess.run(Y_pred, feed_dict={X: xs})

plt.plot(xs, ys, 'bo', label='Real data')
plt.plot(xs, prediction, 'r', label='Predicted data')
plt.legend()
plt.show()
Also, you can use mini-batch gradient descent to accelerate convergence, as in the code above.
Moreover, you can increase the number of epochs and learning rate further to get the optimal result.
Hope this helps.
I built a tensorflow graph according to a few different resources online and am trying to fit a curve to y=x^2. I am using two hidden layers with 25 and 10 neurons respectively. The code completes, but the cost function never reduces and the ultimate chart of x_test to preds is clearly wrong. I've looked at lots of different resources and some O'reilly books, and I can't for the life of me figure out where I'm going wrong. Any help would be greatly appreciated.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from sklearn.model_selection import train_test_split
# Create Data: samples of y = x**2 for x in [-50, 50) with step 0.1.
X_range = np.arange(-50, 50, 0.1)
Y_range = X_range ** 2
# Build the arrays directly with NumPy: this snippet never imports pandas, so
# going through pd.DataFrame raised NameError. reshape(-1, ...) also avoids
# hard-coding the sample count.
x = X_range.reshape(-1, 1)  # feature column, shape (n, 1)
y = Y_range.reshape(-1)     # targets, shape (n,)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=.30, random_state=42)
c_t = []  # cost values recorded during training
# Create NNModel function
def neural_net_model(x_data, input_dim):
    """Build a fully connected 25-10-1 network and return its output tensor.

    Args:
        x_data: Input tensor. It is multiplied by a weight matrix of shape
            [input_dim, 25], so it must have input_dim columns.
        input_dim: Number of input features.

    Returns:
        The tensor produced by the final linear layer (one column, no
        activation).
    """
    # Build the First Layer
    W_1 = tf.Variable(tf.random_uniform([input_dim, 25]))
    b_1 = tf.Variable(tf.zeros([25]))
    layer_1 = tf.add(tf.matmul(x_data, W_1), b_1)
    layer_1 = tf.nn.relu(layer_1)

    # Build the Second Layer
    W_2 = tf.Variable(tf.random_uniform([25, 10]))
    b_2 = tf.Variable(tf.zeros([10]))
    layer_2 = tf.add(tf.matmul(layer_1, W_2), b_2)
    layer_2 = tf.nn.relu(layer_2)

    # Build output
    W_output = tf.Variable(tf.random_uniform([10, 1]))
    b_output = tf.Variable(tf.zeros([1]))
    output = tf.add(tf.matmul(layer_2, W_output), b_output)
    return output
# Both placeholders are column-shaped so `output - ys` is element-wise. With a
# 1-D label vector the (N, 1) - (N,) subtraction broadcasts to (N, N) and the
# reported cost is not the mean squared error.
xs = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="xs")
ys = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='ys')
output = neural_net_model(xs, 1)
cost = tf.reduce_mean(tf.square(output - ys))
train = tf.train.GradientDescentOptimizer(.005).minimize(cost)

y_train_col = y_train.reshape(-1, 1)  # targets as a column, matching `ys`

with tf.Session() as sess:
    # Variables must be initialised before the first training step.
    sess.run(tf.global_variables_initializer())
    # Run for 100 Epochs
    for i in range(100):
        # One gradient step per training sample.
        for j in range(x_train.shape[0]):
            sess.run(train, feed_dict={xs: x_train[j].reshape(1, 1),
                                       ys: y_train_col[j].reshape(1, 1)})
        if i % 10 == 0:
            c_t.append(sess.run(cost, feed_dict={xs: x_train, ys: y_train_col}))
            print('Epoch :', i, 'Cost :', c_t[-1])
    pred = sess.run(output, feed_dict={xs: x_test})
    print("cost: ", sess.run(cost, feed_dict={xs: x_train, ys: y_train_col}))

plt.scatter(x_test, y_test, label="Original Data")
plt.scatter(x_test, pred, label="Predicted Data")
plt.legend()
plt.show()
I was just playing with a model then i remembered this problem
The issue was that the values in Y_range get very large at some points, so the intermediate calculations in TensorFlow overflow and the result becomes inf.
To avoid that, you need to normalize the data so that it has a mean of 0 and a standard deviation of 1:
# Create Data
X_range = np.arange(-50, 50, 0.1)
Y_range = X_range ** 2


def normalizor(x, mean=None, std=None):
    """Standardise x to zero mean and unit standard deviation.

    Args:
        x: Array-like of numbers.
        mean: Value to subtract. Defaults to the mean of x.
        std: Value to divide by. Defaults to the standard deviation of x.

    Returns:
        A float ndarray equal to (x - mean) / std. A zero std is replaced by
        1 so that constant input maps to zeros instead of NaN.
    """
    x = np.asarray(x, dtype=float)
    # The original body read module-level names `mean` and `std` that were
    # never defined; derive them from the data unless the caller supplies them.
    if mean is None:
        mean = x.mean()
    if std is None:
        std = x.std()
    std = np.where(np.asarray(std) == 0, 1.0, std)
    return (x - mean) / std
Also, change the optimizer from GradientDescentOptimizer to AdamOptimizer:
# Build the training op with Adam (learning rate 0.005); `cost` is the loss
# tensor from the question's graph.
train = tf.train.AdamOptimizer(.005).minimize(cost)
I am new to tensorflow and i am tasked to design a feedforward neural network which consists of: an input layer, one hidden perceptron layer of 10 neurons and an output softmax layer. Assume a learning rate of 0.01, L2 regularization with weight decay parameter of 0.000001, and batch size of 32.
I would like to know if there is any way to check that the network I have created is what I intend to create, such as a graph showing the nodes.
The following is my attempt at the task, but I am not sure if it is correct.
import math
import tensorflow as tf
import numpy as np
import pylab as plt
# scale data
def scale(X, X_min, X_max):
    """Min-max scale X into [0, 1] using the given bounds.

    Args:
        X: Value or array to scale.
        X_min: Lower bound(s); broadcast against X.
        X_max: Upper bound(s); broadcast against X.

    Returns:
        (X - X_min) / (X_max - X_min). Where X_max equals X_min the divisor
        is replaced by 1, so a constant feature maps to 0 instead of NaN.
    """
    span = np.asarray(X_max - X_min, dtype=float)
    span = np.where(span == 0, 1.0, span)
    return (X - X_min) / span
def tfvariables(start_nodes, end_nodes):
    """Create the weight matrix and bias vector for one dense layer.

    Args:
        start_nodes: Number of inputs to the layer.
        end_nodes: Number of units in the layer.

    Returns:
        Tuple (W, b): W is a [start_nodes, end_nodes] variable drawn from a
        truncated normal with stddev 1/sqrt(start_nodes); b is a zero
        variable of length end_nodes.
    """
    init_stddev = 1.0 / math.sqrt(float(start_nodes))
    W = tf.Variable(
        tf.truncated_normal([start_nodes, end_nodes], stddev=init_stddev))
    b = tf.Variable(tf.zeros([end_nodes]))
    return W, b
# Dataset dimensions. The script below reads these names but never defined
# them: the loader takes the first 36 columns as features and the labels fall
# into 6 classes.
NUM_FEATURES = 36
NUM_CLASSES = 6

# Training hyperparameters.
learning_rate = 0.01
beta = 10 ** -6      # L2 weight-decay coefficient
epochs = 10000
batch_size = 32
num_neurons = 10     # units in the hidden layer
seed = 10
# Read the training data: the first 36 columns are features, the last column
# is the integer class label.
train_input = np.loadtxt('sat_train.txt', delimiter=' ')
trainX = train_input[:, :36]
train_Y = train_input[:, -1].astype(int)

# Min-max scale every feature column.
column_min = np.min(trainX, axis=0)
column_max = np.max(trainX, axis=0)
trainX = scale(trainX, column_min, column_max)

# There are 6 class-labels 1,2,3,4,5,7 -- remap 7 to 6 so they are contiguous.
train_Y[train_Y == 7] = 6
# One-hot encode: row i of the identity matrix is the encoding of label i + 1.
trainY = np.eye(NUM_CLASSES)[train_Y - 1]

# Experiment with a small dataset.
trainX = trainX[:1000]
trainY = trainY[:1000]
n = trainX.shape[0]
# Create the model
x = tf.placeholder(tf.float32, [None, NUM_FEATURES])
y_ = tf.placeholder(tf.float32, [None, NUM_CLASSES])

# Build the graph: input -> sigmoid hidden layer -> output logits.
W1, b1 = tfvariables(NUM_FEATURES, num_neurons)
W2, b2 = tfvariables(num_neurons, NUM_CLASSES)
logits_1 = tf.matmul(x, W1) + b1
perceptron_layer = tf.nn.sigmoid(logits_1)
logits_2 = tf.matmul(perceptron_layer, W2) + b2

# Per-example cross-entropy; the op applies the softmax to the logits itself.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=y_, logits=logits_2)

# Mean cross-entropy plus L2 regularisation weighted by beta.
regularizers = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2)
loss = tf.reduce_mean(cross_entropy) + beta * regularizers

# Create the gradient descent optimizer with the given learning rate.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
# Minimise the regularised loss. The original minimised the raw cross-entropy,
# so the L2 term never influenced training.
train_op = optimizer.minimize(loss)

correct_prediction = tf.cast(
    tf.equal(tf.argmax(logits_2, 1), tf.argmax(y_, 1)), tf.float32)
accuracy = tf.reduce_mean(correct_prediction)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# Seeded generator so the per-epoch shuffling is reproducible.
shuffle_rng = np.random.RandomState(seed)

with tf.Session(config=config) as sess:
    # Variables must be initialised before the first training step.
    sess.run(tf.global_variables_initializer())
    train_acc = []
    train_loss = []
    full_feed = {x: trainX, y_: trainY}
    for i in range(epochs):
        # One epoch = one shuffled pass over the data in mini-batches of
        # batch_size (the original fed the whole set in a single step and
        # never used batch_size).
        order = shuffle_rng.permutation(n)
        for start in range(0, n, batch_size):
            batch = order[start:start + batch_size]
            train_op.run(feed_dict={x: trainX[batch], y_: trainY[batch]})
        train_acc.append(accuracy.eval(feed_dict=full_feed))
        train_loss.append(loss.eval(feed_dict=full_feed))
        if i % 500 == 0:
            print('iter %d: accuracy %g loss %g' % (i, train_acc[i], train_loss[i]))

# plot learning curves, one figure per metric so they do not share an axis
plt.figure(1)
plt.plot(range(epochs), train_acc)
plt.xlabel(str(epochs) + ' iterations')
plt.ylabel('Train accuracy')

plt.figure(2)
plt.plot(range(epochs), train_loss)
plt.xlabel(str(epochs) + ' iterations')
plt.ylabel('Train loss')
plt.show()
You can use TensorBoard to visualize the graph you created. Basically, you have to follow a few steps to do this:
declare a writer as writer = tf.summary.FileWriter('PATH/TO/A/LOGDIR')
add the graph to the writer with writer.add_graph(sess.graph) with sess being your current tf.Session() in which you execute the graph
possibly you have to use writer.flush() to write it to disk immediately
Note that you have to add these lines AFTER building your graph.
You can view the graph by executing this command in your shell:
tensorboard --logdir=PATH/TO/A/LOGDIR
Then you are presented an address (usually something like localhost:6006) on which you can view the graph with your browser (Chrome and Firefox are guaranteed to work).
TensorBoard (in TensorFlow) is a useful tool.
Use tf.summary.FileWriter for writing the graph into a folder and run tensorboard from the corresponding directory.
Check the following links:
I have been trying to use an LSTM for regression in TensorFlow, but it doesn't fit the data. I have successfully fit the same data in Keras (with the same size network). My code for trying to overfit a sine wave is below:
import tensorflow as tf
import numpy as np
# Sliding-window dataset: each sample is the previous 50 values of a cosine
# wave and the target is the next value.
yt = np.cos(np.linspace(0, 2*np.pi, 256))
xt = np.array([yt[i-50:i] for i in range(50, len(yt))])[..., None]
yt = yt[-xt.shape[0]:]

g = tf.Graph()
with g.as_default():
    x = tf.constant(xt, dtype=tf.float32)
    y = tf.constant(yt, dtype=tf.float32)
    lstm = tf.nn.rnn_cell.BasicLSTMCell(32)
    outputs, state = tf.nn.dynamic_rnn(lstm, x, dtype=tf.float32)
    pred = tf.layers.dense(outputs[:, -1], 1)
    # NOTE(review): pred is [N, 1] while y is [N], so pred - y broadcasts to
    # [N, N]; this is the mismatch the question is about and is left as posted.
    loss = tf.reduce_mean(tf.square(pred - y))
    train_op = tf.train.AdamOptimizer().minimize(loss)
    init = tf.global_variables_initializer()

sess = tf.InteractiveSession(graph=g)
# Variables must be initialised before the first training step.
sess.run(init)
for i in range(200):
    _, l = sess.run([train_op, loss])
This results in a MSE of 0.436067 (while Keras got to 0.0022 after 50 epochs), and the predictions range from -0.1860 to -0.1798. What am I doing wrong here?
When I change my loss function to the following, the model fits properly:
def pinball(y_true, y_pred):
    """Mean pinball (quantile) loss over the quantile levels 0.01 ... 0.99.

    Args:
        y_true: Target tensor; indexed as y_true[:, None], so it is treated
            as 1-D with one value per sample.
        y_pred: Prediction tensor that must broadcast against a [1, 99] row
            of quantile levels -- presumably [N, 99]; confirm against caller.

    Returns:
        Scalar tensor: the loss averaged over all samples and quantiles.
    """
    # 100.0 keeps this a true division under Python 2 as well, where an
    # integer array divided by 100 would floor every level to 0.
    tau = np.arange(1, 100).reshape(1, -1) / 100.0
    under = tf.maximum(y_true[:, None] - y_pred, 0) * tau
    over = tf.maximum(y_pred - y_true[:, None], 0) * (1 - tau)
    pin = tf.reduce_mean(under + over)
    return pin
I also change the assignments of pred and loss to
# Emit 99 outputs per sample, one per quantile level (0.01 ... 0.99) used by
# pinball(), and train on the pinball loss.
pred = tf.layers.dense(outputs[:,-1], 99)
loss = pinball(y, pred)
This results in a decrease of loss from 0.3 to 0.003 as it trains, and seems to properly fit the data.
Looks like a shape/broadcasting issue. Here's a working version:
import tensorflow as tf
import numpy as np
# Sliding-window dataset: each row holds the previous 50 values of a cosine
# wave and the target is the next value.
yt = np.cos(np.linspace(0, 2*np.pi, 256))
xt = np.array([yt[i-50:i] for i in range(50, len(yt))])
yt = yt[-xt.shape[0]:]

g = tf.Graph()
with g.as_default():
    x = tf.constant(xt, dtype=tf.float32)
    y = tf.constant(yt, dtype=tf.float32)
    lstm = tf.nn.rnn_cell.BasicLSTMCell(32)
    # x[None, ...] prepends a batch dimension of size 1.
    outputs, state = tf.nn.dynamic_rnn(lstm, x[None, ...], dtype=tf.float32)
    # Squeeze the batch and unit axes so pred is 1-D like y and the
    # subtraction below is element-wise rather than broadcast.
    pred = tf.squeeze(tf.layers.dense(outputs, 1), axis=[0, 2])
    loss = tf.reduce_mean(tf.square(pred - y))
    train_op = tf.train.AdamOptimizer().minimize(loss)
    init = tf.global_variables_initializer()

sess = tf.InteractiveSession(graph=g)
# Variables must be initialised before the first training step.
sess.run(init)
for i in range(200):
    _, l = sess.run([train_op, loss])
x gets a batch dimension of 1 before going into dynamic_rnn, since with time_major=False the first dimension is expected to be a batch dimension. It's important that the last dimension of the output of tf.layers.dense gets squeezed off so that it doesn't broadcast with y (with the 206 samples here, TensorShape([206, 1]) and TensorShape([206]) broadcast to TensorShape([206, 206])). With those fixes it converges:
You are not passing-on the state from one call of dynamic_rnn to next. That's the problem for sure.
Also, why take only last item of the output through the dense layer and onward?
I am new to machine learning and TensorFlow. Currently I am trying to follow the tutorial's logic to create a simple linear regression model of the form y = a*x (there is no bias term here). However, for some reason, the model fails to converge to the correct value of "a". I created the data set myself in Excel, as shown below:
here is my code that tries to run tensorflow on this dummy data set I generated.
import tensorflow as tf
import pandas as pd
# Full-batch gradient-descent fit of y = x * w + b on the CSV data.
w = tf.Variable([[5]], dtype=tf.float32)
b = tf.Variable([-5], dtype=tf.float32)
x = tf.placeholder(shape=(None, 1), dtype=tf.float32)
y = tf.add(tf.matmul(x, w), b)
label = tf.placeholder(dtype=tf.float32)
# NOTE(review): y is [batch, 1] while the fed labels (ys) are 1-D, so the
# difference broadcasts to [batch, batch]; this is the issue the question is
# about and is left as posted.
loss = tf.reduce_mean(tf.squared_difference(y, label))

data = pd.read_csv("D:\\dat2.csv")
# .values replaces DataFrame.as_matrix(), which newer pandas releases removed.
xs = data.iloc[:, :1].values
ys = data.iloc[:, 1].values

optimizer = tf.train.GradientDescentOptimizer(0.000001).minimize(loss)
sess = tf.InteractiveSession()
# Variables must be initialised before the first training step.
sess.run(tf.global_variables_initializer())
for i in range(10000):
    # The loop as posted only printed w; run the training op so w is updated.
    sess.run(optimizer, feed_dict={x: xs, label: ys})
    if i % 100 == 0:
        print(i, sess.run(w))
below is the print out in ipython console, as you can see after 10000th iteration, the value for w is around 4.53 instead of the correct value 6.
I would really appreciate it if anyone could shed some light on what is going wrong here. I have played around with different learning rates from 0.01 to 0.0000001, but none of the settings makes w converge to 6. I have read suggestions to normalize the features to a standard normal distribution; is this normalization a must? Without normalization, is gradient descent unable to find the solution? Thank you very much!
It is a shaping problem: y and label don't have the same shape ([batch_size, 1] vs [batch_size]). In loss = tf.reduce_mean(tf.squared_difference(y, label)), it causes tensorflow to interpret things differently from what you want, probably by using some broadcasting... Anyway, the result is that your loss is not at all the one you want.
To correct that, simply replace
y = tf.add(tf.matmul(x, w), b)
with
y = tf.add(tf.matmul(x, w), b)
y = tf.reshape(y, shape=[-1])
My full working code below:
import tensorflow as tf
import pandas as pd
# Full-batch gradient-descent fit of y = x * w + b with matching 1-D shapes.
w = tf.Variable([[4]], dtype=tf.float64)
b = tf.Variable([10.0], dtype=tf.float64, trainable=True)
x = tf.placeholder(shape=(None, 1), dtype=tf.float64)
y = tf.add(tf.matmul(x, w), b)
# Flatten the [batch, 1] prediction so it lines up element-wise with the
# 1-D labels instead of broadcasting against them.
y = tf.reshape(y, shape=[-1])
label = tf.placeholder(shape=[None], dtype=tf.float64)
loss = tf.reduce_mean(tf.squared_difference(y, label))

my_path = "/media/sf_ShareVM/data2.csv"
data = pd.read_csv(my_path, sep=";")
max_n_samples_to_use = 50
# .values replaces DataFrame.as_matrix(), which newer pandas releases removed.
xs = data.iloc[:max_n_samples_to_use, :1].values
ys = data.iloc[:max_n_samples_to_use, 1].values

lr = 0.000001
optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr).minimize(loss)
sess = tf.InteractiveSession()
# Variables must be initialised before the first training step.
sess.run(tf.global_variables_initializer())
for i in range(100000):
    _, loss_value, w_value, b_value = sess.run(
        [optimizer, loss, w, b], {x: xs, label: ys})
    if i % 100 == 0:
        print(i, loss_value, w_value, b_value)
    # NOTE(review): the original rebuilt the optimizer every 2000 steps "to use
    # a smaller LR at first", but lr was never changed, so the rebuild only
    # added duplicate ops to the graph and has been dropped. To really vary the
    # learning rate, feed it through a placeholder; gradient clipping (by
    # global norm) is the cleaner guard against exploding gradients.
I implemented a basic MLP and I want it to predict a user-generated set of data, but the prediction looks as follows:
I am not sure why... I have nonlinearities in the hidden layers, and I tried multiple activations (ReLU, tanh, sigmoid), tried different optimisers, different learning rates, various architectures (more layers, fewer layers, dropout), but I never got this right.
Please note that I do believe it may be because of how I compute the predictions at the end (pred = sess.run(out, feed_dict={inputs:X.reshape(n_input, 1)})) as it may be incorrect, but I wouldn't know why. I also tried other methods like extracting the weights with w = sess.run(weights) and then feeding them to the model() function along with the input, but nothing worked.
Also, when monitoring the error, the error decreases between epochs.
Any ideas?
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Architecture
input_size = 1  # columns of the input placeholder
output_size = 1  # columns of the label placeholder and of the final layer
h1_size = 20  # units in the first hidden layer
h2_size = 50  # units in the second hidden layer
# 2 hidden layers network
def model(inputs, weights):
    """Forward pass of a bias-free MLP with two ReLU hidden layers.

    Args:
        inputs: Input tensor fed to the first matmul.
        weights: Mapping with the weight matrices under 'h1', 'h2' and 'h3'.

    Returns:
        The output of the final matmul (linear, no activation).
    """
    out1 = tf.nn.relu(tf.matmul(inputs, weights['h1']))
    out2 = tf.nn.relu(tf.matmul(out1, weights['h2']))
    return tf.matmul(out2, weights['h3'])
# Inputs/label placeholders
inputs = tf.placeholder('float', shape=(None, input_size))
labels = tf.placeholder('float', shape=(None, output_size))

# Learnable weights (the dict literal was left unclosed in the posted code)
weights = {
    'h1': tf.Variable(tf.random_normal(shape=(input_size, h1_size))),
    'h2': tf.Variable(tf.random_normal(shape=(h1_size, h2_size))),
    'h3': tf.Variable(tf.random_normal(shape=(h2_size, output_size))),
}

# Stores the result from the net
out = model(inputs, weights)

# Cost and optimisation: mean squared error minimised with Adadelta at its
# default settings.
cost = tf.reduce_mean(tf.square(out - labels))
opt = tf.train.AdadeltaOptimizer()
opt_operation = opt.minimize(cost)
# Generate some data: a sine-modulated ramp on [0, 1], scaled so its maximum is 1.
n_input = 1000
X = np.linspace(0, 1, n_input).astype('f')
y = X + 5 * np.sin(X * 10)
y /= y.max()

# Train
epochs = 2000
# NOTE(review): lr is never passed to the optimizer, which is constructed
# without arguments, so this value has no effect on training.
lr = 0.0000001
train_feed = {
    inputs: X.reshape(n_input, 1),
    labels: y.reshape(n_input, 1),
}

with tf.Session() as sess:
    # Variables must be initialised before the first training step.
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        _, c = sess.run([opt_operation, cost], feed_dict=train_feed)
        # Report the cost 20 times over the run.
        if not epoch % int(epochs/20):
            print(epoch, c)
    pred = sess.run(out, feed_dict={inputs: X.reshape(n_input, 1)})

plt.scatter(X, pred, color='red', label='prediction')
plt.scatter(X, y, label='data')
plt.legend()
plt.show()
Forgot bias terms: new graph
It works now but not sure if this fixed it?
New code uses:
# Weights and biases of the 2-hidden-layer network.
weights = {
    'h1': tf.Variable(tf.random_normal(shape=(input_size, h1_size))),
    'h2': tf.Variable(tf.random_normal(shape=(h1_size, h2_size))),
    'h3': tf.Variable(tf.random_normal(shape=(h2_size, output_size))),
    # One bias per unit of the layer it is added to (the posted version
    # shared a single scalar bias across each layer).
    'b1': tf.Variable(tf.zeros(shape=[h1_size])),
    'b2': tf.Variable(tf.zeros(shape=[h2_size])),
    'b3': tf.Variable(tf.zeros(shape=[output_size])),
}


def model(inputs, weights):
    """Forward pass of an MLP with two ReLU hidden layers and biases.

    Args:
        inputs: Input tensor fed to the first matmul.
        weights: Mapping with weight matrices 'h1', 'h2', 'h3' and bias
            vectors 'b1', 'b2', 'b3'.

    Returns:
        The linear output of the last layer.
    """
    out1 = tf.nn.relu(tf.matmul(inputs, weights['h1']) + weights['b1'])
    out2 = tf.nn.relu(tf.matmul(out1, weights['h2']) + weights['b2'])
    # The bias is added to the layer output; the posted code added it to the
    # weight matrix inside the matmul argument.
    return tf.matmul(out2, weights['h3']) + weights['b3']