Tensorflow Linear Regression predictions returning [nan] - python

I am trying to create my first linear regressor using Tensor Flow (without the help of estimators), and in each iteration, I only see a cost value of NaN. I think I am not doing something right, but unable to zero in on the issue. Can someone please help me troubleshoot the problem?
I am using the CA housing dataset
# Common imports
import math
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn import metrics
california_housing_dataframe = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv", sep=",")
I am predicting the median_house_value column
data_X = california_housing_dataframe.iloc[:, :8]
data_y = california_housing_dataframe.iloc[:, 8]
print('Features (X):\n', data_X.head(), '\n')
print('Target (y):\n', data_y.head(), '\n')
Create training and validation sets
from sklearn.model_selection import train_test_split
data_X_train, data_X_validate = train_test_split(data_X, test_size=0.2, random_state=42)
data_y_train, data_y_validate = train_test_split(data_y, test_size=0.2, random_state=42)
Setup the hyperspace parameters and TensorFlow variables
# Hyperspace Params
learning_rate = 0.01
training_epochs = 1 #40
batch_size = 500 #50
totalBatches = len(data_X_train)/batch_size
n, m = data_X_train.shape # 17,000 Rows + 9 Features
print('n=', n, ', m=', m)
W = tf.Variable(tf.random_uniform([m, 1], -1.0, 1.0, dtype = tf.float64), name="theta") # Random initialization
b = tf.Variable(np.random.randn(), name = "b", dtype = tf.float64)
X = tf.placeholder(tf.float64, shape=(None, m), name="X")
y = tf.placeholder(tf.float64, shape=(None, 1), name="y")
print('X.shape :\n', X.shape, '\n')
print('y.shape :\n', y.shape, '\n')
print('b.shape :\n', b.shape, '\n')
print('Thetha.shape (W):\n', W.shape, '\n')
y_pred = tf.add(tf.matmul(X, W), b, name="predictions")
error = y_pred - y
cost = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Global Variables Initializer
init = tf.global_variables_initializer()
Now, training the model returns me NaN values only
def get_batch(X, y, batch_size):
rnd_idx = np.random.permutation(len(X))
n_batches = len(X) // batch_size
for batch_idx in np.array_split(rnd_idx, n_batches):
X_batch, y_batch = X.iloc[batch_idx, :], y[batch_idx]
yield X_batch, y_batch
# Global Variables Initializer
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
for epoch in range(training_epochs):
for X_batch, y_batch in get_batch(data_X_train, data_y_train, batch_size):
y_batch = np.array(y_batch).reshape(-1, 1)
sess.run(optimizer, feed_dict={X: X_batch, y: y_batch})
curr_y_pred, curr_error, curr_cost = sess.run([y_pred, error, cost], {X: X_batch, y: y_batch})
print('Training... batch.shape: ', X_batch.shape,'curr_error:', curr_error)
Result looks like
Training... batch.shape: (504, 8) curr_error: [[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
...

Your issue comes from the pd.read_csv(...) function. I swapped it for the NumPy version (I am not familiar with Pandas) and it works like a charm. Here is the whole snippet:
import math
import numpy as np
import tensorflow as tf
from sklearn import metrics
california_housing_dataframe = np.genfromtxt('https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv', delimiter=',', skip_header=1)
data_X = california_housing_dataframe[:, :8]
data_y = california_housing_dataframe[:, 8]
from sklearn.model_selection import train_test_split
data_X_train, data_X_validate = train_test_split(data_X, test_size=0.2, random_state=42)
data_y_train, data_y_validate = train_test_split(data_y, test_size=0.2, random_state=42)
# Hyperspace Params
learning_rate = 0.01
training_epochs = 1 #40
batch_size = 500 #50
totalBatches = len(data_X_train)/batch_size
n, m = data_X_train.shape # 17,000 Rows + 9 Features
print('n=', n, ', m=', m)
W = tf.Variable(tf.random_uniform([m, 1], -1.0, 1.0, dtype = tf.float64), name="theta") # Random initialization
b = tf.Variable(np.random.randn(), name = "b", dtype = tf.float64)
X = tf.placeholder(tf.float64, shape=(None, m), name="X")
y = tf.placeholder(tf.float64, shape=(None, 1), name="y")
print('X.shape :\n', X.shape, '\n')
print('y.shape :\n', y.shape, '\n')
print('b.shape :\n', b.shape, '\n')
print('Thetha.shape (W):\n', W.shape, '\n')
y_pred = tf.add(tf.matmul(X, W), b, name="predictions")
error = y_pred - y
cost = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Global Variables Initializer
init = tf.global_variables_initializer()
def get_batch(X, y, batch_size):
rnd_idx = np.random.permutation(len(X))
n_batches = len(X) // batch_size
for batch_idx in np.array_split(rnd_idx, n_batches):
X_batch, y_batch = X[batch_idx, :], y[batch_idx]
yield X_batch, y_batch
with tf.Session() as sess:
sess.run(init)
for epoch in range(training_epochs):
for X_batch, y_batch in get_batch(data_X_train, data_y_train, batch_size):
y_batch = np.array(y_batch).reshape(-1, 1)
sess.run(optimizer, feed_dict={X: X_batch, y: y_batch})
curr_y_pred, curr_error, curr_cost = sess.run([y_pred, error, cost], {X: X_batch, y: y_batch})
print('Training... batch.shape: ', X_batch.shape,'curr_error:', curr_error)

Related

Why isn't Tensorboard showing any graphs even though I have log files?

When I open Tensorboard, I get a window that says "no dashboards are active for the current data set" even though my Tensorboard log directory has files in it.
Here's the command I'm using to start Tensorboard
tensorboard --logdir tf_logs/
The tf_logs directory has these folders and files in it
run-20190609234531
events.out.tfevents.1560125157.BRUBIN
run-20190610010816
events.out.tfevents.1560128897.BRUBIN
run-20190610010949
events.out.tfevents.1560128989.BRUBIN
Here's the code that I used to create the log files (the add_summary is towards the end of the code).
import datetime
import numpy as np
import sklearn
import tensorflow as tf
from datetime import datetime
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
def fetch_batch(epoch, batch_index, batch_size):
np.random.seed(epoch * n_batches + batch_index)
indices = np.random.randint(m, size=batch_size)
X_batch = scaled_housing_data_plus_bias[indices]
y_batch = housing.target.reshape(-1, 1)[indices]
return X_batch, y_batch
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)
housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]
scaler = StandardScaler(copy = True)
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()
batch_size = 100
learning_rate = 0.01
n_epochs = 1000
n_batches = int(np.ceil(m / batch_size))
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
with tf.Session() as sess:
sess.run(init)
for epoch in range(n_epochs):
for batch_index in range(n_batches):
X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
if batch_index % 10 == 0:
summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
step = epoch * n_batches + batch_index
##### Write the Tensorboard log #####
file_writer.add_summary(summary_str, step)
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
best_theta = theta.eval()
file_writer.close()
sess.close()
Why is Tensorboard not showing these graphs?
I ran your code and found one error:
learning_rate = 0.01
is defined after it's used.
I changed it to be defined before it's used and the code ran fine, I also ran tensorboard and it showed me the graph and scalars.
however if that is not your problem I can think of only one other problem:
You have to be in the directory where the tf_logs directory is in.

loss: nan using Keras vs non-nan (working) output using tensorflow

I am trying to replicate old code that I had in tensorflow but in Keras format. For some reason my loss is always nan. I think the error is in the loss that I am using ('categorical_crossentropy' in keras vs 'tf.nn.softmax_cross_entropy_with_logits' in tensorflow)
Keras code:
import keras
from keras.models import Sequential
from keras.layers import Dropout, Dense, Activation
from keras.regularizers import l2
from keras.layers.normalization import BatchNormalization
# Keras items
from keras.optimizers import Adam, Nadam
from keras.activations import relu, elu
from keras.losses import binary_crossentropy, categorical_crossentropy
from keras import metrics
import pandas as pd
import numpy as np
x_main = pd.read_csv("glioma DB X.csv")
y_main = pd.read_csv("glioma DB Y.csv")
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_main, y_main, test_size=0.3)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5)
# train shape
np.shape(x_train), np.shape(y_train)
((132, 47), (132, 1))
# Normalize training data; will want to have the same mu and sigma for test
def normalize_features(dataset):
mu = np.mean(dataset, axis = 0) # columns
sigma = np.std(dataset, axis = 0)
norm_parameters = {'mu': mu,
'sigma': sigma}
return (dataset-mu)/(sigma+1e-10), norm_parameters
# Normal X data; using same mu and sigma from test set;
x_train, norm_parameters = normalize_features(x_train)
x_val = (x_val-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
x_test = (x_test-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
params = {'lr': 0.001,
'batch_size': 30,
'epochs': 8000,
'dropout': 0.5,
'weight_regulizer':['l2'],
'optimizer': 'adam',
'losses': 'categorical_crossentropy',
'activation':'relu',
'last_activation': 'softmax'}
from keras.utils.np_utils import to_categorical
#categorical_labels = to_categorical(int_labels, num_classes=None)
if params['losses']=='categorical_crossentropy':
y_train = to_categorical(y_train,num_classes=4)
y_val = to_categorical(y_val,num_classes=4)
y_test = to_categorical(y_test,num_classes=4)
model = Sequential()
# layer 1
model.add(Dense(30, input_dim=x_train.shape[1],
W_regularizer=l2(0.01),
kernel_initializer='he_uniform'))
model.add(BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True))
model.add(Activation(params['activation']))
model.add(Dropout(params['dropout']))
# layer 2
model.add(Dense(20, W_regularizer=l2(0.01),
kernel_initializer='he_uniform'))
model.add(BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True))
model.add(Activation(params['activation']))
model.add(Dropout(params['dropout']))
# if we want to also test for number of layers and shapes, that's possible
#hidden_layers(model, params, 1)
# Last layer
model.add(Dense(4, activation=params['last_activation'],
kernel_initializer='he_uniform'))
model.compile(loss=params['losses'],
optimizer=keras.optimizers.adam(lr=params['lr']),
metrics=['categorical_accuracy'])
history = model.fit(x_train, y_train,
validation_data=[x_val, y_val],
batch_size=params['batch_size'],
epochs=params['epochs'],
verbose=1)
Working code using tensorflow which gives me a pretty loss graph haha:
x_train, x_test, y_train, y_test = train_test_split(X_main, Y_main, test_size=0.3)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.5)
# ANOTHER OPTION IS TO USE SKLEARN sklearn.model_selection.ShuffleSplit
# look into stratification
# Normalize training data; will want to have the same mu and sigma for test
def normalize_features(dataset):
mu = np.mean(dataset, axis = 0) # columns
sigma = np.std(dataset, axis = 0)
norm_parameters = {'mu': mu,
'sigma': sigma}
return (dataset-mu)/(sigma+1e-10), norm_parameters
# TRY LOG TRANSFORMATION LOG(1+X) to deal with outliers
# change ordinal to one hot vector
# to make label encoder
# for c in x_train.columns[x_train.dtype == 'object']:
# X[c] (which was copy of xtrain) X[c].factorize()[0]
# able to plot feature importance in random forest
# Normal X data; using same mu and sigma from test set; then transposed
x_train, norm_parameters = normalize_features(x_train)
x_val = (x_val-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
x_test = (x_test-norm_parameters['mu'])/(norm_parameters['sigma']+1e-10)
x_train = np.transpose(x_train)
x_val = np.transpose(x_val)
x_test = np.transpose(x_test)
y_train = np.transpose(y_train)
y_val = np.transpose(y_val)
y_test = np.transpose(y_test)
# converting values from database to matrix
x_train = x_train.as_matrix()
x_val = x_val.as_matrix()
x_test = x_test.as_matrix()
y_train = y_train.as_matrix()
y_val = y_val.as_matrix()
y_test = y_test.as_matrix()
# testing shape
#print(y_train.shape)
#print(y_val.shape)
#print(y_test.shape)
#
#print(x_train.shape)
#print(x_val.shape)
#print(x_test.shape)
# convert y to array per value so 3 = [0 0 1]
def convert_to_one_hot(Y, C):
Y = np.eye(C)[Y.reshape(-1)].T
return Y
y_train = convert_to_one_hot(y_train, 4)
y_val = convert_to_one_hot(y_val, 4)
y_test = convert_to_one_hot(y_test, 4)
print ("number of training examples = " + str(x_train.shape[1]))
print ("number of test examples = " + str(x_test.shape[1]))
print ("X_train shape: " + str(x_train.shape))
print ("Y_train shape: " + str(y_train.shape))
print ("X_test shape: " + str(x_test.shape))
print ("Y_test shape: " + str(y_test.shape))
# minibatches for later
def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):
"""
Creates a list of random minibatches from (X, Y)
Arguments:
X -- input data, of shape (input size, number of examples)
Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples)
mini_batch_size - size of the mini-batches, integer
seed -- this is only for the purpose of grading, so that you're "random minibatches are the same as ours.
Returns:
mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y)
"""
m = X.shape[1] # number of training examples
mini_batches = []
# Step 1: Shuffle (X, Y)
permutation = list(np.random.permutation(m))
shuffled_X = X[:, permutation]
shuffled_Y = Y[:, permutation].reshape((Y.shape[0],m))
# Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
num_complete_minibatches = math.floor(m/mini_batch_size) # number of mini batches of size mini_batch_size in your partitionning
for k in range(0, num_complete_minibatches):
mini_batch_X = shuffled_X[:, k * mini_batch_size : k * mini_batch_size + mini_batch_size]
mini_batch_Y = shuffled_Y[:, k * mini_batch_size : k * mini_batch_size + mini_batch_size]
mini_batch = (mini_batch_X, mini_batch_Y)
mini_batches.append(mini_batch)
# Handling the end case (last mini-batch < mini_batch_size)
if m % mini_batch_size != 0:
mini_batch_X = shuffled_X[:, num_complete_minibatches * mini_batch_size : m]
mini_batch_Y = shuffled_Y[:, num_complete_minibatches * mini_batch_size : m]
mini_batch = (mini_batch_X, mini_batch_Y)
mini_batches.append(mini_batch)
return mini_batches
# starting TF graph
# Create X and Y placeholders
def create_xy_placeholder(n_x, n_y):
X = tf.placeholder(tf.float32, shape = [n_x, None], name = 'X')
Y = tf.placeholder(tf.float32, shape = [n_y, None], name = 'Y')
return X, Y
# initialize parameters hidden layers
def initialize_parameters(n_x, scale, hidden_units):
hidden_units= [n_x] + hidden_units
parameters = {}
regularizer = tf.contrib.layers.l2_regularizer(scale)
for i in range(0, len(hidden_units[1:])):
with tf.variable_scope('hidden_parameters_'+str(i+1)):
w = tf.get_variable("W"+str(i+1), [hidden_units[i+1], hidden_units[i]],
initializer=tf.contrib.layers.xavier_initializer(),
regularizer=regularizer)
b = tf.get_variable("b"+str(i+1), [hidden_units[i+1], 1],
initializer = tf.constant_initializer(0.1))
parameters.update({"W"+str(i+1): w})
parameters.update({"b"+str(i+1): b})
return parameters
# forward progression with batch norm and dropout
def forward_propagation(X, parameters, batch_norm=False, keep_prob=1):
a_new = X
for i in range(0, int(len(parameters)/2)-1):
with tf.name_scope('forward_pass_'+str(i+1)):
w = parameters['W'+str(i+1)]
b = parameters['b'+str(i+1)]
z = tf.matmul(w, a_new) + b
if batch_norm == True:
z = tf.layers.batch_normalization(z, momentum=0.99, axis=0)
a = tf.nn.relu(z)
if keep_prob < 1:
a = tf.nn.dropout(a, keep_prob)
a_new = a
tf.summary.histogram('act_'+str(i+1), a_new)
# calculating final Z before input into cost as logit
with tf.name_scope('forward_pass_'+str(int(len(parameters)/2))):
w = parameters['W'+str(int(len(parameters)/2))]
b = parameters['b'+str(int(len(parameters)/2))]
z = tf.matmul(w, a_new) + b
if batch_norm == True:
z = tf.layers.batch_normalization(z, momentum=0.99, axis=0)
return z
# compute cost with option for l2 regularizatoin
def compute_cost(z, Y, parameters, l2_reg=False):
with tf.name_scope('cost'):
logits = tf.transpose(z)
labels = tf.transpose(Y)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits,
labels = labels))
if l2_reg == True:
reg = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
cost = cost + tf.reduce_sum(reg)
with tf.name_scope('Pred/Accuracy'):
prediction=tf.argmax(z)
correct_prediction = tf.equal(tf.argmax(z), tf.argmax(Y))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
return cost, prediction, accuracy
# defining the model (need to add keep_prob for dropout)
def model(X_train, Y_train, X_test, Y_test,
hidden_units=[30, 20, 4], # hidden units/layers
learning_rate = 0.0001, # Learning rate
num_epochs = 10000, minibatch_size = 30, # minibatch/ number epochs
keep_prob=0.5, # dropout
batch_norm=True, # batch normalization
l2_reg=True, scale = 0.01, # L2 regularization/scale is lambda
print_cost = True):
ops.reset_default_graph() # to be able to rerun the model without overwriting tf variables
tf.set_random_seed(1) # to keep consistent results
seed = 3 # to keep consistent results
(n_x, m) = X_train.shape # (n_x: input size, m : number of examples in the train set)
n_y = Y_train.shape[0] # n_y : output size
costs = [] # To keep track of the cost
# Create Placeholders of shape (n_x, n_y)
X, Y = create_xy_placeholder(n_x, n_y)
# Initialize parameters
parameters = initialize_parameters(n_x, scale, hidden_units)
# Forward propagation: Build the forward propagation in the tensorflow graph
z = forward_propagation(X, parameters, keep_prob, batch_norm)
# Cost function: Add cost function to tensorflow graph
cost, prediction, accuracy = compute_cost(z, Y, parameters, l2_reg)
# Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
with tf.name_scope('optimizer'):
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
# Initialize all the variables
init = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# Start the session to compute the tensorflow graph
with tf.Session(config=config) as sess:
# Run the initialization
sess.run(init)
# Do the training loop
for epoch in range(num_epochs):
epoch_cost = 0. # Defines a cost related to an epoch
num_minibatches = int(m / minibatch_size) # number of minibatches of size minibatch_size in the train set
seed = seed + 1
minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)
for minibatch in minibatches:
# Select a minibatch
(minibatch_X, minibatch_Y) = minibatch
# IMPORTANT: The line that runs the graph on a minibatch.
# Run the session to execute the "optimizer" and the "cost", the feedict should contain a minibatch for (X,Y).
_ , minibatch_cost = sess.run([optimizer, cost],
feed_dict = {X: minibatch_X, Y: minibatch_Y})
epoch_cost += minibatch_cost / num_minibatches
# Print the cost every epoch
if print_cost == True and epoch % 100 == 0:
print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
prediction1=tf.argmax(z)
# print('Z5: ', Z5.eval(feed_dict={X: minibatch_X, Y: minibatch_Y}))
print('prediction: ', prediction1.eval(feed_dict={X: minibatch_X,
Y: minibatch_Y}))
correct1=tf.argmax(Y)
# print('Y: ', Y.eval(feed_dict={X: minibatch_X,
# Y: minibatch_Y}))
print('correct: ', correct1.eval(feed_dict={X: minibatch_X,
Y: minibatch_Y}))
if print_cost == True and epoch % 5 == 0:
costs.append(epoch_cost)
# plot the cost
plt.plot(np.squeeze(costs))
plt.ylabel('cost')
plt.xlabel('iterations (per tens)')
plt.title("Learning rate =" + str(learning_rate))
plt.show()
# lets save the parameters in a variable
parameters = sess.run(parameters)
print ("Parameters have been trained!")
# Calculate the correct predictions
correct_prediction = tf.equal(tf.argmax(z), tf.argmax(Y))
# Calculate accuracy on the test set
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print ("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
print ("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
return parameters
# run model on test data
parameters = model(x_train, y_train, x_test, y_test, keep_prob=1)

Tensorflow outputs accuracy 1 but all the results are the same (and wrong)

I'm trying to build a Tensorflow Neural Network, but can't get it to work correctly. It always outputs the same values for every observation. Tried to change activations functions, changing the learning rate, reshaping tensors and arrays, I'm missing something here.
The dataset is entirely numerical.
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split
df = pd.read_csv("data.csv")
X = np.array(df[["area","bathrooms", "sq_price"]])
y = df[["price"]]
y = np.array(y).reshape([47, 1])
normalizer = Normalizer()
X = normalizer.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_test = np.array(X_test, dtype='float32')
y_test = np.array(y_test, dtype="float32")
X_test.shape
n_inputs = len(X_train[0])
n_hidden = 5
n_outputs = 1
lr = 0.001
epochs = 5000
batch_size = 1
xph = tf.placeholder(tf.float32, [None, n_inputs])
yph = tf.placeholder(tf.float32, [None, n_outputs])
W1 = tf.Variable(tf.truncated_normal([n_inputs, n_hidden], stddev=0.1))
W2 = tf.Variable(tf.truncated_normal([n_hidden, n_outputs], stddev=0.1))
b1 = tf.Variable(tf.ones([n_hidden]))
b2 = tf.Variable(tf.ones([n_outputs]))
def feed_forward(X, W1, W2, b1, b2):
l1 = tf.tanh(tf.add(tf.matmul(X, W1), b1))
l2 = tf.add(tf.matmul(l1, W2),b2)
return l2
output = feed_forward(xph, W1, W2, b1, b2)
error = tf.reduce_mean((output - yph)**2)
optimizer = tf.train.GradientDescentOptimizer(lr).minimize(error)
init = tf.global_variables_initializer()
correct_prediction = tf.equal(tf.argmax(output,1), tf.argmax(yph,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.Session() as sess:
sess.run(init)
print("Weights 1 before training:\n ", sess.run([W1]), "\n")
for i in range(epochs):
rand_ind = np.random.randint(len(X_train), size = batch_size)
loss = sess.run([optimizer], feed_dict={xph: X_train[rand_ind], yph: y_train[rand_ind]})
if i == (epochs-1):
print("Done!\n")
print("Weights 1 after training: \n", sess.run([W1]), "\n")
print("Accuracy: ", sess.run(accuracy, feed_dict={xph:X_test, yph:y_test}), "\n")
print("Results for testing:\n ", sess.run(feed_forward(X_test, W1, W2, b1, b2), feed_dict={xph: X_test}), "\n")
print("Expected values:\n ", y_test)
The output of the code:
Weights 1 before training:
[array([[ 0.02620826, -0.11837681, 0.01349821, 0.04195584, 0.14087772],
[-0.10512593, 0.1383599 , -0.0632275 , 0.07759375, -0.09907298],
[-0.14932911, 0.13720822, 0.15072195, -0.09748196, 0.08388615]],
dtype=float32)]
Done!
Weights 1 after training:
[array([[ 1.0387952e+01, 1.7232073e+01, 2.9152514e+01, -1.8471668e+01,
2.2215986e+01],
[-8.4001064e-02, 1.7373289e-01, -3.8207369e-03, 3.9849356e-02,
-5.4067656e-02],
[ 5.5025685e-01, 1.3086454e+00, 2.1180787e+00, -1.3474523e+00,
1.5743145e+00]], dtype=float32)]
Accuracy: 1.0
Results for testing:
[[340043.88]
[340043.88]
[340043.88]
[340043.88]
[340043.88]
[340043.88]
[340043.88]
[340043.88]
[340043.88]
[340043.88]]
Expected values:
[[349900.]
[369000.]
[573900.]
[252900.]
[299900.]
[329900.]
[449900.]
[285900.]
[212000.]
[229900.]]

Logits and labels must be same size

I'm trying to create a neural network that takes 13 features as input from multiple csv files one at a time and measure accuracy after each iteration. Here is my code snippet:
import tensorflow as tf
import numpy as np
from tensorflow.contrib.layers import fully_connected
import os
import pandas as pd
n_inputs = 13
n_hidden1 = 30
n_hidden2 = 10
n_outputs = 2
learning_rate = 0.01
n_epochs = 40
batch_size = 1
patient_id = os.listdir('./subset_numerical')
output = pd.read_csv('output.csv')
sepsis_pat = output['output'].tolist()
X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
y = tf.placeholder(tf.int64, shape=[None], name="y")
def data_processor(n):
id = pd.read_csv('./subset_numerical/'+patient_id[n])
id_input = np.array([id['VALUE'].tolist()])
for s in sepsis_pat:
if str(s) == str(patient_id[n].split('.')[0]):
a = 1
try:
if a == 1:
a = 0
return [id_input, np.array([1])]
except:
return [id_input, np.array([0])]
def test_set():
id_combined = []
out = []
for p in range(300, len(patient_id)):
try:
id1 = pd.read_csv('./subset_numerical/' + patient_id[p])
id_input1 = np.array(id1['VALUE'].tolist())
id_combined.append(id_input1)
for s in sepsis_pat:
if str(s) == str(patient_id[p].split('.')[0]):
a = 1
try:
if a == 1:
a = 0
out.append([1, 0])
except:
out.append([0, 1])
except:
pass
return [np.array(id_combined), np.array(out)]
# Declaration of hidden layers and calculation of loss goes here
# Construction phase begins
with tf.name_scope("dnn"):
hidden1 = fully_connected(X, n_hidden1, scope="hidden1")
hidden2 = fully_connected(hidden1, n_hidden2, scope="hidden2")
logits = fully_connected(hidden2, n_outputs, scope="outputs", activation_fn=None) # We will apply softmax here later
# Calculating loss
with tf.name_scope("loss"):
xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")
# Training with gradient descent optimizer
with tf.name_scope("train"):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
# Measuring accuracy
with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
accuracy_summary = tf.summary.scalar('accuracy', accuracy)
# Variable initialization and saving model goes here
# Construction is finished. Let's get this to work.
with tf.Session() as sess:
init.run()
for epoch in range(n_epochs):
a = 0
for iteration in range(300 // batch_size):
X_batch, y_batch = data_processor(iteration)
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
X_test, y_test = test_set()
acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
save_path = saver.save(sess, "./my_model_final.ckpt")
But I'm stuck with this error:
logits and labels must be same size: logits_size=[1,2] labels_size=[1,1]
The error seems to occur at this line:
correct = tf.nn.in_top_k(logits, y, 1)
What am I doing wrong?
Based on your error log provided, the problem is in this line of your code:
xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
Ensure that both of them have same shape and dtype.
The shape should be of the format [batch_size, num_classes] and dtype should be of type float16, float32 or float64. Check the documentation of softmax_cross_entropy_with_logits for more details.
Since you've defined n_outputs = 2, the shape of logits is [?, 2] (? means batch size), while the shape of y is just [?]. In order to apply the softmax loss function, the last FC layer should return a flat tensor, which can be compared with y.
Solution: set n_outputs = 1.

IndexError: too many indices for array

I have recently started working with tensorflow and this is like my second piece of code and I am stuck while designing this neural network. I am not able to increment the batch size and this problem has been persisting for quite a while.
import numpy as np
import pandas as pd
import tensorflow as tf
import math
#importing the data and preprocessing it
dataset = pd.read_csv('C:\\Users\\Geeks_Sid\\Documents\\Deep-Learning-A-Z\Deep Learning A-Z\\Volume 1 - Supervised Deep Learning\\Part 1 - Artificial Neural Networks (ANN)\\Section 4 - Building an ANN\\Artificial_Neural_Networks\\Churn_Modelling.csv')
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
onehotencoder = OneHotEncoder(categorical_features = [1])
X = onehotencoder.fit_transform(X).toarray()
#creating a train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Creating layers for Neural network
n_nodes_hl1 = 1000
n_nodes_hl2 = 1000
n_nodes_hl3 = 1000
n_classes = 1
batch_size = 50
x = tf.placeholder('float', [None, 11])
y = tf.placeholder('float')
def neural_network_model(data):
hidden_1_layer = {'weights':tf.Variable(tf.random_normal([11, n_nodes_hl1])),
'biases':tf.Variable(tf.random_normal([n_nodes_hl1]))}
hidden_2_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
'biases':tf.Variable(tf.random_normal([n_nodes_hl2]))}
hidden_3_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
'biases':tf.Variable(tf.random_normal([n_nodes_hl3]))}
output_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
'biases':tf.Variable(tf.random_normal([n_classes])),}
l1 = tf.add(tf.matmul(data,hidden_1_layer['weights']), hidden_1_layer['biases'])
l1 = tf.nn.relu(l1)
l2 = tf.add(tf.matmul(l1,hidden_2_layer['weights']), hidden_2_layer['biases'])
l2 = tf.nn.relu(l2)
l3 = tf.add(tf.matmul(l2,hidden_3_layer['weights']), hidden_3_layer['biases'])
l3 = tf.nn.relu(l3)
output = tf.matmul(l3,output_layer['weights']) + output_layer['biases']
print("I was in neural netowrk m")
return output
def train_neural_network(x):
prediction = neural_network_model(x)
# OLD VERSION:
#cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(prediction,y) )
# NEW:
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y) )
optimizer = tf.train.AdamOptimizer().minimize(cost)
hm_epochs = 10
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
# OLD:
for epoch in range(hm_epochs):
epoch_loss = 0
current = 0
for _ in range(80):
currentprev = current
current += 100
epoch_x, epoch_y = tuple(X_train[:,currentprev:current]) ,tuple(y_train[:,currentprev:current])
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:',accuracy.eval({x:X_test, y:y_test}))
#sess.run(tf.initialize_all_variables())
# NEW:
sess.run(tf.global_variables_initializer())
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
train_neural_network(x)
I am dealing with the error that looks like this.
train_neural_network(x)
I was in neural netowrk m
Traceback (most recent call last):
File "<ipython-input-8-7c7cbdae9b34>", line 1, in <module>
train_neural_network(x)
File "<ipython-input-7-b7e263fe7976>", line 20, in train_neural_network
epoch_x, epoch_y = tuple(X_train[:,currentprev:current]) ,tuple(y_train[:,currentprev:current])
IndexError: too many indices for array
I am trying a replicate a code of tensorflow MNIST dataset classification where they used this following piece of code. I hope you're able to compare this code with mine. If there are any corrections, please do help me
def train_neural_network(x):
prediction = neural_network_model(x)
# OLD VERSION:
#cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(prediction,y) )
# NEW:
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y) )
optimizer = tf.train.AdamOptimizer().minimize(cost)
hm_epochs = 10
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
# OLD:
#sess.run(tf.initialize_all_variables())
# NEW:
sess.run(tf.global_variables_initializer())
for epoch in range(hm_epochs):
epoch_loss = 0
for _ in range(int(mnist.train.num_examples/batch_size)):
epoch_x, epoch_y = mnist.train.next_batch(batch_size)
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:',accuracy.eval({x:mnist.test.images, y:mnist.test.labels}))
As you can see, my code is quite similar to the one for MNIST but i am not able to return a particular tuple which is at this piece of code.
epoch_x, epoch_y = mnist.train.next_batch(batch_size)
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
A thanks in advance. if you feel the question is repetitive, i want to explain that i couldn't find others relevant.
I don't understand very well the reshaping you're performing on your data, nor the original format of it but here y = dataset.iloc[:, 13].values it seems that y is a 1D-array while here tuple(y_train[:,currentprev:current] you are accessing it like a 2D matrix and the error is telling you that you are using too many (2) indices to index a 1D array.

Categories