I am currently trying to learn logistic regression, and am stuck on plotting a line from the weights after training. I am expecting an array of 3 values, but when I print the weights to check them, I get (with different values each time, but the same format):
[array([[ 0.42433906],
[-0.67847246]], dtype=float32)
array([-0.06681705], dtype=float32)]
My question, is why are the weights in this format of 2 arrays, rather than 1 array of length 3? And how do I interpret these weights so that I can plot the separating line?
Here is my code:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.regularizers import L1L2
import random
import numpy as np
# return the array data of shape (m, 2) and the array labels of shape (m, 1)
def get_random_data(w, b, mu, sigma, m): # slope, y-intercept, mean of the data, standard deviation, size of arrays
data = np.empty((m, 2))
labels = np.empty((m, 1))
# fill the arrays with random data
for i in range(m):
c = (random.random() > 0.5) # 0 with probability 1/2 and 1 with probability 1/2
n = random.normalvariate(mu, sigma) # noise using normal distribution
x_1 = random.random() # uniform distribution on [0, 1)
x_2 = w * x_1 + b + (-1)**c * n
labels[i] = c
data[i][0] = x_1
data[i][1] = x_2
# the train set is the first 80% of our data, and the test set is the following 20%
train_length = int(round(m * 0.8, 1))
train_data = np.empty((train_length, 2))
train_labels = np.empty((train_length, 1))
test_data = np.empty((m - train_length, 2))
test_labels = np.empty((m - train_length, 1))
for i in range(train_length):
train_data[i] = data[i]
train_labels[i] = labels[i]
for i in range(train_length, m):
test_data[i - train_length] = data[i]
test_labels[i - train_length] = labels[i]
return (train_data, train_labels), (test_data, test_labels)
(train_data, train_labels), (test_data, test_labels) = get_random_data(2,3,100,100,200)
model = Sequential()
model.add(Dense(train_labels.shape[1],
activation='sigmoid',
kernel_regularizer=L1L2(l1=0.0, l2=0.1),
input_dim=(train_data.shape[1])))
model.compile(optimizer='sgd',
loss='binary_crossentropy',
metrics=['accuracy'])
model.fit(train_data, train_labels, epochs=100, validation_data=(test_data,test_labels))
weights = np.asarray(model.get_weights())
print("the weights are " , weights)
The first index of the array shows the weights of coefficients and the second array shows the bias.
So you have a equation like below.
h(x) = 0.42433906x1 + -0.67847246x2 + -0.06681705
Logistic regression takes this equation and applies sigmoid function to squeeze the results between 0-1.
So if you want to draw an equation of a line, you can use do it with the returned weights like I explained above.
Related
I am trying to use keras dense neural networks to forecast some time series.
When fitting my model on complex real datasets, my model converges toward a constant output, i.e. whatever the input, the model gives the same output (which seems to be a reasonable estimate of the mean of my dataset).
I reduced the problem up to very simple simulated datasets, and still have the same issue. Here is a minimal working example:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
X = []
Y = []
for jh in range(10000):
x = np.arange(-1, 1, 0.01)
y = 1+x*((np.random.random()-0.5))
y += np.random.randn(len(x))/(100)
X.append(y[:100])
Y.append(y[100:])
X = np.array(X)[:,:,None]
Y = np.array(Y)[:,:,None]
model = models.Sequential()
model.add(layers.Input((100,1,)))
model.add(layers.Flatten())
model.add(layers.Dense(100, activation='sigmoid'))
model.add(layers.Dense(100, activation='sigmoid'))
model.add(layers.Dense(100, activation='sigmoid'))
model.add(tf.keras.layers.Reshape((100,1)))
model.compile(loss = tf.keras.losses.MeanSquaredError(),optimizer="adam")
# model.summary()
print("Fit model on training data")
print("Fit model on training data")
history = model.fit(x=X, y=Y, batch_size=10000, epochs=200)
for k in np.arange(0,10000,1000):
plt.plot(np.arange(len(X[k])), X[k])
plt.plot(np.arange(len(X[k]), len(X[k])+len(Y[k])), model(X)[k])
plt.plot(np.arange(len(X[k]), len(X[k])+len(Y[k])), Y[k])
In this example, the model returns exactly same output regardless of the input.
I tried to change the number of layers, the loss function, the learning rate, the batch size and the number of epochs, without any noticeable improvement.
Do you have any suggestion on this issue?
If you rearrange your random inputs to be like
y = np.array(1. + x)
y += 1. / 100.
also
J, K = [] , []
for jh in range(10000):
j = np.arange(-1, 1, 0.01)
k = -np.array(1. - j)
k += 1. / 100
J.append(k[:100])
K.append(k[100:])
J = np.array(J)[:, :, None]
K = np.array(K)[:, :, None]
and finally add
plt.plot(np.arange(len(X[k]), len(X[k]) + len(Y[k])), model(J)[k])
in the plotting loop, then you will see two different results. Probably you should check your datasets diversity.
I am training a neural network to calculate the inverse of a 3x3 matrix. I am using a Keras dense model with 1 layer and 9 neurons. The activation function on the first layer is 'relu' and linear on the output layer. I am using 10000 matrices of determinant 1. The results I am getting are not very good (RMSE is in the hundreds). I have been trying more layers, more neurons, and other activation functions, but the gain is very small. Here is the code:
import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
def generator(nb_samples, matrix_size = 2, entries_range = (0,1), determinant = None):
'''
Generate nb_samples random matrices of size matrix_size with float
entries in interval entries_range and of determinant determinant
'''
matrices = []
if determinant:
inverses = []
for i in range(nb_samples):
matrix = np.random.uniform(entries_range[0], entries_range[1], (matrix_size,matrix_size))
matrix[0] *= determinant/np.linalg.det(matrix)
matrices.append(matrix.reshape(matrix_size**2,))
inverses.append(np.array(np.linalg.inv(matrix)).reshape(matrix_size**2,))
return np.array(matrices), np.array(inverses)
else:
determinants = []
for i in range(nb_samples):
matrix = np.random.uniform(entries_range[0], entries_range[1], (matrix_size,matrix_size))
determinants.append(np.array(np.linalg.det(matrix)).reshape(1,))
matrices.append(matrix.reshape(matrix_size**2,))
return np.array(matrices), np.array(determinants)
### Select number of samples, matrix size and range of entries in matrices
nb_samples = 10000
matrix_size = 3
entries_range = (0, 100)
determinant = 1
### Generate random matrices and determinants
matrices, inverses = generator(nb_samples, matrix_size = matrix_size, entries_range = entries_range, determinant = determinant)
### Select number of layers and neurons
nb_hidden_layers = 1
nb_neurons = matrix_size**2
activation = 'relu'
### Create dense neural network with nb_hidden_layers hidden layers having nb_neurons neurons each
model = Sequential()
model.add(Dense(nb_neurons, input_dim = matrix_size**2, activation = activation))
for i in range(nb_hidden_layers):
model.add(Dense(nb_neurons, activation = activation))
model.add(Dense(matrix_size**2))
model.compile(loss='mse', optimizer='adam')
### Train and save model using train size of 0.66
history = model.fit(matrices, inverses, epochs = 400, batch_size = 100, verbose = 0, validation_split = 0.33)
### Get validation loss from object 'history'
rmse = np.sqrt(history.history['val_loss'][-1])
### Print RMSE and parameter values
print('''
Validation RMSE: {}
Number of hidden layers: {}
Number of neurons: {}
Number of samples: {}
Matrices size: {}
Range of entries: {}
Determinant: {}
'''.format(rmse,nb_hidden_layers,nb_neurons,nb_samples,matrix_size,entries_range,determinant))
I have checked online and there seem to be papers dealing with the problem of inverse matrix approximation. However, before changing the model I would like to know if there would be other parameters I could change that could have a bigger impact on the error. I hope someone can provide some insight. Thank you.
Inverting a 3x3 matrix is pretty difficult for a neural network, as they tend to be bad at multiplying or dividing activations. I wasn't able to get it to work with a simple dense network, but a 7 layer resnet does the trick. It has millions of weights so it needs many more than 10000 examples: I found that it completely memorized up to 100,000 samples and badly overfit even with 10,000,000 samples, so I just generated samples continuously and fed each sample to the network once as it was generated.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
#too_small_model = tf.keras.Sequential([
# tf.keras.layers.Flatten(),
# tf.keras.layers.Dense(1500, activation="relu"),
# tf.keras.layers.Dense(1500, activation="relu"),
# tf.keras.layers.Dense(N * N),
# tf.keras.layers.Reshape([ N, N])
#])
N = 3
inp = tf.keras.layers.Input(shape=[N, N])
x = tf.keras.layers.Flatten()(inp)
x = tf.keras.layers.Dense(128, activation="relu")(x)
for _ in range(7):
skip = x
for _ in range(4):
y = tf.keras.layers.Dense(256, activation="relu")(x)
x = tf.keras.layers.concatenate([x, y])
#x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dense(128,
kernel_initializer=tf.keras.initializers.Zeros(),
bias_initializer=tf.keras.initializers.Zeros()
)(x)
x = skip + x
#x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dense(N * N)(x)
x = tf.keras.layers.Reshape([N, N])(x)
model2 = tf.keras.models.Model(inp, x)
model2.compile(loss="mean_squared_error", optimizer=tf.keras.optimizers.Adam(learning_rate=.00001))
for _ in range(5000):
random_matrices = np.random.random((1000000, N, N)) * 4 - 2
random_matrices = random_matrices[np.abs(np.linalg.det(random_matrices)) > .1]
inverses = np.linalg.inv(random_matrices)
inverses = inverses / 5. # normalize target values, large target values hamper training
model2.fit(random_matrices, inverses, epochs=1, batch_size=1024)
zz = model2.predict(random_matrices[:10000])
plt.scatter(inverses[:10000], zz, s=.0001)
print(random_matrices[76] # zz[76] * 5)
I am developing a custom model in Tensorflow. I am trying to implement a Virtual Adversarial Training (VAT) model from https://arxiv.org/abs/1704.03976. The model makes use of both labeled and unlabeled data in its classification task. Therefore, in the train_step of the model, I need to divide the data of the batch into labeled (0, or 1), or unlabeled (-1). It seems to work as expected when compiling the model using run_eagerly=True, but when I use run_eagerly=False, it gives me the following error:
ValueError: Number of mask dimensions must be specified, even if some dimensions are None. E.g. shape=[None] is ok, but shape=None is not.
which seems to be produced in:
X_l, y_l = tf.boolean_mask(X, tf.logical_not(missing)), tf.boolean_mask(y, tf.logical_not(missing))
I am not sure what is causing the error, but it seems to have something to do with a weird tensor shape issues that only occur during run_eagerly=False. I need the boolean_mask functionality in order to distinguish the labeled and unlabeled data. I hope someone can help me out. In order to reproduce the errors, I added the model, and a small simulation example. The simulation will produce the error I have, when run_eagerly=False is set.
Thanks in advance.
Model defintion:
from tensorflow import keras
import tensorflow as tf
metric_acc = keras.metrics.BinaryAccuracy()
metric_loss = keras.metrics.Mean('loss')
class VAT(keras.Model):
def __init__(self, units_1=16, units_2=16, dropout=0.3, xi=1e-6, epsilon=2.0, alpha=1.0):
super(VAT, self).__init__()
# Set model parameters
self.units_1 = units_1
self.units_2 = units_2
self.dropout = dropout
self.xi = xi
self.epsilon = epsilon
self.alpha = alpha
# First hidden
self.dense1 = keras.layers.Dense(self.units_1)
self.activation1 = keras.layers.Activation(tf.nn.leaky_relu)
self.dropout1 = keras.layers.Dropout(self.dropout)
# Second hidden
self.dense2 = keras.layers.Dense(self.units_2)
self.activation2 = keras.layers.Activation(tf.nn.leaky_relu)
self.dropout2 = keras.layers.Dropout(self.dropout)
# Output layer
self.dense3 = keras.layers.Dense(1)
self.activation3 = keras.layers.Activation("sigmoid")
def call(self, inputs, training=None, mask=None):
x1 = self.dense1(inputs)
x2 = self.activation1(x1)
x3 = self.dropout1(x2, training=True)
x4 = self.dense2(x3)
x5 = self.activation2(x4)
x6 = self.dropout2(x5, training=True)
x7 = self.dense3(x6)
x8 = self.activation3(x7)
return x8
def generate_perturbation(self, inputs):
# Generate normal vectors
d = tf.random.normal(shape=tf.shape(inputs))
# Normalize vectors
d = tf.math.l2_normalize(d, axis=1)
# Calculate r
r = self.xi * d
# Make predictions
p = self(inputs, training=True)
# Tape gradient
with tf.GradientTape() as tape:
tape.watch(r)
# Perturbed predictions
p_perturbed = self(inputs + r, training=True)
# Calculate divergence
D = keras.losses.KLD(p, p_perturbed) + keras.losses.KLD(1 - p, 1 - p_perturbed)
# Calculate gradient
gradient = tape.gradient(D, r)
# Calculate r_vadv
r_vadv = tf.math.l2_normalize(gradient, axis=1)
# Return virtual adversarial perturbation
return r_vadv
#tf.function
def train_step(self, data):
# Unpack data
X, y = data
# Missing label boolean indices
missing = tf.squeeze(tf.equal(y, -1))
# Split data into labeled and unlabeled data
X_l, y_l = tf.boolean_mask(X, tf.logical_not(missing)), tf.boolean_mask(y, tf.logical_not(missing))
X_u = tf.boolean_mask(X, missing)
# Calculate virtual perturbations for labeled and unlabeled
r_l = self.generate_perturbation(X_l)
r_u = self.generate_perturbation(X_u)
# Tape gradient
with tf.GradientTape() as model_tape:
model_tape.watch(self.trainable_variables)
# Calculate probabilities real data
prob_l, prob_u = self(X_l, training=True), self(X_u, training=True)
# Calculate probabilities perturbed data
prob_r_l, prob_r_u = self(X_l + self.epsilon * r_l, training=True), self(X_u + self.epsilon * r_u, training=True)
# Calculate loss
loss = vat_loss(y_l, prob_l, prob_u, prob_r_l, prob_r_u, self.alpha)
# Calculate gradient
model_gradient = model_tape.gradient(loss, self.trainable_variables)
# Update weights
self.optimizer.apply_gradients(zip(model_gradient, self.trainable_variables))
# Compute metrics
metric_acc.update_state(y_l, prob_l)
metric_loss.update_state(loss)
return {'loss': metric_loss.result(), 'accuracy': metric_acc.result()}
#property
def metrics(self):
return [metric_loss, metric_acc]
def vat_loss(y_l, prob_l, prob_u, prob_r_l, prob_r_u, alpha):
N_l = tf.cast(tf.size(prob_l), dtype=tf.dtypes.float32)
N_u = tf.cast(tf.size(prob_u), dtype=tf.dtypes.float32)
if tf.equal(N_l, 0):
# No labeled examples: get contribution from unlabeled data using perturbations
R_vadv = tf.reduce_sum(
keras.losses.KLD(prob_u, prob_r_u)
+ keras.losses.KLD(1 - prob_u, 1 - prob_r_u)
)
return alpha * R_vadv / N_u
elif tf.equal(N_u, 0):
# No unlabeled examples: get contribution from labeled data
R = tf.reduce_sum(keras.losses.binary_crossentropy(y_l, prob_l))
R_vadv = tf.reduce_sum(
keras.losses.KLD(prob_l, prob_r_l)
+ keras.losses.KLD(1 - prob_l, 1 - prob_r_l)
)
return R / N_l + alpha * R_vadv / N_l
else:
# Get contribution from labeled data
R = tf.reduce_sum(keras.losses.binary_crossentropy(y_l, prob_l))
# Get contribution from labeled and unlabeled data using perturbations
R_vadv = tf.reduce_sum(
keras.losses.KLD(prob_l, prob_r_l)
+ keras.losses.KLD(1 - prob_l, 1 - prob_r_l)
) + tf.reduce_sum(
keras.losses.KLD(prob_u, prob_r_u)
+ keras.losses.KLD(1 - prob_u, 1 - prob_r_u)
)
return R / N_l + alpha * R_vadv / (N_l + N_u)
Simulation example:
To show that the model/code works as desired (when using run_eagerly=True, I made a simulation example. In this example, I bias when observations are labeled/unlabeled. The figure below illustrates the labeled observations used by the model (yellow or purple), and the unlabeled observations (blue).
The VAT produces an accuracy of around ~0.75, whereas the reference model produces an accuracy of around ~0.58. These accuracies are produced without hyperparameter tuning.
from modules.vat import VAT
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
def create_biased_sample(x, proportion_labeled):
labeled = np.random.choice([True, False], p=[proportion_labeled, 1-proportion_labeled])
if x[0] < 0.0:
return False
elif x[0] > 1.0:
return False
else:
return labeled
# Simulation parameters
N = 2000
proportion_labeled = 0.15
# Model training parameters
BATCH_SIZE = 128
BUFFER_SIZE = 60000
EPOCHS = 100
# Generate a dataset
X, y = datasets.make_moons(n_samples=N, noise=.05, random_state=3)
X, y = X.astype('float32'), y.astype('float32')
y = y.reshape(-1, 1)
# Split in train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5)
# Simulate missing labels
sample_biased = lambda x: create_biased_sample(x, proportion_labeled)
labeled = np.array([sample_biased(k) for k in X_train])
y_train[~ labeled] = -1
# Estimate VAT model
vat = VAT(dropout=0.2, units_1=16, units_2=16, epsilon=0.5)
vat.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), run_eagerly=True)
vat.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, shuffle=True)
# Estimate a reference model
reference = keras.models.Sequential([
keras.layers.Input(shape=(2,)),
keras.layers.Dense(16),
keras.layers.Activation(tf.nn.leaky_relu),
keras.layers.Dropout(0.2),
keras.layers.Dense(16),
keras.layers.Activation(tf.nn.leaky_relu),
keras.layers.Dropout(0.2),
keras.layers.Dense(1),
keras.layers.Activation("sigmoid")
])
reference.compile(optimizer=keras.optimizers.Adam(learning_rate=0.01), loss=keras.losses.binary_crossentropy, run_eagerly=False)
reference.fit(X_train[y_train.flatten() != -1, :], y_train[y_train.flatten() != -1], batch_size=BATCH_SIZE, epochs=EPOCHS, shuffle=True)
# Calculate out-of-sample accuracies
test_acc_vat = tf.reduce_mean(keras.metrics.binary_accuracy(y_test, vat(X_test, training=False)))
test_acc_reference = tf.reduce_mean(keras.metrics.binary_accuracy(y_test, reference(X_test, training=False)))
# Print results
print('Test accuracy of VAT: {}'.format(test_acc_vat))
print('Test accuracy of reference model: {}'.format(test_acc_reference))
# Plot scatter
plt.scatter(X_test[:, 0], X_test[:, 1])
plt.scatter(X_train[y_train.flatten() != -1, 0], X_train[y_train.flatten() != -1, 1], c=y_train.flatten()[y_train.flatten() != -1])
For anyone who is interested, I solved the issue by adding the following in the train_step() method:
missing.set_shape([None])
It should be just after declaring the tensor missing. I solved this using this thread: Tensorflow boolean_mask with dynamic mask.
I am working on applying DL to a regression problem and some of the outputs need to be integers while others can be floats. So far I have built a NN which returns floats for all but I want to go to the next step and actually return ints vs floats for the different outputs.
Previously I asked a question where I provided a simple example of regression for y = m * x + b which I was able to solve on my own. In this example, how would the code be changed to ensure b is integer while m is float?
#!/usr/bin/env python3
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
#################
### CONSTANTS ###
#################
ARANGE = (-5.0, 5.0) # Possible values for m in training data
BRANGE = (0.0, 10.0) # Possible values for b in training data
X_MIN = 1.0
X_MAX = 9.0
N = 10 # Number of grid points
M = 2 # Number of {(x,y)} sets to train on
def gen_ab(arange, brange):
""" mrange, brange are tuples of floats """
a = (arange[1] - arange[0])*np.random.rand() + arange[0]
b = (brange[1] - brange[0])*np.random.rand() + brange[0]
return (a, b)
def build_model(x_data, y_data):
""" Build the model using input / output training data
Args:
x_data (np array): Size (m, n*2) grid of input training data.
y_data (np array): Size (m, 2) grid of output training data.
Returns:
model (Sequential model)
"""
model = keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_dim=len(x_data[0])))
model.add(layers.Dense(len(y_data[0])))
optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
return model
def gen_data(xs, arange, brange, m):
""" Generate training data for lines of y = m*x + b
Args:
xs (list): Grid points (size N1)
arange (tuple): Range to use for a (a_min, a_max)
brange (tuple): Range to use for b (b_min, b_max)
m (int): Number of y grids to generate
Returns:
x_data (np array): Size (m, n*2) grid of input training data.
y_data (np array): Size (m, 2) grid of output training data.
"""
n = len(xs)
x_data = np.zeros((m, 2*n))
y_data = np.zeros((m, 2))
for ix in range(m):
(a, b) = gen_ab(arange, brange)
ys = a*xs + b*np.ones(xs.size)
x_data[ix, :] = np.concatenate((xs, ys))
y_data[ix, :] = [a, b]
return (x_data, y_data)
def main():
""" Main routin """
# Generate the x axis grid to be used for all training sets
xs = np.linspace(X_MIN, X_MAX, N)
# Generate the training data
# x_train has M rows (M is the number of training samples)
# x_train has 2*N columns (first N columns are x, second N columns are y)
# y_train has M rows, each of which has two columns (a, b) for y = ax + b
(x_train, y_train) = gen_data(xs, ARANGE, BRANGE, M)
model = build_model(x_train, y_train)
model.fit(x_train, y_train, epochs=10, batch_size=32)
model.summary()
####################
### Test example ###
####################
(a, b) = gen_ab(ARANGE, BRANGE)
ys = a*xs + b*np.ones(xs.size)
rys = np.concatenate((xs, ys))
ab1 = model.predict(x_train)
ab2 = model.predict(np.array([rys]))
if __name__ == "__main__":
main()
I think this would be possible but actually not as trivial as it sounds. You unfortunately can't simply get the NN to output an int and a float and use the normal MSE loss you are using as the discreet nature of the int values prevents the loss function being continuously differentiable like the optimisers need.
If one really wanted to do it they could treat the int output variable as if it were actually a multi class output (treating the float output the same). You would need to craft a loss function out of the combination of these two outputs (multi class + float). You could one-hot-encode and then softmax the multi class outputs. An interesting complication of this is that the neural network would not know that the multi class output is actually ordinal (ordered, since 1<2<3<4 etc.). There have been interesting attempts in the past to help NNs to realise this (see Neural Network Ordinal Classification for Age).
I'm running a simple univariate logistic regression program written in Tensorflow. I can't, however, get my shapes properly from my training set to the x placeholder. I've been trying various methods to do so, I'm always getting the error:
ValueError: Cannot feed value of shape (70,) for Tensor 'Placeholder_1:0', which has shape '(?, 1)'
This is from the tf.matmul command.
Printing the shapes of the relevant variables:
w = <tf.Variable 'Variable:0' shape=(1, 1) dtype=float32_ref>
x = Tensor("Reshape:0", shape=(?, 1), dtype=float32)
train_x = (70, 1)
The problem seems to be that even through the train_x array has a shape of (70, 1) TF doesn't seem to recognize this.
How do I fix this problem? I've been trying the reshape command, but without success.
Here's the code.
#!/usr/bin/env python3
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import model_selection
import sys
gender_df = pd.read_csv('data/binary_data.csv',dtype = {col: np.float32 for col in ['HEIGHT'] })
print (gender_df.info())
# Shuffle our data
gender_df = gender_df.sample(frac=1).reset_index(drop=True)
num_features = 1
num_classes = 1
# We'll go ahead and split the data set into training and testing parts.
# 70 per cent will go to training, the rest to testing.
train_x,test_x, train_y, test_y = model_selection.train_test_split(gender_df['HEIGHT'],gender_df['GENDER'],test_size = 0.3)
n_samples = train_x.shape[0]
# These will be the placeholders for the testing and training data
x = tf.placeholder('float',[None,num_features+1])
y = tf.placeholder('float',[None,num_classes])
# Variables for the weight and bias.
W = tf.Variable(tf.zeros([num_features, num_classes]))
b = tf.Variable(tf.zeros([num_classes]))
x = tf.reshape(x,[-1, num_features])
train_x = train_x.values.reshape(-n_samples,num_classes)
print ('w = ',W)
print ('x = ',x)
print ('train_x = ', train_x.shape)
# This is our activation function to determine the probability
# of our gender based on height.
#activation = tf.nn.sigmoid((W * x) + b)
activation = tf.nn.softmax(tf.add(tf.matmul(x,W), b))
# Set our alpha value for the optimizer.
learning_rate = 0.001
# cross_entropy is our cost function.
cross_entropy = tf.reduce_mean(-(y*tf.log(activation) + (1 - y) * tf.log(1-activation)))
# We'll use a standard gradient descent optimizer.
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# Now train our jodel.
for epoch in range(1000):
_,l = sess.run([train_step, cross_entropy], feed_dict = {x: train_x, y:train_y})
if epoch % 50 == 0:
print ('loss = %f' %(l))
# Now let's see how our model performed on the test data.
correct = tf.equal(tf.argmax(activation,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct,'float'))
print ('Accuracy: ', sess.run(accuracy,feed_dict = {x: test_x, y:test_y}))
You are printing the shape of your x placeholder and train_x batch, but what about labels? Your x placeholder is of the shape (?, 2), so it seems that the error is not referring to x, it is referring to y, which is (?, 1). Check the shape of your train_y variable.