How can I implement such a linear classifier in TensorFlow:
x1*w1 + x2*w2 + x3*w3 = y_pred,
where x1, x2, x3 are vectors and w1, w2, w3 are scalars?
I have a nice tutorial for the case where x1, x2, x3 are scalars (link),
but for the case where x1, x2, x3 are vectors I have no idea how to implement it.
UPDATE
That is, I am trying to implement the following model:
x1*w1 + x2*w1 + x3*w1 + x4*w2 + x5*w2 + x6*w2 + x7*w3 + x8*w3 + x9*w3 = y_pred,
where x1..x9 and w1..w3 are scalars.
The linear multiclass classifier to be implemented:
pred = w1 * (x1 + x2 + x3) + w2 * (x4 + x5 + x6) + w3 * (x7 + x8 + x9)
in which all variables are scalars.
In this model, since pred is a scalar, you cannot use cross-entropy loss for training the classifier (pred is not a distribution). You have to treat it as a regression problem.
Example dataset
import numpy as np
x1 = np.ones((100, 3)) # for w1
x2 = np.ones((100, 3)) * 2 # for w2
x3 = np.ones((100, 3)) * 3 # for w3
# set(y) is {0, 1, 2, 3}, corresponds to the four class labels
y = np.random.randint(0, 4, 100).reshape(-1, 1)
Example TensorFlow code:
import tensorflow as tf
tf.reset_default_graph()
f1 = tf.placeholder('float32', shape=[None, 3], name='f1')
f2 = tf.placeholder('float32', shape=[None, 3], name='f2')
f3 = tf.placeholder('float32', shape=[None, 3], name='f3')
target = tf.placeholder('float32', shape=[None, 1], name='target')
# the three scalars
w1 = tf.get_variable('w1', shape=[1], initializer=tf.random_normal_initializer())
w2 = tf.get_variable('w2', shape=[1], initializer=tf.random_normal_initializer())
w3 = tf.get_variable('w3', shape=[1], initializer=tf.random_normal_initializer())
pred_1 = tf.reduce_sum(tf.multiply(f1, w1), axis=1)
pred_2 = tf.reduce_sum(tf.multiply(f2, w2), axis=1)
pred_3 = tf.reduce_sum(tf.multiply(f3, w3), axis=1)
# till now the linear classifier has been constructed
# pred = w1(x1 + x2 + x3) + w2(x4 + x5 + x6) + w3(x7 + x8 + x9)
pred = tf.add_n([pred_1, pred_2, pred_3])
# treat it as a regression problem
loss = tf.reduce_mean(tf.square(pred - target))
optimizer = tf.train.GradientDescentOptimizer(1e-5)
updates = optimizer.minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for t in range(50):
        loss_val, _ = sess.run([loss, updates],
                               feed_dict={f1: x1, f2: x2, f3: x3, target: y})
        print(t, loss_val)
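To inspect the learned scalars after training, you could add something like this (still inside the with block):

    w1_val, w2_val, w3_val = sess.run([w1, w2, w3])
    print('learned weights:', w1_val, w2_val, w3_val)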
Below is a simple example that uses cross-entropy loss for training a multiclass classifier. As you can see, this model is a neural network model.
import numpy as np
import tensorflow as tf
x1 = np.ones((100, 3)) # for w1
x2 = np.ones((100, 3)) * 2 # for w2
x3 = np.ones((100, 3)) * 3 # for w3
# one-hot labels for the four classes
y = np.eye(4)[np.random.randint(0, 4, 100)]
tf.reset_default_graph()
f1 = tf.placeholder('float32', shape=[None, 3], name='f1')
f2 = tf.placeholder('float32', shape=[None, 3], name='f2')
f3 = tf.placeholder('float32', shape=[None, 3], name='f3')
target = tf.placeholder('float32', shape=[None, 4], name='target')
# the three scalars
w1 = tf.get_variable('w1', shape=[1], initializer=tf.random_normal_initializer())
w2 = tf.get_variable('w2', shape=[1], initializer=tf.random_normal_initializer())
w3 = tf.get_variable('w3', shape=[1], initializer=tf.random_normal_initializer())
w = tf.get_variable('w', shape=[3, 4], initializer=tf.random_normal_initializer())
pred_1 = tf.reduce_sum(tf.multiply(f1, w1), axis=1)
pred_2 = tf.reduce_sum(tf.multiply(f2, w2), axis=1)
pred_3 = tf.reduce_sum(tf.multiply(f3, w3), axis=1)
pred = tf.stack([pred_1, pred_2, pred_3], axis=1)
pred = tf.matmul(pred, w)
loss = tf.losses.softmax_cross_entropy(onehot_labels=target, logits=pred)
optimizer = tf.train.GradientDescentOptimizer(1e-5)
updates = optimizer.minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for t in range(50):
        loss_val, _ = sess.run([loss, updates],
                               feed_dict={f1: x1, f2: x2, f3: x3, target: y})
        print(t, loss_val)
I created an array that looks like [w1, w1, w1, w2, w2, w2, ...] and multiplied it (element-wise) by x before summing all terms up. I could not get model.fit to work, so I copied the train_step code from https://www.tensorflow.org/tutorials/quickstart/advanced. It seems to work just fine. I left my test code at the bottom for you to inspect.
This makes use of TensorFlow 2.0 and the integration with Keras models.
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam
print(tf.executing_eagerly())
class ProductAdd(Model):
    def __init__(self):
        super(ProductAdd, self).__init__()
        self.vars = [None] * 3                    # Placeholder list for the 3 weights
        for i in range(3):
            self.vars[i] = tf.Variable(           # Creates 3 variables to act as weights
                np.random.standard_normal(),      # Assigns each a random value to start
                dtype=tf.float32,                 # float32 to match the float inputs
                name='var' + str(i))              # Names them var0, var1, ...

    def call(self, x):
        extended_vars = tf.stack([self.vars[i // 3]    # "Extends" the var list to look like:
                                  for i in range(9)])  # [w1, w1, w1, w2, w2, w2, w3, w3, w3]
        return tf.reduce_sum(tf.multiply(x, extended_vars))  # Element-wise multiply, then sum
loss_object = MeanSquaredError() # Create loss and optimizer
optimizer = Adam()
# @tf.function  # optional: compiles this step into a graph for speed
def train_step(images, labels):  # performs one training step on the model
    # adapted from https://www.tensorflow.org/tutorials/quickstart/advanced
    with tf.GradientTape() as tape:
        predictions = model(images)  # forward pass
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
model = ProductAdd()
for _ in range(100):
    train_step([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], [0.0])
print(model([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]).numpy())
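A note on the commented-out @tf.function: with call written in terms of TensorFlow ops (tf.stack, tf.multiply, tf.reduce_sum, as above), the decorator can usually be re-enabled for a speedup. If call uses NumPy ops instead, it has to stay off, since tf.function traces a graph and NumPy calls are not tracked by it.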
This question is ill-posed. You say you want x_1, x_2, x_3 to be vectors, but it's not clear what you would do with w_1, w_2, w_3. There are two possibilities.
If you want to keep them as scalars, as your question seems to imply, then the model is not really a vector model: you're just doing the same scalar operation on all the entries of the x vectors at once, which is equivalent to a scalar model.
Otherwise, you can define w_1, w_2, w_3 as matrices, or as row vectors if the label is scalar. In that case, there is no reason to write the equation as you wrote it, because you could stack the xs into a single vector, stack the ws into a single vector, and write wx = y. In any case, this is a multivariate linear regression, for which you can find many examples, and tutorials on how to solve it, in TensorFlow and Torch.
Update, given OP's clarification
In your comment, you now say you're interested in solving the following equation:
w1*(x1 + x2 + x3) + w2*(x4 + x5 + x6) + w3*(x7 + x8 + x9) == y
where all variables are scalars. Note that the x variables are known, so we can define (a simple arithmetic operation):
z1 = x1 + x2 + x3; z2 = x4 + x5 + x6; z3 = x7 + x8 + x9
And the equation becomes
w1*z1 + w2*z2 + w3*z3 = y.
So this is more of a linear algebra question than a TensorFlow/Torch question, because this equation can be solved analytically and does not require numerical fitting. However, it is still ill-defined, because it has 3 unknowns (w1, w2, w3) for one linear equation. So it will not have a unique solution, but a two-dimensional plane of solutions in the 3-dimensional w-space. To get some solutions, you can arbitrarily decide to set, for example, w1 = w2 = 0, from which you automatically get w3 = y/z3. Then do the same for the other two, and you'll get three different and linearly independent solutions.
Hope this helps. In summary, you don't need code at all.
Second update (from comment)
Why does it need to be solved using optimization? If the problem is as you presented it, it clearly does not. Unless you mean you have many values for the Xs and Ys. In that case, you're doing multivariate linear regression. MLR can be solved using ordinary least squares; see for example https://towardsdatascience.com/simple-and-multiple-linear-regression-in-python-c928425168f9
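If you do have many samples, a minimal ordinary-least-squares sketch in NumPy (the variable names and the synthetic data here are illustrative, not from the original post):

import numpy as np
# Z has one row per sample; its columns are z1 = x1+x2+x3, z2 = x4+x5+x6, z3 = x7+x8+x9
Z = np.random.randn(100, 3)                  # synthetic sample data
w_true = np.array([1.0, -2.0, 0.5])          # hypothetical ground-truth weights
y = Z @ w_true                               # targets generated from those weights
w, res, rank, sv = np.linalg.lstsq(Z, y, rcond=None)  # ordinary least squares
print(w)                                     # recovers approximately [1.0, -2.0, 0.5]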
Related
I am new to the data science industry and have been working on my second project. I am implementing gradient descent in my linear regression to optimize my cost model, but the output from my C variable seems very strange. Can someone please tell me if I coded this correctly, or is there something I am doing wrong? Below is my code:
#Model Creation
import torch
import random
from sklearn.preprocessing import StandardScaler
input1 = (MarginMerged.drop(['BTC', 'Date'], 1))
input2 = torch.tensor(inputs.astype(float).values)
output = torch.tensor([MarginMerged['BTC']])
sc=StandardScaler()
standardized_x = sc.fit_transform(input2) #Standardizing for better convergence of Gradients
w1 = torch.randn(1, requires_grad = True) #Randomizing weights and allowing tracking of gradients
w2 = torch.randn(1, requires_grad = True)
w3 = torch.randn(1, requires_grad = True)
w4 = torch.randn(1, requires_grad = True)
b = torch.randn(1, requires_grad = True)
def regression(x1, w1, x2, w2, x3, w3, x4, w4, b):  # Simple regression function
    y = x1 * w1 + x2 * w2 + x3 * w3 + x4 * w4 + b
    return y
#Evaluating Model and Cost Function
yhat = regression(input2[:, 0], w1, input2[:, 1], w2, input2[:, 2], w3, input2[:, 3], w4, b)
def MSE(yhat, y):
    sigma = torch.sum((yhat - y) ** 2)
    return sigma / len(y)
C = MSE(yhat, output)
Output:
tensor([ -766136.2741, -910269.9667, -827688.1826, -825344.9633,
-886781.1021, -939034.5842, -999291.1625, -996561.4835,
-1355946.4095, -1101955.2536, -1142216.8116, -1212749.9003,
-1737812.1974, -1621638.2128, -1758945.7766, -2119769.5569,
-2590650.6940, -3916453.0230, -4122350.1070, -9693393.5401,
-13584547.6533, -17504209.3513, -24114284.5156, -22302542.7874,
-19064275.4255, -35323125.1089, -86678884.6189, -66240221.4384,
-41395859.9706, -34334016.6174, -18819275.6493, -16508314.3713,
-9682844.7546, -14240738.2622, -11479996.4053, -9299528.3805,
-9841421.4224, -15140955.9555, -10965320.5364, -6465157.0859,
-6044804.7645, -4984105.2754, -7711268.3907, -11424855.3688,
-12303663.6678, -14704509.5725, -9483839.7203, -7645405.3528,
-5656487.9378, -5047090.0958, -3954263.1534, -5806911.3040,
-8229332.1597, -12360791.8777, -11092046.1287], dtype=torch.float64,
*The output should be positive, since this implements MSE*
When using TensorFlow, how can I print an intermediate tensor's value inside a function? For example:
import numpy as np
import tensorflow as tf
def f(X):
    tf.set_random_seed(1)
    W1 = tf.get_variable('W1', [4, 4, 3, 8], initializer=tf.contrib.layers.xavier_initializer(seed=0))
    Z1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
    return Z1

with tf.Session() as sess:
    np.random.seed(1)
    X = tf.placeholder(tf.float32, shape=[None, 64, 64, 3])
    Z1 = f(X)
    init = tf.global_variables_initializer()
    sess.run(init)
    a = sess.run(Z1, {X: np.random.randn(2, 64, 64, 3)})
    print("Z1 = " + str(a))
How can I print the concrete values of the tensors W1 and X when computing Z1? I need the values of W1 and X to debug.
PS: I'm using Jupyter Notebook, TensorFlow 1.15
There are three ways.
Changing the arguments of your method
def f(X):
    tf.set_random_seed(1)
    W1 = tf.get_variable('W1', [4, 4, 3, 8], initializer=tf.contrib.layers.xavier_initializer(seed=0))
    Z1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
    return Z1, W1

with tf.Session() as sess:
    np.random.seed(1)
    X = tf.placeholder(tf.float32, shape=[None, 64, 64, 3])
    Z1, W1 = f(X)  # unpack in the order the function returns them
    init = tf.global_variables_initializer()
    sess.run(init)
    w, x, a = sess.run([W1, X, Z1], {X: np.random.randn(2, 64, 64, 3)})
    print("Z1 = " + str(a))
    print('W = ', w)
    print('X = ', x)
Without changing the arguments of your method
...
X = tf.placeholder(tf.float32, shape=[None, 64, 64, 3])
Z1 = f(X)
init = tf.global_variables_initializer()
sess.run(init)
with tf.variable_scope('', reuse=True) as scope:
    W1 = tf.get_variable('W1')  # fetch the existing variable by name
w, x, a = sess.run([W1, X, Z1], {X: np.random.randn(2, 64, 64, 3)})
print("Z1 = " + str(a))
print('W = ', w)
print('X = ', x)
Or you can use eager execution instead of graph execution. I think this would be the best way to use TF for debugging, as printing/debugging values with graph execution is clunky.
In TensorFlow 1.x, one way I know of is to call tf.enable_eager_execution() to enable eager mode; then you can use tensors just like NumPy arrays.
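A minimal sketch of the eager approach in TF 1.x (the shapes mirror the question; eager execution must be enabled at program startup, before any graph ops are created):

import numpy as np
import tensorflow as tf
tf.enable_eager_execution()  # call once, before building anything

np.random.seed(1)
W1 = tf.random.normal([4, 4, 3, 8])  # stands in for the W1 variable from the question
X = tf.constant(np.random.randn(2, 64, 64, 3), dtype=tf.float32)
Z1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
print(W1.numpy())  # values are available immediately, no Session needed
print(Z1.numpy())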
I am working on a model where I have to classify my data into two classes. Most of the code I've seen uses tf.nn.sigmoid_cross_entropy_with_logits for calculating cross entropy for binary classification.
When I use the same function to train my model, I get negative values of entropy. I want to ask if I can use tf.nn.softmax_cross_entropy_with_logits to overcome the negative entropy?
x = tf.placeholder(tf.float32, [None, Pixels])
W1 = tf.Variable(tf.random_normal([Pixels, Nodes1], stddev=0.01))
b1 = tf.Variable(tf.zeros([Nodes1]))
y1 = tf.nn.sigmoid(tf.matmul(x, W1) + b1)
W2 = tf.Variable(tf.random_normal([Nodes1, Labels], stddev=0.01))
b2 = tf.Variable(tf.zeros([Labels]))
y = tf.matmul(y1, W2) + b2
cross_entropy = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_))
I have a dataset of 178 elements, each containing 13 features and 1 label.
Labels are stored as one-hot arrays. My training dataset is made of 158 elements.
Here is what my model looks like :
x = tf.placeholder(tf.float32, [None,training_data.shape[1]])
y_ = tf.placeholder(tf.float32, [None,training_data_labels.shape[1]])
node_1 = 300
node_2 = 300
node_3 = 300
out_n = 3
#1
W1 = tf.Variable(tf.random_normal([training_data.shape[1], node_1]))
B1 = tf.Variable(tf.random_normal([node_1]))
y1 = tf.add(tf.matmul(x,W1),B1)
y1 = tf.nn.relu(y1)
#2
W2 = tf.Variable(tf.random_normal([node_1, node_2]))
B2 = tf.Variable(tf.random_normal([node_2]))
y2 = tf.add(tf.matmul(y1,W2),B2)
y2 = tf.nn.relu(y2)
#3
W3 = tf.Variable(tf.random_normal([node_2, node_3]))
B3 = tf.Variable(tf.random_normal([node_3]))
y3 = tf.add(tf.matmul(y2,W3),B3)
y3 = tf.nn.relu(y3)
#output
W4 = tf.Variable(tf.random_normal([node_3, out_n]))
B4 = tf.Variable(tf.random_normal([out_n]))
y4 = tf.add(tf.matmul(y3,W4),B4)
y = tf.nn.softmax(y4)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(200):
        sess.run(optimizer, feed_dict={x: training_data, y_: training_data_labels})
    correct = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
    print('Accuracy:', accuracy.eval({x: eval_data, y_: eval_data_labels}))
But the accuracy is very low. I tried increasing the range from 200 to a higher number, but it still remains low.
What could I do to improve the results?
The problem is that you're taking the softmax of y4 and then passing that to tf.nn.softmax_cross_entropy_with_logits. This error is common enough that there's actually a note about it in the documentation for softmax_cross_entropy_with_logits:
WARNING: This op expects unscaled logits, since it performs a softmax on logits internally
for efficiency. Do not call this op with the output of softmax, as it will produce
incorrect results.
The rest of your code looks fine, so just get rid of y = tf.nn.softmax(y4) and use y4 as the logits everywhere y was used; the accuracy computation is unaffected, because softmax does not change the argmax.
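A minimal sketch of the corrected lines, assuming the same variable names as in the question:

# keep the raw logits as the network output; no softmax before the loss
y = tf.add(tf.matmul(y3, W4), B4)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
# accuracy is computed as before; softmax is monotonic, so argmax(y) is unchanged
correct = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))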
While studying TensorFlow, I ran into a problem: the cost function outputs 'nan'. Also, if you find anything else wrong in the source code, please let me know.
I am trying to get the cost function value while training my model, but it's not working.
tf.reset_default_graph()
tf.set_random_seed(777)
X = tf.placeholder(tf.float32, [None, 20, 20, 3])
Y = tf.placeholder(tf.float32, [None, 1])
with tf.variable_scope('conv1') as scope:
    W1 = tf.Variable(tf.random_normal([4, 4, 3, 32], stddev=0.01), name='weight1')
    L1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
    L1 = tf.nn.relu(L1)
    L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    L1 = tf.reshape(L1, [-1, 10 * 10 * 32])
    W1_hist = tf.summary.histogram('conv_weight1', W1)
    L1_hist = tf.summary.histogram('conv_layer1', L1)
with tf.name_scope('fully_connected_layer1') as scope:
    W2 = tf.get_variable('W2', shape=[10 * 10 * 32, 1], initializer=tf.contrib.layers.xavier_initializer())
    b = tf.Variable(tf.random_normal([1]))
    hypothesis = tf.matmul(L1, W2) + b
    W2_hist = tf.summary.histogram('fully_connected_weight1', W2)
    b_hist = tf.summary.histogram('fully_connected_bias', b)
    hypothesis_hist = tf.summary.histogram('hypothesis', hypothesis)
with tf.name_scope('cost') as scope:
    cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
    cost_summary = tf.summary.scalar('cost', cost)
with tf.name_scope('train_optimizer') as scope:
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cost)
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))
accuracy_summary = tf.summary.scalar('accuracy', accuracy)
train_data_batch, train_labels_batch = tf.train.batch([train_data, train_labels], enqueue_many=True , batch_size=100, allow_smaller_final_batch=True)
with tf.Session() as sess:
    # tensorboard --logdir=./logs/planesnet2_log
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter('./logs/planesnet2_log')
    writer.add_graph(sess.graph)
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    total_cost = 0
    for step in range(20):
        x_batch, y_batch = sess.run([train_data_batch, train_labels_batch])
        feed_dict = {X: x_batch, Y: y_batch}
        _, cost_val = sess.run([optimizer, cost], feed_dict=feed_dict)
        total_cost += cost_val
        print('total_cost: ', total_cost, 'cost_val: ', cost_val)
    coord.request_stop()
    coord.join(threads)
You use a cross-entropy loss without applying a sigmoid activation to hypothesis, so your values are not bounded in (0, 1]. The log function is not defined for negative values, and it most likely gets some. Add a sigmoid and an epsilon factor to avoid negative or zero values, and you should be fine.
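A minimal sketch of that fix, reusing the variable names from the question (the epsilon value is illustrative):

eps = 1e-7  # small constant that keeps log() away from zero
hypothesis = tf.nn.sigmoid(tf.matmul(L1, W2) + b)  # bound outputs to (0, 1)
cost = -tf.reduce_mean(Y * tf.log(hypothesis + eps)
                       + (1 - Y) * tf.log(1 - hypothesis + eps))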
As far as I know, the cross-entropy cost function assumes that the hypothesis you want to predict is a probability, because cross entropy uses the log function and the (1 - Y) term. Therefore, cross-entropy loss should be used only when the output is a probability.
So you have to squash the hypothesis into a probability first. With a single output unit, sigmoid is the right choice; a softmax over a single logit would always return 1, which still breaks the log.
W2 = tf.get_variable('W2', shape=[10 * 10 * 32, 1],
                     initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.random_normal([1]))
# hypothesis = tf.matmul(L1, W2) + b
hypothesis = tf.nn.sigmoid(tf.add(tf.matmul(L1, W2), b))  # single-unit output -> sigmoid
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))
Or you can use this code, which applies the sigmoid to the raw logits internally:
W2 = tf.get_variable('W2', shape=[10 * 10 * 32, 1],
                     initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.random_normal([1]))
hypothesis = tf.matmul(L1, W2) + b  # raw logits
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=hypothesis))