Is there a way to index into a list of operations in TensorFlow? In particular, I'm interested in indexing by the loop variable of tf.while_loop.
More concretely, let's say I have my_ops = [op1, op2]. I would like to have:
my_ops = [...]
i = tf.constant(0)
c = lambda i: tf.less(i, 10)
b = lambda i: my_ops[i](...)
r = tf.while_loop(c, b, [i])
which unfortunately will not work, since a Python list supports only integer indices, not a tensor such as the loop variable i.
I believe this is not possible. However, you could instead use tf.stack to stack the operations' output tensors, and then use tf.gather to obtain the desired output.
Here is an example:
import tensorflow as tf

def condition(i, x):
    return tf.less(i, 10)

def body_1(my_ops):
    def b(i, x):
        stacked_results = tf.stack([op(x) for op in my_ops])
        gather_idx = tf.mod(i, 2)
        return [i + 1, tf.gather(stacked_results, gather_idx)]
    return b

def body_2(my_ops):
    def b(i, x):
        nb_ops = len(my_ops)
        pred_fn_pairs = [(tf.equal(tf.mod(i, nb_ops), 0), lambda: my_ops[0](x)),
                         (tf.equal(tf.mod(i, nb_ops), 1), lambda: my_ops[1](x))]
        result = tf.case(pred_fn_pairs)
        return [i + 1, result]
    return b

my_ops = [lambda x: tf.Print(x + 1, [x, 1]),
          lambda x: tf.Print(x + 2, [x, 2])]

i = tf.constant(0)
x = tf.constant(0)
r = tf.while_loop(condition, body_2(my_ops), [i, x])  # See the difference with body_1

with tf.Session() as sess:
    i, x = sess.run(r)
    print(x)  # Prints 15 = 5*2 + 5*1
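As an aside (not part of the original answer): newer TensorFlow releases (1.15 and 2.x) also provide tf.switch_case, which dispatches on an integer tensor index directly, so only the selected branch is executed. A minimal sketch, assuming TF 2.x eager mode:

import tensorflow as tf  # assumes TF 2.x (eager execution)

my_ops = [lambda x: x + 1, lambda x: x + 2]

x = tf.constant(0)
for i in range(10):
    # tf.switch_case runs only the branch selected by the integer index
    x = tf.switch_case(tf.constant(i % 2),
                       branch_fns=[lambda: my_ops[0](x), lambda: my_ops[1](x)])
print(x.numpy())  # 15 = 5*1 + 5*2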
Related
I am creating a function that takes a tensor value and returns the result of applying the following formulation. There are three conditions, so I am using @tf.function.
def Spa(x):
    x = tf.convert_to_tensor(float(x), dtype=tf.float32)
    p = tf.convert_to_tensor(float(0.05), dtype=tf.float32)
    p_dash = x
    K = p * logp_dash
    Ku = K.sum(Ku)
    Ku = tf.convert_to_tensor(float(Ku), dtype=tf.float32)
    y = tf.convert_to_tensor(float(0), dtype=tf.float32)
    def a(): return tf.constant(0)
    r = tf.case([(tf.less(x, y), a), (tf.greater(x, Ku), a)], default=x, exclusive=False)
    return r
The code generates the following error:
'false_fn' must be callable.
I have tried many conversions, int to float and float to int, but I don't know what the issue is.
It should be something like this.
x = tf.where(x < 0, tf.zeros_like(x), x)
p = 0.05
p_hat = x
# note the parentheses: (1 - p) / (1 - p_hat)
KL_divergence = p * tf.math.log(p / p_hat) + (1 - p) * tf.math.log((1 - p) / (1 - p_hat))
x = tf.where(x < KL_divergence, tf.zeros_like(x), x)
return x
In tf.case, the default argument must be a callable, not a tensor, as described in the TF documentation:
def f1(): return tf.constant(17)
def f2(): return tf.constant(23)
def f3(): return tf.constant(-1)
r = tf.case([(tf.less(x, y), f1), (tf.greater(x, z), f2)], default=f3, exclusive=True)
You can see that f3 here is also a function.
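Applied to the question's code, the minimal fix is to wrap the default tensor in a lambda so that every branch of tf.case is callable. A sketch (Ku is a stand-in constant here, since the Ku computation in the question is incomplete):

def Spa(x):
    x = tf.convert_to_tensor(x, dtype=tf.float32)
    y = tf.constant(0.0)
    Ku = tf.constant(1.0)  # placeholder for the question's Ku threshold
    def a(): return tf.constant(0.0)
    r = tf.case([(tf.less(x, y), a), (tf.greater(x, Ku), a)],
                default=lambda: x,  # a callable, not the tensor x itself
                exclusive=False)
    return r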
My network does not learn to recognize inputs separately; it either outputs the averaged result or becomes biased toward one particular output. What am I doing wrong?
import numpy as np

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))
ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0

class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.layers = len(shape)  # The number of layers
        self.shape = shape  # The number of neurons per layer
        self.weights = [
            np.array([np.random.rand(shape[l - 1]) for _ in range(shape[l])])
            for l in range(1, self.layers)
        ]  # A list of weight matrices connecting neighbouring layers
        self.weighted_sums = [np.zeros(l) for l in shape]
        self.activations = [np.zeros(l) for l in shape]

    def inspect(self):
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y):
        delta = [np.empty(self.shape[l]) for l in range(1, self.layers)]  # Here the errors get stored
        delta[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1])  # The output error
        for l in reversed(range(self.layers - 2)):  # The errors get backpropagated
            delta[l] = self.weights[l + 1].T @ delta[l + 1] * sigmoid_der(self.weighted_sums[l])
        for l in range(self.layers - 1):  # The weights get updated online
            for j in range(self.shape[l + 1]):
                self.weights[l][j] -= 0.1 * self.activations[l + 1][j] * delta[l][j]

nn = NeuralNetwork((2, 2, 1))

X = np.array([
    [1, 0],
    [0, 1],
    [1, 1],
    [0, 0]
])
Y = np.array([
    [1],
    [1],
    [0],
    [0]
])

# I train my network by randomly picking examples from the training set
for _ in range(1000):
    i = np.random.randint(0, 4)
    nn.forward_prop(X[i])
    nn.backprop(X[i], Y[i])

for x in X:
    nn.forward_prop(x)
    print(nn.activations[-1])
The matrix math of backpropagation is quite tough. It is especially confusing that the lists of weight matrices and deltas (and of bias arrays too) are one entry shorter than the number of layers in the network, which makes indexing error-prone. As it turns out, the problem was indeed due to misindexing. Finally it works!
import numpy as np

sigmoid = lambda x: 1 / (1 + np.exp(-x))
sigmoid_der = lambda x: sigmoid(x) * (1 - sigmoid(x))
ReLU = lambda x: np.maximum(0, x)
ReLU_der = lambda x: x > 0

class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.layers = len(shape)
        self.shape = shape
        self.weights = [
            np.array([2 * np.random.random(shape[l - 1]) - 1 for _ in range(shape[l])])
            for l in range(1, self.layers)
        ]
        self.biases = [np.zeros(l) for l in shape[1:]]
        self.weighted_sums = [None for l in shape]
        self.activations = [None for l in shape]
        self.deltas = [None for l in shape[1:]]

    def inspect(self):
        print("=============NeuralNetwork===============")
        print(f"Shape: {self.shape}")
        print(f"Weights: {self.weights}")
        print(f"Activations: {self.activations}")

    def forward_prop(self, X):
        self.activations[0] = X
        for l in range(1, self.layers):
            self.weighted_sums[l] = self.weights[l - 1] @ self.activations[l - 1] + self.biases[l - 1]
            self.activations[l] = sigmoid(self.weighted_sums[l])

    def backprop(self, X, Y, lr):
        self.deltas[-1] = (Y - self.activations[-1]) * sigmoid_der(self.weighted_sums[-1])
        for l in range(self.layers - 2, 0, -1):
            self.deltas[l - 1] = self.weights[l].T @ self.deltas[l] * sigmoid_der(self.weighted_sums[l])
        for l in range(self.layers - 1):
            for j in range(self.shape[l + 1]):
                self.weights[l][j] += lr * self.activations[l] * self.deltas[l][j]
            self.biases[l] += self.deltas[l]

    def train(self, X, Y, lr, epochs):
        for e in range(epochs):
            if not e % 1000: self.test(X)
            i = np.random.randint(len(X))
            self.forward_prop(X[i])
            self.backprop(X[i], Y[i], lr)

    def test(self, X):
        print()
        for x in X:
            self.forward_prop(x)
            print(x, self.activations[-1])

if __name__ == "__main__":
    nn = NeuralNetwork((2, 3, 2, 1))
    X = np.array([
        [1, 0],
        [0, 1],
        [1, 1],
        [0, 0]
    ])
    Y = np.array([
        [1],
        [1],
        [0],
        [0]
    ])
    nn.train(X, Y, 0.4, 20000)
    nn.test(X)
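To make the off-by-one bookkeeping mentioned above concrete, here is a quick sanity check (not part of the fix itself) for a (2, 3, 1) network: activations and weighted sums have one entry per layer, while weights, biases and deltas have one entry per connection between layers, so layer l pairs with weights[l - 1]:

shape = (2, 3, 1)
layers = len(shape)  # 3 layers -> only 2 weight matrices
for l in range(1, layers):
    # layer l consumes weights[l - 1], a (shape[l], shape[l - 1]) matrix
    print(f"layer {l}: weights[{l - 1}] has shape ({shape[l]}, {shape[l - 1]})")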
This post is about the same issue, but no proper answer was given there. And since this problem seems to be a general one, I'll keep my code out of the question for now.
Following this source, I've written a network which does well when I give it a single training example with a target vector: using gradient descent, I minimize the cost function so that the network produces the target vector for the corresponding input vector. But this only works for one example!
The main goal of a neural network is to react differently to different inputs, and we should be able to train it to do so. I tried changing the network weights by the average of the delta-weights computed for each example, which failed: the training process gets stuck with the output vector holding the average of all the target vectors in the training set. I have no ideas left and have found no sources that explain this.
How do I train a neural network with a set of examples, not with just one input vector?
Update
For those wondering, I'll attach my code below. Try running it and you will see that, instead of outputting 0 1, it produces 0.5 0.5, which is the result of subtracting the averaged delta-weights.
import numpy as np
from sympy import symbols, lambdify
from sympy.functions.elementary.exponential import exp
from time import sleep

x = symbols('x')
sigmoid = exp(x) / (1 + exp(x))
sigmoid_der = sigmoid.diff(x)
sigmoid = lambdify(x, sigmoid)
sigmoid_der = lambdify(x, sigmoid_der)

class Neuron:
    def __init__(self, amount_of_inputs: int, hidden = True):
        self.inputs = np.random.rand(amount_of_inputs) if hidden else np.array([1])
        self.bias = 0.0
        self._activation = 0.0
        self._wsum = 0.0

    @property
    def activation(self) -> float:
        return self._activation

    @property
    def wsum(self) -> float:
        return self._wsum

    def calculate(self, indata):
        wval = self.inputs * indata + self.bias
        self._wsum = wval.sum()
        self._activation = sigmoid(self._wsum)

class NeuralNetwork:
    def __init__(self, shape: tuple):
        self.shape = shape
        self.layers = len(self.shape)
        self.network = [None for _ in range(self.layers)]
        self.network[0] = tuple([Neuron(1, hidden = False) for _ in range(shape[0])])
        for L in range(1, self.layers):
            self.network[L] = tuple([Neuron(shape[L - 1]) for _ in range(shape[L])])
        self.network = tuple(self.network)
        y = [symbols(f'y[{i}]') for i in range(shape[self.layers - 1])]
        a = [symbols(f'a[{i}]') for i in range(shape[self.layers - 1])]
        self.cost_function = sum([(y[i] - a[i]) ** 2 / 2 for i in range(shape[self.layers - 1])])
        self.gradient = tuple([self.cost_function.diff(a[i]) for i in range(shape[self.layers - 1])])
        self.cost_function = lambdify((y, a), self.cost_function)
        self.gradient = lambdify((y, a), self.gradient)

    def getLayer(self, L):
        return np.array([self.network[L][i].activation for i in range(self.shape[L])])

    def getWeightedSum(self, L):
        return np.array([self.network[L][i].wsum for i in range(self.shape[L])])

    def getInputsMatrix(self, L):
        return np.array([self.network[L][i].inputs for i in range(self.shape[L])])

    def calculate(self, values):
        for i in range(self.shape[0]):
            self.network[0][i].calculate(values[i])
        for L in range(1, self.layers):
            indata = self.getLayer(L - 1)
            for j in range(self.shape[L]):
                self.network[L][j].calculate(indata)

    def get_result(self) -> tuple:
        return tuple([self.network[self.layers - 1][i].activation for i in range(self.shape[self.layers - 1])])

    def teach(self, targets, examples):
        if len(targets) != len(examples):
            raise TypeError("The amounts of target and input vectors do not coincide")
        activations = [None for _ in range(len(examples))]
        delta = activations.copy()
        cost_is_low_enough = False
        while not cost_is_low_enough:
            for x in range(len(examples)):
                self.calculate(examples[x])
                activations[x] = [self.getLayer(l) for l in range(self.layers)]
                delta[x] = [None for _ in range(self.layers - 1)]
                network_output = self.getLayer(self.layers - 1)
                output_weighted = self.getWeightedSum(self.layers - 1)
                gradient_vector = np.array(self.gradient(targets[x], network_output))
                delta[x][-1] = gradient_vector * sigmoid_der(output_weighted)
                for l in range(self.layers - 2, 0, -1):
                    weight_matrix = self.getInputsMatrix(l + 1).transpose()
                    output_weighted = self.getWeightedSum(l)
                    activation = self.getLayer(l)
                    for j in range(self.shape[l]):
                        delta[x][l - 1] = (weight_matrix @ delta[x][l]) * sigmoid_der(output_weighted) * activation
            dw = [None for _ in range(self.layers - 1)]
            for x in range(len(examples)):
                self.calculate(examples[x])
            for l in range(self.layers - 1):
                dw[l] = np.empty(self.shape[l + 1])
                for j in range(self.shape[l + 1]):
                    dw[l][j] = np.mean([delta[x][l][j] for x in range(len(examples))])
            for l in range(1, self.layers):
                for j in range(self.shape[l]):
                    for k in range(self.shape[l - 1]):
                        self.network[l][j].inputs[k] -= 0.1 * dw[l - 1][j]
            cost = 0
            for x in range(len(examples)):
                self.calculate(examples[x])
                network_output = np.array(self.get_result())
                incost = self.cost_function(targets[x], network_output)
                print(network_output, incost)
                cost += incost
                # sleep(0.05)
            cost /= len(examples)
            print()
            if cost < 0.001: cost_is_low_enough = True

network = NeuralNetwork((2, 4, 1))
examples = np.array([
    [1, 2],
    [3, 4],
])
targets = np.array([
    [0],
    [1]
])
network.teach(targets, examples)

values_1 = np.array([5, 10])
network.calculate(values_1)
result = network.get_result()
print(result)
'''
values_2 = np.array([3, 4])
network.calculate(values_2)
result = network.get_result()
print(result)
'''
def body_4(i, indices_set):
    c_j = lambda j, indices_set_j: tf.less(j, len_src_sent)
    j = tf.constant(0)
    indices_set_j = indices_set

    def body_j(j, indices_set_j):
        align_middle_ = ALIGNMENT_SIZE / 2
        align_start_ = 0 - j
        align_end_ = len_src_sent - j
        c_k = lambda k, indices_set_k: tf.less(k, len_src_sent)
        k = tf.constant(1)
        indices_set_k = indices_set_j

        def body_k(k, indices_set_k):
            indices_set_k = tf.concat(
                [indices_set_k,
                 tf.stack([tf.cast([i * len_src_sent + j, align_middle_ + align_start_ + k], tf.int64)])],
                0)
            k = tf.add(k, 1)
            return k, indices_set_k

        [index, indices_set_k] = tf.while_loop(
            c_k, body_k, loop_vars=[k, indices_set_k],
            shape_invariants=[k.get_shape(), tf.TensorShape([None, None])])
        j = tf.add(j, 1)
        indices_set_j = indices_set_k
        return j, indices_set_j

    [index, indices_set] = tf.while_loop(
        c_j, body_j, loop_vars=[j, indices_set_j],
        shape_invariants=[j.get_shape(), tf.TensorShape([None, None])])
    i = tf.add(i, 1)
    return i, indices_set

c = lambda i, indices_set: tf.less(i, len_trg_sent - 1)
i = tf.constant(0)
indices_set = tf.cast([0, ALIGNMENT_SIZE / 2], tf.int64)
indices_set = tf.stack([indices_set])
[index, indices_set] = tf.while_loop(
    c, body_4, loop_vars=[i, indices_set],
    shape_invariants=[i.get_shape(), tf.TensorShape([None, None])])
I want to create a TensorFlow graph that outputs an indices_set for later use in a SparseTensor. Each index element should look like [i*len_trg_sent+j, align_middle_ + align_start_ + k], where i is the index along the first axis, j along the second, and k along the third of a tensor with shape (len_trg_sent-1, len_src_sent, len_src_sent).
But the code above appears to do nothing but loop forever. I am confused by nested while loops in TensorFlow and would appreciate any help.
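For reference, here is a stripped-down sketch of a nested tf.while_loop that does terminate, growing a [None, 2] tensor of index pairs. The essential points are that the growing tensor is threaded through both loops as a loop variable, and that its shape invariant is declared as tf.TensorShape([None, 2]) at both levels. n_i and n_j are stand-ins for len_trg_sent and len_src_sent:

import tensorflow as tf

n_i, n_j = tf.constant(3), tf.constant(4)  # stand-in loop bounds

def outer_body(i, indices):
    def inner_body(j, indices):
        # append one [i * n_j + j, j] row to the accumulated indices
        row = tf.cast(tf.expand_dims(tf.stack([i * n_j + j, j]), 0), tf.int64)
        return j + 1, tf.concat([indices, row], axis=0)

    j = tf.constant(0)
    j, indices = tf.while_loop(
        lambda j, indices: tf.less(j, n_j), inner_body, [j, indices],
        shape_invariants=[j.get_shape(), tf.TensorShape([None, 2])])
    return i + 1, indices

i = tf.constant(0)
indices = tf.zeros([0, 2], dtype=tf.int64)  # start with an empty set of rows
i, indices = tf.while_loop(
    lambda i, indices: tf.less(i, n_i), outer_body, [i, indices],
    shape_invariants=[i.get_shape(), tf.TensorShape([None, 2])])

with tf.Session() as sess:
    print(sess.run(indices))  # 12 rows of [i * n_j + j, j]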
How can I convert this Theano code into a simple Python loop?
[h_vals, _, y_vals] = theano.scan(fn=lstm_Step,
                                  sequences=[dict(input=inputs, taps=[0])],
                                  outputs_info=[h0, c0, None],
                                  non_sequences=[Whx, Whh, Wcx, Wch, Wyh, bh, bc, by],
                                  strict=True)[0]
Here is an example of what I mean:
import theano
import theano.tensor as tt

def add_multiply(a, b, k):
    return a + b + k, a * b * k

def python_main():
    x = 1
    y = 2
    k = 1
    tuples = []
    for i in range(5):
        x, y = add_multiply(x, y, k)
        tuples.append((x, y, k))
    return tuples

def theano_main():
    x = tt.constant(1, dtype='uint32')
    y = tt.constant(2, dtype='uint32')
    k = tt.scalar(dtype='uint32')
    outputs, _ = theano.scan(add_multiply, outputs_info=[x, y], non_sequences=[k], n_steps=5)
    g = theano.grad(tt.sum(outputs), k)
    f = theano.function(inputs=[k], outputs=outputs + [g])
    tuples = []
    xvs, yvs, _ = f(1)
    for xv, yv in zip(xvs, yvs):
        tuples.append((xv, yv, 1))
    return tuples

print('Python:', python_main())
print('Theano:', theano_main())
So, as you say @Nurzhan, I need to understand what this library does, and especially what theano.scan means.
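Concretely, since outputs_info=[h0, c0, None] means lstm_Step returns three values per step, of which the first two (h and c) are fed back as recurrent state and the third (y) is output-only, the scan call from the question corresponds to roughly this plain-Python loop (a sketch; lstm_Step and the weight variables are assumed to be defined as in the question):

# Rough plain-Python equivalent of the theano.scan call above.
# Assumes lstm_Step(x_t, h_prev, c_prev, Whx, Whh, Wcx, Wch, Wyh, bh, bc, by)
# returns (h_t, c_t, y_t), mirroring outputs_info=[h0, c0, None].
h, c = h0, c0
h_vals, y_vals = [], []
for x_t in inputs:  # sequences=[dict(input=inputs, taps=[0])] steps over inputs
    h, c, y = lstm_Step(x_t, h, c, Whx, Whh, Wcx, Wch, Wyh, bh, bc, by)
    h_vals.append(h)
    y_vals.append(y)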