I'm running across a very weird issue and was hoping to get someone who might be a bit more familiar. I'm attempting a basic LSTM to do some binary classification with the following coding:
class FakeData(object):
def __init__(self, n):
self.x = np.random.randint(4, size=(n, 90, 4))
blah = np.random.randint(2, size=(n))
self.y = np.zeros((n,2))
self.y[:,0] = blah
self.y[:,1] = 1 - blah
self.mask = np.arange(n)
self.cnt = 0
self.n = n
def getdata(self, n):
if self.cnt + n > self.n:
np.randoom.shuffle(self.mask)
self.cnt = 0
mask = self.mask[self.cnt : self.cnt + n]
return self.x[mask], self.y[mask]
n_data = 10000
batch_size = 10
fd = FakeData(n_data)
n_units = 200
n_classes = 2
x = tf.placeholder(tf.float32, shape=[None, 90, 4])
y_ = tf.placeholder(tf.float32, shape=[None, n_classes])
dropout = tf.placeholder(tf.float32)
w_out = tf.Variable(tf.truncated_normal([n_units, n_classes]))
b_out = tf.Variable(tf.truncated_normal([n_classes]))
lstm = tf.contrib.rnn.LSTMCell(n_units)
cell = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=1.0 - dropout)
new_x = tf.unstack(x, 90, 1)
new_x = tf.Print(new_x, [tf.shape(new_x)], message='newx is: ')
output, state = tf.nn.dynamic_rnn(cell, new_x, dtype=tf.float32)
output = tf.Print(output, [tf.shape(output)], message='output is: ')
logits = tf.matmul(output[-1], w_out) + b_out
logits = tf.Print(logits, [tf.shape(logits)], message='logits is: ')
preds = tf.nn.softmax(logits)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,
labels=y_))
training = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
correct = tf.equal(tf.argmax(preds, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
#
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(10):
batch_x, batch_y = fd.getdata(batch_size)
sess.run([training], feed_dict={x: batch_x, y_: batch_y, dropout: 0})
if i % 100 == 0:
print "Accuracy {}".format(accuracy.eval(feed_dict={x: batch_x,
y_: batch_y, dropout: 0}))
The specific question I have is, for some reason, when I run the code without the tf.Print lines, I get some sort of weird shape transformation error
ValueError: Dimension must be 2 but is 3 for 'transpose' (op: 'Transpose') with shapes: [?,4], [3].
on line
output, state = tf.nn.dynamic_rnn(cell, new_x, dtype=tf.float32)
However, when I include the tf.Print lines, it correctly logs the shapes and is able to run the whole session. Am I missing something?
For clarity, the shapes should be:
input: n x 90 x 4
new_x: 90 x n x 4
output: 90 x n x 200
logits: n x 2
Adding the answer here in case anyone else runs across this problem in the future.
Turns out, a lot of old RNN examples floating around use unstack. However, that turns it into a list of tensors, which dynamic_rnn cannot take as input. The print was converting it from a list of 2d tensors to a 3d tensor so that it was able to handle it correctly. Solution is to transform the data dimensionally in some other way like:
new_x = tf.transpose(x, perm=(1, 0, 2)) (thanks rvinas)
Related
I am trying to create a neural net to feed in an input of shape 249561, 80, 1 and y labels are (249561, 2).
def __init__(self):
super(Net1, self).__init__()
self.conv1 = nn.Conv1d(80, 16, kernel_size=1)
self.conv2_drop = nn.Dropout()
self.fc1 = nn.Linear(1,256)
self.fc2 = nn.Linear(256, 64)
self.fc3 = nn.Linear(64,32)
self.fc4 = nn.Linear(32,2)
def forward(self, x):
print(type(x))
x = F.relu(F.max_pool1d(self.conv1(x), 1))
print(x.shape)
x.reshape(-1)
e1 = F.relu(self.fc1(x))
x = F.dropout(e1, training=self.training)
x = F.relu(self.fc2(x))
x = F.dropout(x, training=self.training)
x = F.relu(self.fc3(x))
x = F.dropout(x, training=self.training)
x = self.fc4(x)
return x
My training loop looks something like this
losses = [];
batch_size = 16
for epoch in range(10):
permutation = torch.randperm(x2.size()[0])
for i in range(0,len(x2), batch_size):
indices = permutation[i:i+batch_size]
batch_x, batch_y = x2[indices], onehot_encoded[indices]
#images = Variable(images.float())
#labels = Variable(labels)
# Forward + Backward + Optimize
optimizer.zero_grad()
outputs = model(batch_x)
loss = criterion(outputs, batch_y)
loss.backward()
optimizer.step()
I have a batch of 16 and I feed in a tensor with a shape [16, 80, 1].I get the following error. RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 'target' in call to _thnn_nll_loss2d_forward. I suspect that it is the problem with an output layer but it returns a tensor of shape 2 which is the same as my labels. The output x size torch.Size([16, 16, 2])
Instead of changing the input, why don't you use nn.Conv1d (replace both nn.Conv2d -- you'll need to change the dropout as well)?
If you really wanna change the input, you can add:
batch_x = batch_x[..., None]
After
batch_x, batch_y = x2[indices], onehot_encoded[indices]
I am using an LSTMBlockFusedCell for a network I'm working on.
When fed a single input of size [6, 3169] and an output of [-1, 3169] (casts to input size), it operates properly and predicts. The problem comes when I try and batch those same inputs. With a batch of 100, the input reshapes fine, but the output broadcasts into [600, 3169]. I have tried setting the placeholder specs exactly to the specified input length, but the same error happened. I'm pretty confident that my data is in the correct shape. I run the batch generator and print the output sizes afterward.
Here's my network:
def rnn(x, weight, bias, n_input, vocab):
x = tf.reshape(x, [-1, n_input, vocab])
rnn_cell = tf.contrib.rnn.LSTMBlockFusedCell(n_hidden)
outputs, states = rnn_cell(x, dtype=tf.float32)
return tf.matmul(outputs[-1], weight['output']) + bias['output']
my batch generator:
def new_data(dat, dic, n_steps, batch_size=100):
x = np.zeros(shape=(batch_size, n_steps, len(dic)))
y = np.zeros(shape=(batch_size, n_steps, len(dic)))
j = 0
x_dat = np.zeros(shape=(n_steps, len(dic)))
for sen in dat:
if len(sen) - 1 > n_steps:
for i, word in enumerate(sen[0:n_steps]):
x_dat[i] = one_hot(word, dic)
y_dat = one_hot(sen[n_steps], dic)
x[j % batch_size] = x_dat
y[j % batch_size] = y_dat
if j % batch_size == 0:
yield x,y
x = np.zeros(shape=(batch_size, n_steps, len(dic)))
y = np.zeros(shape=(batch_size, n_steps, len(dic)))
j += 1
and my setup:
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
weights = {
"output" : tf.Variable(tf.random_normal([n_hidden, vocab_size]),name="weight_output")
}
bias = {
"output" : tf.Variable(tf.random_normal([vocab_size]), name="bias_output")
}
pred = rnn(X, weights, bias, n_input, vocab)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=Y))
with tf.Session() as sess:
sess.run(init)
step = 0
for epoch in range(n_epochs):
for x,y in new_data(dat, dic, n_steps):
_ , c = sess.run([optimizer, cost], feed_dict={X: x ,Y: y})
I've tried to figure things out myself and not fallback to actually creating an account here but as a self-taught beginner I've reached a wall with this code.
I'm having two major issues besides optimizing the net architecture when everything is working:
Everytime I've tried to create a new dataset for a test batch I've ran into 'xTensor is not a Tensor' error and could run a session through it, unlike with the iterator which works just fine. I'm loading custom data with dir names as labels with no manually created train and test directories. I'm probably missing a proper method for tf.
I can't work around the current first error I get which is:
'ValueError: Cannot feed value of shape (100,) for Tensor 'Placeholder_1:0', which has shape '(?, 1)' while feed_dict {y=batch_y}. I've tried some of solutions posted on SO but couldn't get it to work.
I'm pasting the whole thing, ########### are the problem triggering zones at the very bottom in the session.
import tensorflow as tf
import numpy as np
import os
# load custom imageset directory
data_path = r"..\datasets\images\flowers"
# setup hypervariables for labels and images format
n_classes = 5
img_width = 64
img_length = 64
channels = 3
# setup hypervariables for network
learning_rate = 0.0001
epochs = 2
batch_size = 100
drop_rate = 0.6
imagepaths = list()
labels = list()
label = 0
classes = sorted(os.walk(data_path).__next__()[1])
# List each sub-directory (the classes)
for c in classes:
c_dir = os.path.join(data_path, c)
walk = os.walk(c_dir).__next__()
# Add each image to the training set
for sample in walk[2]:
imagepaths.append(os.path.join(c_dir, sample))
labels.append(label)
label += 1
total_input = len(labels)
# Convert to Tensor
imagepaths = tf.convert_to_tensor(imagepaths, dtype=tf.string)
labels = tf.convert_to_tensor(labels, dtype=tf.int32)
# Build a TF Queue, shuffle data
dataset = tf.data.Dataset.from_tensor_slices((imagepaths, labels))
# read, decode, resize and normalize images on RGB range
def parse(imagepath, label):
image = tf.read_file(imagepath)
image = tf.image.decode_jpeg(image, channels=channels)
image = tf.image.resize_images(image, [img_length, img_width])
image = image * 1.0/255
return image, label
dataset = dataset.map(parse)
dataset = dataset.shuffle(buffer_size=batch_size*10)
dataset = dataset.batch(batch_size)
iterator = dataset.make_one_shot_iterator()
next_batch = iterator.get_next()
# hypervariables for layers' output size
K = 16
L = 32
M = 200
x = tf.placeholder(tf.float32, [None, 4326])
x_shaped = tf.reshape(x, [-1, img_length, img_width, 3])
y = tf.placeholder(tf.float32, [None, 1])
# weight, bias with stride size and activation method after convolution for layer 1
W1 = tf.Variable(tf.truncated_normal([5, 5, 3, K], stddev=0.03))
b1 = tf.Variable(tf.truncated_normal([K], stddev=0.01))
stride = 1
y1 = tf.nn.relu(tf.nn.conv2d(x_shaped, W1, strides=[1, stride, stride, 1], padding='SAME') + b1)
# weight, bias with stride size and activation method after convolution for layer 2
W2 = tf.Variable(tf.truncated_normal([5, 5, K, L], stddev=0.03))
b2 = tf.Variable(tf.truncated_normal([L], stddev=0.01))
stride = 2 # output is 14x14
y2 = tf.nn.relu(tf.nn.conv2d(y1, W2, strides=[1, stride, stride, 1], padding='SAME') + b2)
yflat = tf.reshape(y2, [-1, 7 * 7 * L])
W3 = tf.Variable(tf.truncated_normal([7 * 7 * L, M], stddev=0.1))
b3 = tf.Variable(tf.truncated_normal([M], stddev=0.01))
y3 = tf.nn.relu(tf.matmul(yflat, W3) + b3)
W4 = tf.Variable(tf.truncated_normal([M, 10], stddev=0.1))
b4 = tf.Variable(tf.truncated_normal([10], stddev=0.01))
ylogits = tf.matmul(y3, W4) + b4
y_ = tf.nn.softmax(ylogits)
# add cross entropy for back prop
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=ylogits, labels=y_))
# add an optimiser for back prop
optimiser = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy)
# define an accuracy assessment operation
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
########## temporary solution for test_x, test_y
test_x, test_y = sess.run(next_batch)
total_batch = int(total_input / batch_size)
# define the iterator for the network
for epoch in range(epochs):
avg_cost = 0
for i in range(total_batch):
batch_x, batch_y = sess.run(next_batch)
########## ValueError: Cannot feed value of shape (100,) for Tensor 'Placeholder_1:0' -> y: batch_y
_, c = sess.run([optimiser, cross_entropy], feed_dict={x_shaped: batch_x, y: batch_y})
avg_cost += c / total_batch
test_acc = sess.run(accuracy,feed_dict={x: test_x, y: test_y})
print("Epoch:", (epoch + 1), "cost =", "{:.3f}".format(avg_cost), " test accuracy: {:.3f}".format(test_acc))
summary = sess.run(merged, feed_dict={x: test_x, y: test_y})
print("\nTraining complete!")
print(sess.run(accuracy, feed_dict={x: test_x, y: test_y}))
are you sure that this part:
_, c = sess.run([optimiser, cross_entropy], feed_dict={x_shaped: batch_x, y: batch_y})
doesn't have to be:
_, c = sess.run([optimiser, cross_entropy], feed_dict={x: batch_x, y: batch_y})
furthermore you've a batchsize of 100, the data is right of the array, the shape is not complete.
What you have (dummy example)
np.zeros((100,)).shape
>>> (100,)
here 100 matches the '?' of the required shape: shape '(?, 1)', the one can be easily added, and it often occurs that numpy does not do this. See the following code:
np.expand_dims(np.zeros((100,)), axis=-1).shape
>>> (100, 1)
axis -1 stand for the last axis, you basically tell numpy to add a dimension in the end. This does not affect the data itself, but the shape of the array. So your code should be:
_, c = sess.run([optimiser, cross_entropy], feed_dict={x_shaped: batch_x, y:np.expand_dims(batch_y, axis=-1)})
I want to use the dropout function of tensorflow to check if I can improve my results (TPR, FPR) of my recurrent neural network.
However I implemented it by following a guide. So I am not sure if I did any mistakes. But if I train my model with with e.g. 10 epochs I get nearly the same results after validation. Thats why I am not sure if I use the dropout function correctly. Is this the right implementation in the following code or did I made something wrong? If I did everything right why do I get nearly the same result then?
hm_epochs = 10
n_classes = 2
batch_size = 128
chunk_size = 341
n_chunks = 5
rnn_size = 32
dropout_prop = 0.5 # Dropout, probability to drop a unit
batch_size_validation = 65536
x = tf.placeholder('float', [None, n_chunks, chunk_size])
y = tf.placeholder('float')
def recurrent_neural_network(x):
layer = {'weights':tf.Variable(tf.random_normal([rnn_size, n_classes])),
'biases':tf.Variable(tf.random_normal([n_classes]))}
x = tf.transpose(x, [1,0,2])
x = tf.reshape(x, [-1, chunk_size])
x = tf.split(x, n_chunks, 0)
lstm_cell = rnn.BasicLSTMCell(rnn_size)
outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']
#DROPOUT Implementation -> is this code really working?
#The result is nearly the same after 20 epochs...
output_layer = tf.layers.dropout(output, rate=dropout_prop)
return output
def train_neural_network(x):
prediction = recurrent_neural_network(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction,labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(1,hm_epochs+1):
epoch_loss = 0
for i in range(0, training_data.shape[0], batch_size):
epoch_x = np.array(training_data[i:i+batch_size, :, :], dtype='float')
epoch_y = np.array(training_labels[i:i+batch_size, :], dtype='float')
if len(epoch_x) != batch_size:
epoch_x = epoch_x.reshape((len(epoch_x), n_chunks, chunk_size))
else:
epoch_x = epoch_x.reshape((batch_size, n_chunks, chunk_size))
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
train_neural_network(x)
print("rnn - finished!")
In its most basic form, dropout should happen inside the cell and applied to the weights. You only applied it afterwards. This article explained it pretty well with some good visualization and few variations.
To use it in your code, you can either
Implement your own RNN cell where the keep probability is a parameter that initializes the cell or is a parameter that got passed in when it's called every time.
Use an rnn dropout wrapper here.
I'm having a hard time trying to set up a multilayer perceptron neural network to predict the next value of a time-series using Tensorflow.
I read the time-series from a file, split it into three arrays and use those arrays to train, test and validate the network. Unfortunately, my network answers 0.9999 to every input I give to it.
The image below shows the values I expect my network to outcome, note that they range from 2.8 to 4.2
Now, these are the values my network predicts. Though they seem all the same, they're actually 0.9999... (and some difference in the 9th decimal place).
import csv
import numpy as np
from statsmodels.tsa.tsatools import lagmat
import tensorflow as tf
# Data split (values represent percentage)
perc_train = 0.5
perc_test = 0.4
perc_eval = 0.1
# Parameters
learning_rate = 10 ** -3
min_step_size_train = 10 ** -5
training_epochs = 250
display_step = 1
# Network Parameters
n_input = 15
n_classes = 1
n_hidden = (n_input + n_classes) / 2
def get_nn_sets(pmX, pmY):
'''
Splits data into three subsets
'''
trainningIndex = int(len(pmX) * perc_train)
validationIndex = int(len(pmX) * perc_test) + trainningIndex
pmXFit = pmX[:trainningIndex, :]
pmYFit = pmY[:trainningIndex]
pmXTest = pmX[trainningIndex:validationIndex, :]
pmYTest = pmY[trainningIndex:validationIndex]
pmxEvaluate = pmX[validationIndex:, :]
pmYEvaluate = pmY[validationIndex:]
return pmXFit, pmYFit, pmXTest, pmYTest, pmxEvaluate, pmYEvaluate
def read_dollar_file(clip_first = 4000):
'''
Reads the CSV file containing the dollar value for Brazilian real during the years
-----
RETURNS:
A matrix with the file contents
'''
str_vals = []
with open('dolar.csv', 'rb') as csvfile:
spamreader = csv.reader(csvfile, delimiter=',')
for row in spamreader:
# retrieving the first column of the file (the dollar value)
str_vals.append(row[1])
# removing title
str_vals = str_vals[1:]
# removing the empty strings (sunday and holidays have no values)
y = filter(None, str_vals)
# converting from string to float values
y = np.array(y).astype(np.float)
# checking if initial elements should be discarded
if (clip_first > 0):
y = y[clip_first:]
return y
# Create model
def get_multilayer_perceptron(x):
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden], dtype=tf.float64)),
'out': tf.Variable(tf.random_normal([n_hidden, n_classes], dtype=tf.float64))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden], dtype=tf.float64)),
'out': tf.Variable(tf.random_normal([n_classes], dtype=tf.float64))
}
# Hidden layer with relu activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with tanh activation
out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
out_layer = tf.nn.tanh(out_layer)
return out_layer
def run_mlp(inp, outp):
pmXFit, pmYFit, pmXTest, pmYTest, pmXEvaluate, pmYEvaluate = get_nn_sets(inp, outp)
# tf Graph input
x = tf.placeholder("float64", [None, n_input])
y = tf.placeholder("float64", [None, n_classes])
# Construct model
pred = get_multilayer_perceptron(x)
# Define loss and optimizer
cost = tf.nn.l2_loss(tf.sub(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
# Training cycle
last_cost = min_step_size_train + 1
for epoch in range(training_epochs):
# Trainning data
for i in range(len(pmXFit)):
batch_x = np.reshape(pmXFit[i,:], (1, n_input))
batch_y = np.reshape(pmYFit[i], (1, n_classes))
# Run optimization
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
# Calculating data error
c = 0.0
for i in range(len(pmXTest)):
batch_x = np.reshape(pmXTest[i,:], (1, n_input))
batch_y = np.reshape(pmYTest[i], (1, n_classes))
# Run Cost function
c += sess.run(cost, feed_dict={x: batch_x, y: batch_y})
c /= len(pmXTest)
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.30f}".format(c))
if abs(c - last_cost) < min_step_size_train:
break
last_cost = c
nn_predictions = np.array([])
for i in range(len(pmXEvaluate)):
batch_x = np.reshape(pmXEvaluate[i,:], (1, n_input))
nn_predictions = np.append(nn_predictions, sess.run(pred, feed_dict={x: batch_x})[0])
print("Optimization Finished!")
nn_predictions.flatten()
return [pmYEvaluate, nn_predictions]
inp = lagmat(read_dollar_file(), n_input, trim='both')
outp = inp[1:, 0]
inp = inp[:-1]
real_value, predicted_value = run_mlp(inp, outp)
I also tried different cost functions and it didn't work. I know I may be missing something really stupid, so I really appreciate your help.
Thanks.
From your code:
out_layer = tf.nn.tanh(out_layer)
tanh can only output values between (-1.0, 1.0), remove this line will make it do better.