How can I complete the following GRU-based RNN written in TensorFlow? - python

So far I have written the following code:
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
# load pickled objects (x and y)
x_input, y_actual = pickle.load(open('sample_input.pickle', 'rb'))
x_input = np.reshape(x_input, (50, 1))
y_actual = np.reshape(y_actual, (50, 1))
# parameters
batch_size = 50
hidden_size = 100
# create network graph
input_data = tf.placeholder(tf.float32, [batch_size, 1])
output_data = tf.placeholder(tf.float32, [batch_size, 1])
cell = tf.nn.rnn_cell.GRUCell(hidden_size)
initial_state = cell.zero_state(batch_size, tf.float32)
hidden_state = initial_state
output_of_cell, hidden_state = cell(inputs=input_data, state=hidden_state)
init_op = tf.initialize_all_variables()
softmax_w = tf.get_variable("softmax_w", [hidden_size, 1], )
softmax_b = tf.get_variable("softmax_b", [1])
logits = tf.matmul(output_of_cell, softmax_w) + softmax_b
probabilities = tf.nn.softmax(logits)
sess = tf.Session()
sess.run(init_op)
something = sess.run([probabilities, hidden_state], feed_dict={input_data:x_input, output_data:y_actual})
#cost = tf.nn.sigmoid_cross_entropy_with_logits(logits, output_data)
#sess.close()
But I am getting an error saying softmax_w/b are uninitialized variables.
I do not understand how I should use these W and b and carry out the training operation.
Something like the following:
## some cost function
## training operation minimizing cost function using gradient descent optimizer

tf.initialize_all_variables() gets the "current" set of variables from the graph. Since you are creating softmax_w and softmax_b after your call to tf.initialize_all_variables(), they are not in the list that tf.initialize_all_variables() consults, and hence not initialized when you run sess.run(init_op). The following should work:
softmax_w = tf.get_variable("softmax_w", [hidden_size, 1], )
softmax_b = tf.get_variable("softmax_b", [1])
init_op = tf.initialize_all_variables()
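Beyond the reordering, the question also asks for a cost function and a training operation. A hedged sketch of how that part could look, using the sigmoid cross-entropy hinted at in the commented-out line (the learning rate of 0.01 is an arbitrary placeholder, not something from the question):
logits = tf.matmul(output_of_cell, softmax_w) + softmax_b
# cost function: sigmoid cross-entropy between logits and the fed targets
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=output_data))
# training operation minimizing the cost with gradient descent
train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)
init_op = tf.initialize_all_variables()  # created after every variable exists
with tf.Session() as sess:
    sess.run(init_op)
    _, c = sess.run([train_op, cost],
                    feed_dict={input_data: x_input, output_data: y_actual})
    print(c)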

Related

TensorFlow not recognising feed_dict input

I am running a simple neural network for linear regression. However, TensorFlow complains that my feed_dict placeholders are not an element of the graph, even though my placeholders and my model are all defined within my graph, as can be seen below:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense

with tf.Graph().as_default():
    x = tf.placeholder(dtype=tf.float32, shape=(None, 4))
    y = tf.placeholder(dtype=tf.float32, shape=(None, 4))
    model = tf.keras.Sequential([
        Dense(units=4, activation=tf.nn.relu)
    ])
    y = model(x)
    loss = tf.reduce_mean(tf.square(y - x))
    train_op = tf.train.AdamOptimizer().minimize(loss)

with tf.Session() as sess:
    sess.run(train_op, feed_dict={x: np.ones(dtype='float32', shape=(4,)),
                                  y: 5 * np.ones(dtype='float32', shape=(4,))})
This gives an error:
TypeError: Cannot interpret feed_dict key as Tensor: Tensor
Tensor("Placeholder:0", shape=(?, 4), dtype=float32) is not an element of this graph.
____________UPDATE________________
Following the advice from @Silgon and @Mcangus, I have modified the code:
g = tf.Graph()
with g.as_default():
    x = tf.placeholder(dtype=tf.float32, shape=(None, 4))
    model = tf.keras.Sequential([
        Dense(units=4, activation=tf.nn.relu)
    ])
    y = model(x)
    loss = tf.reduce_mean(tf.square(y - x))
    train_op = tf.train.AdamOptimizer().minimize(loss)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

with tf.Session(graph=g) as sess:
    sess.run(init_op)
    for i in range(5):
        _, answer = sess.run([train_op, loss],
                             feed_dict={x: np.ones(dtype='float32', shape=(1, 4)),
                                        y: 5 * np.ones(dtype='float32', shape=(1, 4))})
        print(answer)
However the model doesn't appear to be learning:
16.0
16.0
16.0
16.0
16.0
The error tells you that the variable is not an element of the graph. It might be because it's not in the same scope. One way to solve it is to have a structure like the following.
# define a graph
graph = tf.Graph()
with graph.as_default():
    # placeholder
    x = tf.placeholder(...)
    y = tf.placeholder(...)
    # create model
    model = create_model(x, w, b)

with tf.Session(graph=graph) as sess:
    # initialize all the variables
    sess.run(init)
Also, as @Mcangus points out, be careful with the definition of your variables.
I believe your issue is this line:
y = model(x)
You overwrite y with the output of your model so it's no longer a placeholder.
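Under that reading, a minimal sketch of a corrected version (giving the model output its own name, y_pred, and keeping y as the target placeholder; everything else follows the updated code from the question):
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense

g = tf.Graph()
with g.as_default():
    x = tf.placeholder(dtype=tf.float32, shape=(None, 4))
    y = tf.placeholder(dtype=tf.float32, shape=(None, 4))  # target stays a placeholder
    model = tf.keras.Sequential([
        Dense(units=4, activation=tf.nn.relu)
    ])
    y_pred = model(x)                             # model output under its own name
    loss = tf.reduce_mean(tf.square(y_pred - y))  # compare prediction to target
    train_op = tf.train.AdamOptimizer().minimize(loss)
    init_op = tf.global_variables_initializer()

with tf.Session(graph=g) as sess:
    sess.run(init_op)
    for i in range(5):
        _, answer = sess.run([train_op, loss],
                             feed_dict={x: np.ones((1, 4), dtype='float32'),
                                        y: 5 * np.ones((1, 4), dtype='float32')})
        print(answer)
With the loss computed against the y placeholder instead of x, the printed values should now decrease rather than stay at 16.0.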

Tensorflow model outputs probability values larger than 1

I'm working on a classification program where I'm training my model to predict whether an object is a nut or a screw. I created my own dataset since I could not find one. I trained my model, but I'm not getting correct predictions. Probability values go beyond 1; basically I get garbage values.
I get this predicted value: [[9.990779e-01 9.220659e-04]]
#Training code
import dataset
import tensorflow as tf
import time
from datetime import timedelta
import math
import random
import numpy as np
import os
# Adding Seed so that random initialization is consistent
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)
batch_size = 20
# Prepare input data
classes = os.listdir('training_set')
num_classes = len(classes)
# 20% of the data will automatically be used for validation
validation_size = 0.2
img_size = 128
num_channels = 3
train_path = 'training_set'
# We shall load all the training and validation images and labels into memory using openCV and use that during training
data = dataset.read_train_sets(train_path, img_size, classes,
validation_size=validation_size)
print("Complete reading input data. Will Now print a snippet of it")
print("Number of files in Training-
set:\t\t{}".format(len(data.train.labels)))
print("Number of files in Validation-
set:\t{}".format(len(data.valid.labels)))
session = tf.Session()
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size,
num_channels], name='x')
## labels
y_true = tf.placeholder(tf.float32, shape=[None, num_classes],
name='y_true')
y_true_cls = tf.argmax(y_true, dimension=1)
##Network graph params
filter_size_conv1 = 3
num_filters_conv1 = 32
filter_size_conv2 = 3
num_filters_conv2 = 32
filter_size_conv3 = 3
num_filters_conv3 = 32
fc_layer_size = 128
def create_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.05))

def create_biases(size):
    return tf.Variable(tf.constant(0.05, shape=[size]))

def create_convolutional_layer(input,
                               num_input_channels,
                               conv_filter_size,
                               num_filters):
    ## We shall define the weights that will be trained using create_weights function.
    weights = create_weights(shape=[conv_filter_size, conv_filter_size, num_input_channels, num_filters])
    ## We create biases using the create_biases function. These are also trained.
    biases = create_biases(num_filters)
    ## Creating the convolutional layer
    layer = tf.nn.conv2d(input=input,
                         filter=weights,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
    layer += biases
    ## We shall be using max-pooling.
    layer = tf.nn.max_pool(value=layer,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1],
                           padding='SAME')
    ## Output of pooling is fed to Relu which is the activation function for us.
    layer = tf.nn.relu(layer)
    return layer

def create_flatten_layer(layer):
    # We know that the shape of the layer will be [batch_size img_size img_size num_channels]
    # But let's get it from the previous layer.
    layer_shape = layer.get_shape()
    ## Number of features will be img_height * img_width * num_channels. But we shall calculate it in place of hard-coding it.
    num_features = layer_shape[1:4].num_elements()
    ## Now, we Flatten the layer so we shall have to reshape to num_features
    layer = tf.reshape(layer, [-1, num_features])
    return layer

def create_fc_layer(input,
                    num_inputs,
                    num_outputs,
                    use_relu=True):
    # Let's define trainable weights and biases.
    weights = create_weights(shape=[num_inputs, num_outputs])
    biases = create_biases(num_outputs)
    # Fully connected layer takes input x and produces wx+b. Since these are matrices, we use the matmul function in TensorFlow.
    layer = tf.matmul(input, weights) + biases
    if use_relu:
        layer = tf.nn.relu(layer)
    return layer
layer_conv1 = create_convolutional_layer(input=x,
num_input_channels=num_channels,
conv_filter_size=filter_size_conv1,
num_filters=num_filters_conv1)
layer_conv2 = create_convolutional_layer(input=layer_conv1,
num_input_channels=num_filters_conv1,
conv_filter_size=filter_size_conv2,
num_filters=num_filters_conv2)
layer_conv3 = create_convolutional_layer(input=layer_conv2,
num_input_channels=num_filters_conv2,
conv_filter_size=filter_size_conv3,
num_filters=num_filters_conv3)
layer_flat = create_flatten_layer(layer_conv3)
layer_fc1 = create_fc_layer(input=layer_flat,
                            num_inputs=layer_flat.get_shape()[1:4].num_elements(),
                            num_outputs=fc_layer_size,
                            use_relu=True)
layer_fc2 = create_fc_layer(input=layer_fc1,
num_inputs=fc_layer_size,
num_outputs=num_classes,
use_relu=False)
y_pred = tf.nn.softmax(layer_fc2, name='y_pred')
y_pred_cls = tf.argmax(y_pred, dimension=1)
session.run(tf.global_variables_initializer())
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2,
labels=y_true)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
session.run(tf.global_variables_initializer())
def show_progress(epoch, feed_dict_train, feed_dict_validate, val_loss):
    acc = session.run(accuracy, feed_dict=feed_dict_train)
    val_acc = session.run(accuracy, feed_dict=feed_dict_validate)
    msg = "Training Epoch {0} --- Training Accuracy: {1:>6.1%}, Validation Accuracy: {2:>6.1%}, Validation Loss: {3:.3f}"
    print(msg.format(epoch + 1, acc, val_acc, val_loss))
total_iterations = 0
saver = tf.train.Saver()
def train(num_iteration):
    global total_iterations
    for i in range(total_iterations,
                   total_iterations + num_iteration):
        x_batch, y_true_batch, _, cls_batch = data.train.next_batch(batch_size)
        x_valid_batch, y_valid_batch, _, valid_cls_batch = data.valid.next_batch(batch_size)
        feed_dict_tr = {x: x_batch,
                        y_true: y_true_batch}
        feed_dict_val = {x: x_valid_batch,
                         y_true: y_valid_batch}
        session.run(optimizer, feed_dict=feed_dict_tr)
        if i % int(data.train.num_examples / batch_size) == 0:
            val_loss = session.run(cost, feed_dict=feed_dict_val)
            epoch = int(i / int(data.train.num_examples / batch_size))
            show_progress(epoch, feed_dict_tr, feed_dict_val, val_loss)
            saver.save(session, 'C:\\Nutsbolts\\nuts-screws-model')
    total_iterations += num_iteration
train(num_iteration=3000)
#Prediction code
import tensorflow as tf
import numpy as np
import os,glob,cv2
import sys,argparse
# First, pass the path of the image
dir_path = 'C:\\nutsbolts\\testing_set\\nuts'
image_path= 'nuts11.jpg'
filename = dir_path +'/' +image_path
image_size=128
num_channels=3
images = []
# Reading the image using OpenCV
image = cv2.imread(filename)
# Resizing the image to our desired size and preprocessing will be done exactly as done during training
image = cv2.resize(image, (image_size, image_size),0,0, cv2.INTER_LINEAR)
images.append(image)
images = np.array(images, dtype=np.uint8)
images = images.astype('float32')
images = np.multiply(images, 1.0/255.0)
# The input to the network is of shape [None image_size image_size num_channels]. Hence we reshape.
x_batch = images.reshape(1, image_size,image_size,num_channels)
## Let us restore the saved model
sess = tf.Session()
# Step-1: Recreate the network graph. At this step only graph is created.
saver = tf.train.import_meta_graph('nuts-screws-model.meta')
# Step-2: Now let's load the weights saved using the restore method.
saver.restore(sess, tf.train.latest_checkpoint('./'))
# Accessing the default graph which we have restored
graph = tf.get_default_graph()
# Now, let's get hold of the op that can be processed to get the output.
# In the original network y_pred is the tensor that is the prediction of the network
y_pred = graph.get_tensor_by_name("y_pred:0")
## Let's feed the images to the input placeholders
x= graph.get_tensor_by_name("x:0")
y_true = graph.get_tensor_by_name("y_true:0")
y_test_images = np.zeros((1, len(os.listdir('testing_set'))))
### Creating the feed_dict that is required to be fed to calculate y_pred
feed_dict_testing = {x: x_batch, y_true: y_test_images}
result=sess.run(y_pred, feed_dict=feed_dict_testing)
# result is of this format [probability_of_nuts probability_of_screws]
print(result)
9.990779e-01 is actually below 1. You can read it as 9.990779 × 10^(-1), i.e. 0.9990779.
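As a quick sanity check in plain NumPy, using the values from the question:
import numpy as np

pred = np.array([[9.990779e-01, 9.220659e-04]])
print(float("9.990779e-01"))  # 0.9990779, i.e. below 1
print(pred.sum())             # roughly 1.0, as expected for softmax outputs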

deploying the Tensorflow model in Python

I need help implementing a TensorFlow model in real time.
While training, everything works fine, but when I move on to a real-time forecast or prediction, the output I receive is way off.
I do not know why this is happening.
I used the code from here as a reference: https://www.kaggle.com/raoulma/ny-stock-price-prediction-rnn-lstm-gru/notebook
and tried to implement and deploy it using the same code with a few changes.
See the following code:
import numpy as np
import pandas as pd
import sklearn
import sklearn.preprocessing
import datetime
import os
import tensorflow as tf
df = pd.read_csv("Realtime_Values.csv", index_col = 0)
df.info()
def load_data(stock, seq_len):
    data_raw = stock.as_matrix()  # convert to numpy array
    data = []
    for index in range(len(data_raw) - seq_len):
        data.append(data_raw[index: index + seq_len])
    #print(len(data))
    data = np.array(data)
    x_forecast = data[:,:-1,:]
    return x_forecast
def normalize_data(df):
    cols = list(df.columns.values)
    min_max_scaler = sklearn.preprocessing.MinMaxScaler()
    df = pd.DataFrame(min_max_scaler.fit_transform(df.values))
    df.columns = cols
    return df
model_path ="modelsOHLC"
seq_len = 9
# parameters
n_steps = seq_len-1
n_inputs = 4
n_neurons = 100
n_outputs = 4
n_layers = 4
learning_rate = 0.01
batch_size = 10
n_epochs = 1000
tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_outputs])
layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.elu)
for layer in range(n_layers)]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)
rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
outputs = outputs[:,n_steps-1,:] # keep only last output of sequence
loss = tf.reduce_mean(tf.square(outputs - y)) # loss function = mean squared error
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
saver = tf.train.Saver()
sess =tf.Session()
sess.run(tf.global_variables_initializer())
if tf.train.checkpoint_exists(tf.train.latest_checkpoint(model_path)):
    saver.restore(sess, tf.train.latest_checkpoint(model_path))
df = normalize_data(df)
x_forecast = load_data(df,seq_len)
y_forecast_pred = sess.run(outputs, feed_dict={X: x_forecast})
print(y_forecast_pred)
Can anyone help me get the above code to run in real time without any issues?
There is a possibility that the code fails to find the saved weights from training, so the predictions are generated from an untrained model. Your code for restoring the trained model is:
if tf.train.checkpoint_exists(tf.train.latest_checkpoint(model_path)):
    saver.restore(sess, tf.train.latest_checkpoint(model_path))
To fix this problem:
Add debugging code such as print("checkpoint exists!") inside the restore branch, as sketched below.
Place a breakpoint in a debugger before or after saver.restore(...) to check whether a checkpoint is actually found and restored.
Look at model_path to ensure your checkpoints are saved correctly.
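A hedged sketch of those checks, reusing model_path, saver, and sess from the question (the message strings are only examples):
ckpt = tf.train.latest_checkpoint(model_path)
if ckpt is not None and tf.train.checkpoint_exists(ckpt):
    print("checkpoint exists! restoring from", ckpt)
    saver.restore(sess, ckpt)
else:
    # without a restored checkpoint the predictions come from randomly initialized weights
    raise RuntimeError("no checkpoint found in " + model_path)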

Tensorflow: Why must `saver = tf.train.Saver()` be declared after variables are declared?

Important clarification: I was only running this section, the graph definition, in a notebook environment. I had not run an actual session yet.
When running this code:
with graph.as_default(): #took out " , tf.device('/cpu:0')"
    saver = tf.train.Saver()
    valid_examples = np.array(random.sample(range(1, valid_window), valid_size)) #put inside graph to get new words each time
    train_dataset = tf.placeholder(tf.int32, shape=[batch_size, cbow_window*2])
    train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
    valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
    valid_datasetSM = tf.constant(valid_examples, dtype=tf.int32)
    embeddings = tf.get_variable('embeddings',
        initializer=tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    softmax_weights = tf.get_variable('softmax_weights',
        initializer=tf.truncated_normal([vocabulary_size, embedding_size],
                                        stddev=1.0 / math.sqrt(embedding_size)))
    softmax_biases = tf.get_variable('softmax_biases',
        initializer=tf.zeros([vocabulary_size]), trainable=False)
    embed = tf.nn.embedding_lookup(embeddings, train_dataset) #train data set is
    embed_reshaped = tf.reshape(embed, [batch_size*cbow_window*2, embedding_size])
    segments = np.arange(batch_size).repeat(cbow_window*2)
    averaged_embeds = tf.segment_mean(embed_reshaped, segments, name=None)
    #return tf.reduce_mean(tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
    #                                                 labels=train_labels, num_sampled=num_sampled, num_classes=vocabulary_size))
    loss = tf.reduce_mean(
        tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
                                   labels=train_labels, num_sampled=num_sampled, num_classes=vocabulary_size))
    norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keepdims=True))
    normSM = tf.sqrt(tf.reduce_sum(tf.square(softmax_weights), 1, keepdims=True))
    normalized_embeddings = embeddings / norm
    normalized_embeddingsSM = softmax_weights / normSM
    valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)
    valid_embeddingsSM = tf.nn.embedding_lookup(normalized_embeddingsSM, valid_datasetSM)
    similarity = tf.matmul(valid_embeddings, tf.transpose(normalized_embeddings))
    similaritySM = tf.matmul(valid_embeddingsSM, tf.transpose(normalized_embeddingsSM))
I got this error
ValueError: No variables to save
while pointing to this line
saver = tf.train.Saver()
I searched Stack Overflow and found this answer:
Tensorflow ValueError: No variables to save from
So I simply put that line at the bottom of the graph definition, like so:
with graph.as_default(): #took out " , tf.device('/cpu:0')"
    valid_examples = np.array(random.sample(range(1, valid_window), valid_size)) #put inside graph to get new words each time
    train_dataset = tf.placeholder(tf.int32, shape=[batch_size, cbow_window*2])
    train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
    valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
    valid_datasetSM = tf.constant(valid_examples, dtype=tf.int32)
    embeddings = tf.get_variable('embeddings',
        initializer=tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    softmax_weights = tf.get_variable('softmax_weights',
        initializer=tf.truncated_normal([vocabulary_size, embedding_size],
                                        stddev=1.0 / math.sqrt(embedding_size)))
    softmax_biases = tf.get_variable('softmax_biases',
        initializer=tf.zeros([vocabulary_size]), trainable=False)
    embed = tf.nn.embedding_lookup(embeddings, train_dataset) #train data set is
    embed_reshaped = tf.reshape(embed, [batch_size*cbow_window*2, embedding_size])
    segments = np.arange(batch_size).repeat(cbow_window*2)
    averaged_embeds = tf.segment_mean(embed_reshaped, segments, name=None)
    loss = tf.reduce_mean(
        tf.nn.sampled_softmax_loss(weights=softmax_weights, biases=softmax_biases, inputs=averaged_embeds,
                                   labels=train_labels, num_sampled=num_sampled, num_classes=vocabulary_size))
    norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keepdims=True))
    normSM = tf.sqrt(tf.reduce_sum(tf.square(softmax_weights), 1, keepdims=True))
    normalized_embeddings = embeddings / norm
    normalized_embeddingsSM = softmax_weights / normSM
    valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)
    valid_embeddingsSM = tf.nn.embedding_lookup(normalized_embeddingsSM, valid_datasetSM)
    similarity = tf.matmul(valid_embeddings, tf.transpose(normalized_embeddings))
    similaritySM = tf.matmul(valid_embeddingsSM, tf.transpose(normalized_embeddingsSM))
    saver = tf.train.Saver()
And then there were no errors!
Why is this so? The graph definition only defines the graph and does not run anything. Perhaps it's a bug-prevention measure?
It does not have to. tf.train.Saver has a defer_build argument that, if set to True, allows you to define variables after it has been constructed. You then need to call build explicitly though.
saver = tf.train.Saver(defer_build=True)
# construct your graph, create variables...
...
saver.build()
graph.finalize()
# go on with training
From the documentation on tf.train.Saver the __init__ method has a parameter var_list with the description:
var_list: A list of Variable/SaveableObject, or a dictionary mapping names
to SaveableObjects. If None, defaults to the list of all saveable objects.
This suggests that the saver makes a list of variables to save when it's first created, which by default contains all of the variables it can find. If no variables have been made, an error makes sense since there are no variables to save.
Random examples:
import tensorflow as tf
saver = tf.train.Saver()
The above throws an error, and so does the following:
import tensorflow as tf
x = tf.placeholder(dtype=tf.float32)
saver = tf.train.Saver()
But this last example runs:
import tensorflow as tf
x = tf.Variable(0.0)
saver = tf.train.Saver()
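For illustration, the saver can also be given an explicit var_list so that it only tracks the variables you name; a hedged sketch, not taken from the question:
import tensorflow as tf

x = tf.Variable(0.0, name="x")
y = tf.Variable(1.0, name="y")
# only x will be written to and read from checkpoints created by this saver
saver = tf.train.Saver(var_list=[x])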

LSTM won't overfit training data

I have been trying to use an LSTM for regression in TensorFlow, but it doesn't fit the data. I have successfully fit the same data in Keras (with the same size network). My code for trying to overfit a sine wave is below:
import tensorflow as tf
import numpy as np

yt = np.cos(np.linspace(0, 2*np.pi, 256))
xt = np.array([yt[i-50:i] for i in range(50, len(yt))])[...,None]
yt = yt[-xt.shape[0]:]

g = tf.Graph()
with g.as_default():
    x = tf.constant(xt, dtype=tf.float32)
    y = tf.constant(yt, dtype=tf.float32)
    lstm = tf.nn.rnn_cell.BasicLSTMCell(32)
    outputs, state = tf.nn.dynamic_rnn(lstm, x, dtype=tf.float32)
    pred = tf.layers.dense(outputs[:,-1], 1)
    loss = tf.reduce_mean(tf.square(pred-y))
    train_op = tf.train.AdamOptimizer().minimize(loss)
    init = tf.global_variables_initializer()

sess = tf.InteractiveSession(graph=g)
sess.run(init)
for i in range(200):
    _, l = sess.run([train_op, loss])
    print(l)
This results in an MSE of 0.436067 (while Keras got to 0.0022 after 50 epochs), and the predictions range from -0.1860 to -0.1798. What am I doing wrong here?
Edit:
When I change my loss function to the following, the model fits properly:
def pinball(y_true, y_pred):
    tau = np.arange(1, 100).reshape(1, -1) / 100
    pin = tf.reduce_mean(tf.maximum(y_true[:,None] - y_pred, 0) * tau +
                         tf.maximum(y_pred - y_true[:,None], 0) * (1 - tau))
    return pin
I also change the assignments of pred and loss to
pred = tf.layers.dense(outputs[:,-1], 99)
loss = pinball(y, pred)
This results in a decrease of loss from 0.3 to 0.003 as it trains, and seems to properly fit the data.
Looks like a shape/broadcasting issue. Here's a working version:
import tensorflow as tf
import numpy as np

yt = np.cos(np.linspace(0, 2*np.pi, 256))
xt = np.array([yt[i-50:i] for i in range(50, len(yt))])
yt = yt[-xt.shape[0]:]

g = tf.Graph()
with g.as_default():
    x = tf.constant(xt, dtype=tf.float32)
    y = tf.constant(yt, dtype=tf.float32)
    lstm = tf.nn.rnn_cell.BasicLSTMCell(32)
    outputs, state = tf.nn.dynamic_rnn(lstm, x[None, ...], dtype=tf.float32)
    pred = tf.squeeze(tf.layers.dense(outputs, 1), axis=[0, 2])
    loss = tf.reduce_mean(tf.square(pred-y))
    train_op = tf.train.AdamOptimizer().minimize(loss)
    init = tf.global_variables_initializer()

sess = tf.InteractiveSession(graph=g)
sess.run(init)
for i in range(200):
    _, l = sess.run([train_op, loss])
    print(l)
x gets a batch dimension of 1 before going into dynamic_rnn, since with time_major=False the first dimension is expected to be a batch dimension. It's important that the last dimension of the output of tf.layers.dense gets squeezed off so that it doesn't broadcast with y (TensorShape([256, 1]) and TensorShape([256]) broadcast to TensorShape([256, 256])). With those fixes it converges:
5.78507e-05
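The broadcasting pitfall can be seen with shapes alone; a small NumPy illustration, independent of the LSTM:
import numpy as np

pred = np.zeros((256, 1), dtype=np.float32)  # dense-layer output keeps a trailing dimension of 1
y = np.zeros((256,), dtype=np.float32)       # the target vector
print((pred - y).shape)                      # (256, 256): the silent broadcast behind the flat loss
print((pred[:, 0] - y).shape)                # (256,): what the squared error should actually see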
You are not passing on the state from one call of dynamic_rnn to the next. That's the problem for sure.
Also, why take only the last item of the output through the dense layer and onward?
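For completeness, a hedged sketch of how hidden state could be carried across separate dynamic_rnn runs via placeholders, if one wanted to follow this suggestion (the names c_ph and h_ph are illustrative; whether this is needed here is debatable given the fix above):
# illustrative only: feed the previous run's final state back in as the initial state
c_ph = tf.placeholder(tf.float32, [1, 32])
h_ph = tf.placeholder(tf.float32, [1, 32])
init_state = tf.nn.rnn_cell.LSTMStateTuple(c_ph, h_ph)
outputs, final_state = tf.nn.dynamic_rnn(lstm, x[None, ...], initial_state=init_state)
# at run time: feed zeros for the first call, then feed back the evaluated final_state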
