I am constructing an LSTM network, and my input has dimensions 100*100*83 (batch_size=100, steps=100, char_vector=83). I build two LSTM layers, each with 512 hidden units.
# coding: utf-8
from __future__ import print_function
import tensorflow as tf
import numpy as np
import time


class CharRNN:
    def __init__(self, num_classes, batch_size=64, num_steps=50, lstm_size=128, num_layers=2,
                 learning_rate=0.001, grad_clip=5, keep_prob=0.001, sampling=False):
        # True for SGD
        if sampling == True:
            self.batch_size, self.num_steps = 1, 1
        else:
            self.batch_size, self.num_steps = batch_size, num_steps

        tf.reset_default_graph()

        self.inputs, self.targets, self.keep_prob = self.build_inputs(self.batch_size, self.num_steps)
        self.keep_prob = keep_prob
        self.cell, self.initial_state = self.build_lstm(lstm_size, num_layers, self.batch_size, self.keep_prob)

        # print(self.cell.state_size)
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        print("cell state size: ", self.cell.state_size)
        print("cell initial state: ", self.initial_state)
        print("this is inputs", self.inputs)
        print("x_one_hot: ", x_one_hot)
        outputs, state = tf.nn.dynamic_rnn(self.cell, x_one_hot, initial_state=self.initial_state)

    def build_inputs(self, num_seqs, num_steps):
        inputs = tf.placeholder(tf.int32, shape=(num_seqs, num_steps), name="inputs")
        targets = tf.placeholder(tf.int32, shape=(num_seqs, num_steps), name="targets")
        print('inputs shape: ', inputs.shape)
        keep_prob = tf.placeholder(tf.float32, name="keep_prob")
        return inputs, targets, keep_prob

    def build_lstm(self, lstm_size, num_layers, batch_size, keep_prob):
        # construct lstm cell
        lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
        # add dropout
        drop = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        # stack multiple rnn cells
        cell = tf.nn.rnn_cell.MultiRNNCell([drop for _ in range(num_layers)])
        initial_state = cell.zero_state(batch_size, tf.float32)
        return cell, initial_state


if __name__ == '__main__':
    len_vocab = 83
    batch_size = 100
    num_steps = 100
    lstm_size = 512
    num_layers = 2
    learning_rate = 0.001
    keep_prob = 0.5
    epochs = 20
    save_every_n = 200
    print("h1")
    model = CharRNN(len_vocab, batch_size=batch_size, num_steps=num_steps, lstm_size=lstm_size,
                    num_layers=num_layers, learning_rate=learning_rate, sampling=False, keep_prob=keep_prob)
I get a dimension mismatch error at tf.nn.dynamic_rnn.
The error message is:
inputs shape: (100, 100)
cell state size: (LSTMStateTuple(c=512, h=512), LSTMStateTuple(c=512, h=512))
cell initial state: (LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros:0' shape=(100, 512) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 512) dtype=float32>), LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState_1/BasicLSTMCellZeroState/zeros:0' shape=(100, 512) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState_1/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 512) dtype=float32>))
this is inputs Tensor("inputs:0", shape=(100, 100), dtype=int32)
x_one_hot: Tensor("one_hot:0", shape=(100, 100, 83), dtype=float32)
Traceback (most recent call last):
File "./seq2_minimal.py", line 70, in <module>
,learning_rate=learning_rate,sampling= False,keep_prob = keep_prob)
File "./seq2_minimal.py", line 32, in __init__
outputs, state = tf.nn.dynamic_rnn(self.cell, x_one_hot, initial_state= self.initial_state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 614, in dynamic_rnn
dtype=dtype)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 777, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2816, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2640, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2590, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 762, in _time_step
(output, new_state) = call_cell()
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 748, in <lambda>
call_cell = lambda: cell(input_t, state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1066, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 891, in __call__
output, new_state = self._cell(inputs, state, scope)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 441, in call
value=self._linear([inputs, h]), num_or_size_splits=4, axis=1)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1189, in __call__
res = math_ops.matmul(array_ops.concat(args, 1), self._weights)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 1891, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2437, in _mat_mul
name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2958, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2209, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2159, in call_with_requiring
return call_cpp_shape_fn(op, require_shape_fn=True)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 627, in call_cpp_shape_fn
require_shape_fn)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 691, in _call_cpp_shape_fn_impl
raise ValueError(err.message)
ValueError: Dimensions must be equal, but are 1024 and 595 for 'rnn/while/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/MatMul_1' (op: 'MatMul') with input shapes: [100,1024], [595,2048].
I searched and found that TensorFlow's LSTM cell is supposed to adapt to its input size automatically, but the error message says otherwise.
It shows that the input is [100, 1024] while the LSTM weight matrix is [595, 2048].
Thanks in advance.
Change

cell = tf.nn.rnn_cell.MultiRNNCell([drop for _ in range(num_layers)])

to

cell = tf.nn.rnn_cell.MultiRNNCell([drop])

because the list comprehension reuses the same drop cell for every layer, so both layers share one set of weights. The first layer sees the 83-dimensional one-hot input (83 + 512 = 595 columns), while the second layer sees the 512-dimensional output of the first (512 + 512 = 1024 columns), and a single kernel cannot match both shapes; that is exactly the [100, 1024] vs [595, 2048] mismatch in the error.
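If you still want num_layers stacked layers, a common fix (my own sketch, not part of the answer above) is to build a separate cell for each layer, so every layer gets its own kernel sized to its input:

def build_cell(lstm_size, keep_prob):
    # a fresh BasicLSTMCell + DropoutWrapper per layer, so no weights are shared
    lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
    return tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=keep_prob)

cell = tf.nn.rnn_cell.MultiRNNCell(
    [build_cell(lstm_size, keep_prob) for _ in range(num_layers)])
initial_state = cell.zero_state(batch_size, tf.float32)

With separate cells, the first layer builds a (595, 2048) kernel for the one-hot input and the second a (1024, 2048) kernel for the 512-dimensional layer output, so the MatMul shapes line up.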
Related
I'm developing a recurrent neural network in Python using Keras to do binary classification on roulette wheel data. The code crashes when I try to train it; could you help me fix it?
Here is my code:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd
columns = ['data', 'resultado']
base = pd.read_csv("blaze_values_27_01_2023_VERMELHO_1.csv", header = None, names = columns)
base = base.dropna()
base_treinamento = base.iloc[:, 1:2]
normalizador = MinMaxScaler(feature_range=[0,1])
base_treinamento_normalizada = normalizador.fit_transform(base_treinamento)
previsores = []
saida_real = []
for i in range(90, 1809):
    previsores.append(base_treinamento_normalizada[i-90:i, 0])
    saida_real.append(base_treinamento_normalizada[i, 0])
previsores, saida_real = np.array(previsores), np.array(saida_real)
previsores = np.reshape(previsores, (previsores.shape[0],previsores.shape[1],1))
regressor = Sequential()
regressor.add(Dense(100, input_shape = (previsores.shape[1],1), activation='relu'))
regressor.add(Dense(1, activation = 'sigmoid'))
regressor.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
regressor.fit(previsores, saida_real, epochs = 100, batch_size = 32)
The error I am getting is:
Epoch 1/100
Traceback (most recent call last):
File "/Users/mac/opt/anaconda3/lib/python3.9/site-packages/spyder_kernels/py3compat.py", line 356, in compat_exec
exec(code, globals, locals)
File "/Users/mac/untitled0.py", line 34, in
regressor.fit(previsores, saida_real, epochs = 100, batch_size = 32)
File "/Users/mac/opt/anaconda3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/var/folders/1j/tbck9lp54kndrb4nl53xdjgr0000gp/T/autograph_generated_file27ts368.py", line 15, in tf__train_function
retval = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
ValueError: in user code:
File "/Users/mac/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1051, in train_function *
return step_function(self, iterator)
File "/Users/mac/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1040, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/Users/mac/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1030, in run_step **
outputs = model.train_step(data)
File "/Users/mac/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 890, in train_step
loss = self.compute_loss(x, y, y_pred, sample_weight)
File "/Users/mac/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 948, in compute_loss
return self.compiled_loss(
File "/Users/mac/opt/anaconda3/lib/python3.9/site-packages/keras/engine/compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "/Users/mac/opt/anaconda3/lib/python3.9/site-packages/keras/losses.py", line 139, in __call__
losses = call_fn(y_true, y_pred)
File "/Users/mac/opt/anaconda3/lib/python3.9/site-packages/keras/losses.py", line 243, in call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/Users/mac/opt/anaconda3/lib/python3.9/site-packages/keras/losses.py", line 1930, in binary_crossentropy
backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
File "/Users/mac/opt/anaconda3/lib/python3.9/site-packages/keras/backend.py", line 5283, in binary_crossentropy
return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
ValueError: `logits` and `labels` must have the same shape, received ((None, 90, 1) vs (None,)).
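The (None, 90, 1) shape comes from applying Dense layers to a 3-D input of shape (None, 90, 1): Dense only acts on the last axis, so the 90-step time dimension is kept while the labels are a flat (None,) vector. A sketch of one possible fix (only an illustration, assuming the preprocessing above stays the same) is to flatten the input before the Dense layers:

from keras.models import Sequential
from keras.layers import Dense, Flatten

regressor = Sequential()
regressor.add(Flatten(input_shape=(previsores.shape[1], 1)))  # (None, 90, 1) -> (None, 90)
regressor.add(Dense(100, activation='relu'))
regressor.add(Dense(1, activation='sigmoid'))                 # (None, 1) matches the 1-D labels
regressor.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
regressor.fit(previsores, saida_real, epochs=100, batch_size=32)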
I used to use this code to train a variational autoencoder (I found the code on a forum and adapted it to my needs):
import pickle
from pylab import mpl,plt
# read the results
filename=r'XXX.pic'
data_file=open(filename,'rb')
X_sec = pickle.load(data_file)#[:,3000:]
data_file.close()
size=X_sec.shape[0]
prop=0.75
cut=int(size*prop)
X_train=X_sec[:cut]
X_test=X_sec[cut:]
std=X_train.std()
X_train /= std
X_test /= std
import keras
from keras import layers
from keras import backend as K
from keras.models import Model
import numpy as np
#encoding_dim = 12
sig_shape = (3600,)
batch_size = 128
latent_dim = 12
input_sig = keras.Input(shape=sig_shape)
x = layers.Dense(128, activation='relu')(input_sig)
x = layers.Dense(64, activation='relu')(x)
shape_before_flattening = K.int_shape(x)
x = layers.Dense(32, activation='relu')(x)
z_mean = layers.Dense(latent_dim)(x)
z_log_var = layers.Dense(latent_dim)(x)
encoder=Model(input_sig,[z_mean,z_log_var])
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
                              mean=0., stddev=1.)
    return z_mean + K.exp(z_log_var) * epsilon
z = layers.Lambda(sampling)([z_mean, z_log_var])
decoder_input = layers.Input(K.int_shape(z)[1:])
x = layers.Dense(np.prod(shape_before_flattening[1:]),activation='relu')(decoder_input)
x = layers.Reshape(shape_before_flattening[1:])(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dense(3600, activation='linear')(x)
decoder = Model(decoder_input, x)
z_decoded = decoder(z)
class CustomVariationalLayer(keras.layers.Layer):

    def vae_loss(self, x, z_decoded):
        x = K.flatten(x)
        z_decoded = K.flatten(z_decoded)
        xent_loss = keras.metrics.mae(x, z_decoded)
        kl_loss = -5e-4 * K.mean(
            1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
        return K.mean(xent_loss + kl_loss)

    def call(self, inputs):
        x = inputs[0]
        z_decoded = inputs[1]
        loss = self.vae_loss(x, z_decoded)
        self.add_loss(loss, inputs=inputs)
        return x
y = CustomVariationalLayer()([input_sig, z_decoded])
vae = Model(input_sig, y)
vae.compile(optimizer='rmsprop', loss=None)
vae.summary()
vae.fit(x=X_train, y=None,shuffle=True,epochs=100,batch_size=batch_size,validation_data=(X_test, None))
It used to work smoothly, but I updated my libraries and now I get this error:
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1619, in _create_c_op
c_op = c_api.TF_FinishOperation(op_desc)
InvalidArgumentError: Duplicate node name in graph: 'lambda_1/random_normal/shape'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "I:\Documents\Nico\Python\finance\travail_amont\autoencoder_variationnel_bruit.py", line 74, in <module>
z = layers.Lambda(sampling)([z_mean, z_log_var])
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\backend\tensorflow_backend.py", line 75, in symbolic_fn_wrapper
return func(*args, **kwargs)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\engine\base_layer.py", line 506, in call
output_shape = self.compute_output_shape(input_shape)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\layers\core.py", line 674, in compute_output_shape
x = self.call(xs)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\layers\core.py", line 716, in call
return self.function(inputs, **arguments)
File "I:\Documents\Nico\Python\finance\travail_amont\autoencoder_variationnel_bruit.py", line 71, in sampling
mean=0., stddev=1.)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\keras\backend\tensorflow_backend.py", line 4329, in random_normal
shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\keras\backend.py", line 5602, in random_normal
shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\random_ops.py", line 69, in random_normal
shape_tensor = tensor_util.shape_tensor(shape)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\tensor_util.py", line 994, in shape_tensor
return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1314, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\array_ops.py", line 1368, in _autopacking_conversion_function
return _autopacking_helper(v, dtype, name or "packed")
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\array_ops.py", line 1304, in _autopacking_helper
return gen_array_ops.pack(elems_as_tensors, name=scope)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\ops\gen_array_ops.py", line 5704, in pack
"Pack", values=values, axis=axis, name=name)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 742, in _apply_op_helper
attrs=attr_protos, op_def=op_def)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\func_graph.py", line 595, in _create_op_internal
compute_device)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3322, in _create_op_internal
op_def=op_def)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1786, in __init__
control_input_ops)
File "C:\Users\user\AppData\Local\conda\conda\envs\my_root\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1622, in _create_c_op
raise ValueError(str(e))
ValueError: Duplicate node name in graph: 'lambda_1/random_normal/shape'
I do not know this error: "Duplicate node name in graph". Does anyone have a clue? Thanks.
If you're using tf 2.x, then import your keras modules as follows.
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
More related on this, #36509, #130
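Applied to the script in the question, the top of the file would look like this (just a sketch of that suggestion; everything below the imports stays the same):

import pickle
import numpy as np
from pylab import mpl, plt
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model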
I'm experiencing a "ValueError: Shapes (None, None) and (None, 8, 8, 7) are incompatible" every time I train my model in TensorFlow. So far:
history = model.fit(train_batches,
steps_per_epoch=train_steps,
class_weight=class_weights,
validation_data=validation_batches,
validation_steps=val_steps,
epochs=30,
verbose=1,
callbacks=callbacks_list
)
gives this stacktrace:
Traceback (most recent call last):
File "/home/brian/Desktop/381-deep-learning/main.py", line 410, in <module>
epochs=30
File "/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 324, in new_func
return func(*args, **kwargs)
File "/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 1479, in fit_generator
initial_epoch=initial_epoch)
File "/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 66, in _method_wrapper
return method(self, *args, **kwargs)
File "/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 848, in fit
tmp_logs = train_function(iterator)
File "/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 580, in __call__
result = self._call(*args, **kwds)
File "/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 627, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 506, in _initialize
*args, **kwds))
File "/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 2446, in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 2777, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 2667, in _create_graph_function
capture_by_value=self._capture_by_value),
File "/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/framework/func_graph.py", line 981, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 441, in wrapped_fn
return weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/framework/func_graph.py", line 968, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:
/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:571 train_function *
outputs = self.distribute_strategy.run(
/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:951 run **
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
return fn(*args, **kwargs)
/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:533 train_step **
y, y_pred, sample_weight, regularization_losses=self.losses)
/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/keras/engine/compile_utils.py:205 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/keras/losses.py:143 __call__
losses = self.call(y_true, y_pred)
/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/keras/losses.py:246 call
return self.fn(y_true, y_pred, **self._fn_kwargs)
/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/keras/losses.py:1527 categorical_crossentropy
return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/keras/backend.py:4561 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/home/brian/Desktop/381-deep-learning/venv/lib/python3.6/site-packages/tensorflow/python/framework/tensor_shape.py:1117 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, None) and (None, 8, 8, 7) are incompatible
Process finished with exit code 1
after making it to Epoch 1/30.
Here is my model definition in case anyone is wondering:
# Create Inception Res Net model as used in paper
resnet = tf.keras.applications.inception_resnet_v2.InceptionResNetV2()
print("Layers of ResNet: "+str(len(resnet.layers))) //782 layers
x = resnet.layers[-28].output
x = tf.keras.layers.Dropout(0.25)(x)
# Make a prediction layer with 7 nodes for the 7 dir in our train_dir.
predictions_layer = tf.keras.layers.Dense(7, activation='softmax')(x)
# print(resnet.input)
# inputs=resnet.input selects the input layer, outputs=predictions refers to the
# dense layer we created above.
model = tf.keras.Model(inputs=resnet.input, outputs=predictions_layer)
What I believe may be the cause of my issue is my model declaration, because when I look at my model.summary() I see this (with all the in-between layers excluded, of course):
Output of model.summary()
input_1 (InputLayer) [(None, 299, 299, 3) 0
__________________________________________________________________________________________________
dropout (Dropout) (None, 8, 8, 192) 0 batch_normalization_195[0][0]
__________________________________________________________________________________________________
dense (Dense) (None, 8, 8, 7) 1351 dropout[0][0]
==================================================================================================
Total params: 47,465,959
Trainable params: 47,411,559
Non-trainable params: 54,400
I included a pastebin of my entire file in case I missed anything: https://pastebin.com/raw/E0VQ83JQ
I understand that it is expecting shape (None, None) while my output comes from a Dense layer of shape (None, 8, 8, 7), but how would I do the reshaping?
Any help is appreciated, including documentation which you'd think I'd find useful on the subject.
There should be a Flatten layer between the ResNet output and the Dense layer.
# Create Inception Res Net model as used in paper
resnet = tf.keras.applications.inception_resnet_v2.InceptionResNetV2()
print("Layers of ResNet: "+str(len(resnet.layers))) //782 layers
x = resnet.layers[-28].output
x = tf.keras.layers.Dropout(0.25)(x)
### Edit here.
x = tf.keras.layers.Flatten()(x)
# Make a prediction layer with 7 nodes for the 7 dir in our train_dir.
predictions_layer = tf.keras.layers.Dense(7, activation='softmax')(x)
# print(resnet.input)
# inputs=resnet.input selects the input layer, outputs=predictions refers to the
# dense layer we created above.
model = tf.keras.Model(inputs=resnet.input, outputs=predictions_layer)
Also, make sure that train_batches is valid.
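With categorical_crossentropy and a 7-unit softmax, the labels also need to be one-hot vectors of shape (batch, 7). A quick sanity check on the generator (a sketch; it assumes train_batches yields (images, labels) tuples, which the linked pastebin, not reproduced here, would confirm):

images, labels = next(iter(train_batches))
print(images.shape)  # expected: (batch, 299, 299, 3) for InceptionResNetV2
print(labels.shape)  # expected: (batch, 7), one-hot, to match the softmax output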
First convert X and y to NumPy arrays, then standardize X_train and X_test with StandardScaler, which rescales the large values into a small, comparable range:
X = X.values
y = y.values

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)  # reuse the statistics fitted on the training data
Now apply model.fit
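Putting it together, a minimal sketch (it assumes X and y are pandas objects and model is an already-compiled Keras model, none of which are shown in this answer):

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X = X.values
y = y.values

# hypothetical split; the post does not show how X_train/X_test were created
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # fit only on the training split
X_test = scaler.transform(X_test)

model.fit(X_train, y_train,
          epochs=30, batch_size=32,
          validation_data=(X_test, y_test))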
I don't know why I am getting this error.
I saw some posts suggesting state_is_tuple=False, but that gave me a different error. I think the problem is in the way I defined the LSTM cell, but I am not sure what I should change. I followed this link, which has a similar code structure.
Here is my code:
Required placeholders
n_hidden = args.rnn_size
n_layers = args.num_layers
max_sequence_length = args.max_sequence_length
encoderEmbeddingsize = args.encoderEmbeddingsize
decoderEmbeddingsize = args.decoderEmbeddingsize
queVocabsize = len(question_vocab_to_int)
ansVocabsize = len(answer_vocab_to_int)
batch_size = args.batch_size
# Input Embedding for Encoder ## CHECK THE VOCAB SIZE!!!
encoder_input = tf.contrib.layers.embed_sequence(input_data, queVocabsize, encoderEmbeddingsize,
                                                 initializer=tf.random_uniform_initializer(0, 1))
print('encoder_input', encoder_input)
# Layers for the model
lstm_cell = rnn.BasicLSTMCell(n_hidden) # lstm layer
dropout = rnn.DropoutWrapper(lstm_cell, input_keep_prob=keep_prob) # dropout layer
# Encoder Model
# Make two layer encoder
encoder_multirnn_cell = rnn.MultiRNNCell([dropout]*n_layers)
# Make it bidirectional
print(sequence_length)
encoder_output, encoder_state = tf.nn.dynamic_rnn(encoder_multirnn_cell,
                                                  inputs=encoder_input, dtype=tf.float32)  # sequence_length=sequence_length,
print('encoder_output', encoder_output)
print('encoder_state', encoder_state)
# preprocessing encoder input
initial_tensor = tf.strided_slice(target, [0, 0], [batch_size, -1], [1, 1])
decoder_input = tf.concat([tf.fill([batch_size, 1], question_vocab_to_int['<GO>']), initial_tensor], 1)
print('decoder_input', decoder_input)
## Input Embedding for the Decoder
decoder_embedding = tf.Variable(tf.random_uniform([queVocabsize+1, decoderEmbeddingsize], 0, 1))
decoder_embedded_input = tf.nn.embedding_lookup(decoder_embedding, decoder_input)
print('check')
print(decoder_embedded_input)
print(decoder_embedding)
## Decoder Model
#with tf.variable_scope("decoding") as decoding_scope:
lstm_decoder_cell = rnn.BasicLSTMCell(n_hidden) # lstm layer
dropout_decoder = rnn.DropoutWrapper(lstm_decoder_cell, input_keep_prob=keep_prob) # droput layer
# decoder
# Make two layer encoder
decoder_multirnn_cell = rnn.MultiRNNCell([dropout_decoder] * n_layers)
# weights = tf.truncated_normal_initializer(stddev=0.1)
# biases = tf.zeros_initializer()
output_layer_function = layers_core.Dense(ansVocabsize, use_bias=False)
# lambda x: tf.contrib.layers.fully_connected(x, queVocabsize, scope=decoding_scope,
#                                             weights_initializer=weights,
#                                             biases_initializer=biases)
#print(decoder_multirnn_cell.output_size)
#decoding_scope.reuse_variables()
print('output_kayer_function', output_layer_function)
# training vs inference!
encoder_output = tf.transpose(encoder_output, [1, 0, 2])
attention_state = tf.zeros([batch_size, 1, decoder_multirnn_cell.output_size * 2])
attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
    num_units=decoder_multirnn_cell.output_size, memory=encoder_output)
lstm_decoder_cell = tf.contrib.seq2seq.AttentionWrapper(lstm_decoder_cell,
                                                        attention_mechanism=attention_mechanism)
attn_zero = lstm_decoder_cell.zero_state(batch_size=batch_size, dtype=tf.float32)
init_state = attn_zero.clone(cell_state=encoder_state)
print(('sequence!!!!!!!!1', sequence_length))
helper = tf.contrib.seq2seq.TrainingHelper(decoder_embedded_input, sequence_length)
# decoder
decoder = tf.contrib.seq2seq.BasicDecoder(lstm_decoder_cell, helper, initial_state=init_state,
                                          output_layer=output_layer_function)
print(decoder)
final_outputs, _final_state, _final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder)
train_pred_drop = tf.nn.dropout(final_outputs, keep_prob)
logits = train_pred_drop.rnn_output
Now, I am getting the error in tf.contrib.seq2seq.dynamic_decode(decoder), as shown below:
Traceback (most recent call last):
File "test_model.py", line 272, in <module>
train_logits, infer_logits = load_model(args, tf.reverse(input_data, [-1]), target, learning_rate, sequence_length, question_vocab_to_int, answer_vocab_to_int, keep_prob ) ## load model here!
File "test_model.py", line 165, in load_model
final_outputs, _final_state, _final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py", line 286, in dynamic_decode
swap_memory=swap_memory)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2816, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2640, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2590, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py", line 234, in body
decoder_finished) = decoder.step(time, inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py", line 138, in step
cell_outputs, cell_state = self._cell(inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py", line 1295, in call
cell_output, next_cell_state = self._cell(cell_inputs, cell_state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 438, in call
self._linear = _Linear([inputs, h], 4 * self._num_units, True)
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1154, in __init__
shapes = [a.get_shape() for a in args]
File "/home/saurabh/tfnightly/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1154, in <listcomp>
shapes = [a.get_shape() for a in args]
AttributeError: 'LSTMStateTuple' object has no attribute 'get_shape'
I classify MNIST digits with TensorFlow using a 2-layer RNN. Training works fine, but when evaluating accuracy, an incompatible shape for the test data is reported.
import tensorflow as tf
import inspect
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

hm_epochs = 1
n_classes = 10
batch_size = 128
chunk_size = 28
n_chunks = 28
rnn_size = 128

x = tf.placeholder('float', [None, n_chunks, chunk_size])
y = tf.placeholder('float')


def lstm_cell():
    if 'reuse' in inspect.getargspec(
            tf.contrib.rnn.BasicLSTMCell.__init__).args:
        return tf.contrib.rnn.BasicLSTMCell(
            rnn_size, forget_bias=0.0, state_is_tuple=True,
            reuse=tf.get_variable_scope().reuse)
    else:
        return tf.contrib.rnn.BasicLSTMCell(
            rnn_size, forget_bias=0.0, state_is_tuple=True)


def attn_cell():
    return tf.contrib.rnn.DropoutWrapper(lstm_cell())


def recurrent_neural_network(x):
    layer = {'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
             'biases': tf.Variable(tf.random_normal([n_classes]))}
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, chunk_size])
    x = tf.split(x, n_chunks, 0)
    stacked_lstm = tf.contrib.rnn.MultiRNNCell([attn_cell(), attn_cell()], state_is_tuple=True)
    initial_state = state = stacked_lstm.zero_state(batch_size, tf.float32)
    outputs, states = tf.contrib.rnn.static_rnn(stacked_lstm, x, state)
    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']
    return output


def train_neural_network(x):
    prediction = recurrent_neural_network(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction))
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(mnist.train.num_examples / batch_size)):
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                epoch_x = epoch_x.reshape((batch_size, n_chunks, chunk_size))
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)

        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        testdata = np.reshape(mnist.test.images, (10000, n_chunks, chunk_size))
        print("Testdata ", testdata.shape)
        print("x ", x)
        print('Accuracy:', accuracy.eval({x: testdata, y: mnist.test.labels}))


train_neural_network(x)
However, the shapes of the test data and the placeholder are printed as follows. Aren't they compatible?
Epoch 0 completed out of 1 loss: 228.159379691
Testdata (10000, 28, 28)
x Tensor("Placeholder:0", shape=(?, 28, 28), dtype=float32)
Error:
Caused by op 'rnn/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/basic_lstm_cell/concat', defined at:
File "main.py", line 90, in <module>
train_neural_network(x)
File "main.py", line 59, in train_neural_network
prediction = recurrent_neural_network(x)
File "main.py", line 52, in recurrent_neural_network
outputs, states = tf.contrib.rnn.static_rnn(stacked_lstm, x, state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py", line 1212, in static_rnn
(output, state) = call_cell()
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn.py", line 1199, in <lambda>
call_cell = lambda: cell(input_, state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\layers\base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 916, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 752, in __call__
output, new_state = self._cell(inputs, state, scope)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\layers\base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 383, in call
concat = _linear([inputs, h], 4 * self._num_units, True)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1021, in _linear
res = math_ops.matmul(array_ops.concat(args, 1), weights)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1048, in concat
name=name)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 495, in _concat_v2
name=name)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\henry\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1269, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): ConcatOp : Dimensions of inputs should match: shape[0] = [10000,28] vs. shape[1] = [128,128]
[[Node: rnn/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/basic_lstm_cell/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](split, MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros_1, rnn/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/basic_lstm_cell/concat/axis)]]
When I print the shape of the training data, it is (128, 28, 28). I am confused about why the test data leads to this error, because both the training data and the test data are formatted the same way, that is (?, n_chunks, chunk_size). Thanks in advance.
The issue is that you always create the initial state with shape set to the training batch size instead of the eval batch size.
This is the culprit line:
initial_state = state = stacked_lstm.zero_state(batch_size, tf.float32)
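A minimal sketch of a fix (my addition, not part of the answer above): drop the hard-coded zero state and let static_rnn build one that matches whatever batch is fed in, by passing dtype instead of an initial state:

# instead of:
#   initial_state = state = stacked_lstm.zero_state(batch_size, tf.float32)
#   outputs, states = tf.contrib.rnn.static_rnn(stacked_lstm, x, state)
# let static_rnn create a zero state sized to the actual batch (128 in training, 10000 at test time):
outputs, states = tf.contrib.rnn.static_rnn(stacked_lstm, x, dtype=tf.float32)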