I'm trying to fit a Keras (TF 2.3.1) model for image classification with multiple binary labels as output. The model consists of an Xception CNN + attention layer + dense classifier, and I'm hitting an error on some TPUs only:
UnimplementedError: {{function_node __inference_train_function_644557}} Compilation failure: Dynamic Spatial Convolution is not supported.
This fails on Kaggle TPUs but not on Colab; both were tested on TF version 2.3.1.
I was looking here, but the suggested solution implies that the image dimensions are not set, which is not the case here. train_df is of type <PrefetchDataset shapes: ((None, 750, 750, 3), (None, 11)), types: (tf.float32, tf.int64)>, so each image has size 750x750x3. Each layer has a defined output shape per the model summary below, so the layers that follow should be able to infer their input shapes correctly.
From the error, it seems that the problem is in the layer defined by attn_layer = LocallyConnected2D(.... Passing implementation = 2 is a workaround that lets training complete, but it is not suitable for large models (see the LocallyConnected2D documentation).
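For reference, a minimal sketch of that workaround (only the implementation argument changes; per the Keras docs, implementation = 2 stores the layer weights in one dense matrix and runs the forward pass as a single matrix multiply, which compiles on TPU but costs far more memory on large inputs):
# Workaround sketch: the same attention head, lowered to a dense matmul
# that the TPU compiler accepts (memory-hungry for large models).
attn_layer = LocallyConnected2D(1, kernel_size = (1,1), padding = 'valid',
                                activation = 'sigmoid', implementation = 2)(attn_layer)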
Modelling code:
import numpy as np  # needed for the attention upscale weights below
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import Xception
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Input, Conv2D, multiply, LocallyConnected2D, Lambda, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import mean_absolute_error
def create_model():
    input_shape = (TARGET_SIZE, TARGET_SIZE, 3)
    in_lay = Input(input_shape)
    conv_base = Xception(include_top = False, weights = 'imagenet', input_shape = input_shape)
    pt_features = conv_base(in_lay)
    bn_features = BatchNormalization()(pt_features)
    # here we do an attention mechanism to turn pixels in the GAP on and off
    attn_layer = Conv2D(64, kernel_size = (1,1), padding = 'same', activation = 'relu')(bn_features)
    attn_layer = Conv2D(16, kernel_size = (1,1), padding = 'same', activation = 'relu')(attn_layer)
    attn_layer = LocallyConnected2D(1, kernel_size = (1,1), padding = 'valid', activation = 'sigmoid')(attn_layer)
    # fan it out to all of the channels
    pt_depth = conv_base.get_output_shape_at(0)[-1]
    up_c2_w = np.ones((1, 1, 1, pt_depth))
    up_c2 = Conv2D(pt_depth, kernel_size = (1,1), padding = 'same',
                   activation = 'linear', use_bias = False, weights = [up_c2_w])
    up_c2.trainable = False
    attn_layer = up_c2(attn_layer)
    mask_features = multiply([attn_layer, bn_features])
    gap_features = GlobalAveragePooling2D()(mask_features)
    gap_mask = GlobalAveragePooling2D()(attn_layer)
    # to account for missing values from the attention model
    gap = Lambda(lambda x: x[0]/x[1], name = 'RescaleGAP')([gap_features, gap_mask])
    gap_dr = Dropout(0.5)(gap)
    dr_steps = Dropout(0.25)(Dense(1024, activation = 'elu')(gap_dr))
    out_layer = Dense(11, activation = 'sigmoid')(dr_steps)
    model = Model(inputs = [in_lay], outputs = [out_layer])
    model.compile(optimizer = Adam(lr = 0.002), loss = 'binary_crossentropy', metrics = ["AUC"])
    return model
with tpu_strategy.scope():
    model = create_model()

model.summary()

history = model.fit(
    train_df,
    epochs = EPOCHS,
    steps_per_epoch = STEPS_PER_EPOCH,
    validation_data = valid_df,
    validation_steps = VALIDATION_STEPS
)
The resulting model summary:
Model: "model_8"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_19 (InputLayer) [(None, 750, 750, 3) 0
__________________________________________________________________________________________________
xception (Model) (None, 24, 24, 2048) 20861480 input_19[0][0]
__________________________________________________________________________________________________
batch_normalization_49 (BatchNo (None, 24, 24, 2048) 8192 xception[1][0]
__________________________________________________________________________________________________
conv2d_67 (Conv2D) (None, 24, 24, 64) 131136 batch_normalization_49[0][0]
__________________________________________________________________________________________________
conv2d_68 (Conv2D) (None, 24, 24, 16) 1040 conv2d_67[0][0]
__________________________________________________________________________________________________
locally_connected2d_9 (LocallyC (None, 24, 24, 1) 9792 conv2d_68[0][0]
__________________________________________________________________________________________________
conv2d_69 (Conv2D) (None, 24, 24, 2048) 2048 locally_connected2d_9[0][0]
__________________________________________________________________________________________________
multiply_9 (Multiply) (None, 24, 24, 2048) 0 conv2d_69[0][0]
batch_normalization_49[0][0]
__________________________________________________________________________________________________
global_average_pooling2d_23 (Gl (None, 2048) 0 multiply_9[0][0]
__________________________________________________________________________________________________
global_average_pooling2d_24 (Gl (None, 2048) 0 conv2d_69[0][0]
__________________________________________________________________________________________________
RescaleGAP (Lambda) (None, 2048) 0 global_average_pooling2d_23[0][0]
global_average_pooling2d_24[0][0]
__________________________________________________________________________________________________
dropout_18 (Dropout) (None, 2048) 0 RescaleGAP[0][0]
__________________________________________________________________________________________________
dense_17 (Dense) (None, 1024) 2098176 dropout_18[0][0]
__________________________________________________________________________________________________
dropout_19 (Dropout) (None, 1024) 0 dense_17[0][0]
__________________________________________________________________________________________________
dense_18 (Dense) (None, 11) 11275 dropout_19[0][0]
==================================================================================================
Total params: 23,123,139
Trainable params: 23,062,467
Non-trainable params: 60,672
__________________________________________________________________________________________________
Full stacktrace + error message:
---------------------------------------------------------------------------
UnimplementedError Traceback (most recent call last)
<ipython-input-53-5130a0bcf331> in <module>
19 validation_data = valid_df,
20 validation_steps = VALIDATION_STEPS,
---> 21 callbacks = [model_save, early_stop, reduce_lr]
22 )
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
64 def _method_wrapper(self, *args, **kwargs):
65 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
---> 66 return method(self, *args, **kwargs)
67
68 # Running inside `run_distribute_coordinator` already.
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
853 context.async_wait()
854 logs = tmp_logs # No error, now safe to assign to logs.
--> 855 callbacks.on_train_batch_end(step, logs)
856 epoch_logs = copy.copy(logs)
857
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/callbacks.py in on_train_batch_end(self, batch, logs)
387 """
388 if self._should_call_train_batch_hooks:
--> 389 logs = self._process_logs(logs)
390 self._call_batch_hook(ModeKeys.TRAIN, 'end', batch, logs=logs)
391
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/callbacks.py in _process_logs(self, logs)
263 """Turns tensors into numpy arrays or Python scalars."""
264 if logs:
--> 265 return tf_utils.to_numpy_or_python_type(logs)
266 return {}
267
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/utils/tf_utils.py in to_numpy_or_python_type(tensors)
521 return t # Don't turn ragged or sparse tensors to NumPy.
522
--> 523 return nest.map_structure(_to_single_numpy_or_python_type, tensors)
524
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/nest.py in map_structure(func, *structure, **kwargs)
615
616 return pack_sequence_as(
--> 617 structure[0], [func(*x) for x in entries],
618 expand_composites=expand_composites)
619
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/nest.py in <listcomp>(.0)
615
616 return pack_sequence_as(
--> 617 structure[0], [func(*x) for x in entries],
618 expand_composites=expand_composites)
619
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/utils/tf_utils.py in _to_single_numpy_or_python_type(t)
517 def _to_single_numpy_or_python_type(t):
518 if isinstance(t, ops.Tensor):
--> 519 x = t.numpy()
520 return x.item() if np.ndim(x) == 0 else x
521 return t # Don't turn ragged or sparse tensors to NumPy.
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in numpy(self)
959 """
960 # TODO(slebedev): Consider avoiding a copy for non-CPU or remote tensors.
--> 961 maybe_arr = self._numpy() # pylint: disable=protected-access
962 return maybe_arr.copy() if isinstance(maybe_arr, np.ndarray) else maybe_arr
963
/opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in _numpy(self)
927 return self._numpy_internal()
928 except core._NotOkStatusException as e:
--> 929 six.raise_from(core._status_to_exception(e.code, e.message), None)
930
931   @property
/opt/conda/lib/python3.7/site-packages/six.py in raise_from(value, from_value)
UnimplementedError: {{function_node __inference_train_function_644557}} Compilation failure: Dynamic Spatial Convolution is not supported: %convolution.30660 = f32[<=8,24,24,2048]{3,2,1,0} convolution(f32[<=8,24,24,1]{3,2,1,0} %add.30633, f32[1,1,1,2048]{3,2,1,0} %get-tuple-element.354), window={size=1x1}, dim_labels=b01f_01io->b01f, metadata={op_type="Conv2D" op_name="model_8/conv2d_69/Conv2D"}
TPU compilation failed
[[{{node tpu_compile_succeeded_assert/_17367812259898276239/_5}}]]
Related
I am running into an issue running a CLDNN model in Python 3. I ran model.summary() and did not see a problem, but I am receiving a sizing/shaping error between Keras and TensorFlow.
Below is the code with the output error. Any input would be greatly appreciated.
Update: reformatted the code and supplied the output as text.
def cldnn():
    model = Sequential()
    #input layer
    model.add(keras.layers.InputLayer(input_shape=(2, 128, 1)))
    #first layer
    model.add(Conv2D(256, (1, 3), activation='relu', use_bias=True))
    model.add(Dropout(0.2))
    #second layer
    model.add(Conv2D(256, (2, 3), activation='relu', use_bias=True))
    #third layer
    model.add(Conv2D(80, (1, 3), activation='relu', use_bias=True))
    model.add(Dropout(0.2))
    #fourth layer
    model.add(Conv2D(80, (1, 3), activation='relu', use_bias=True))
    #reshape data
    model.add(Reshape((120, 80)))
    #fifth layer
    model.add(LSTM(50, activation='tanh', return_sequences=True))
    #sixth layer
    model.add(Dense(128,
                    activation='relu',
                    use_bias=True,
                    kernel_initializer='normal'))
    #output layer
    model.add(Dense(10, activation='softmax', use_bias=True))
    model.summary()
    return model
#Create instance of CNN model graph
cldnn = cldnn()
#Compile model using an appropriate loss and optimizer algorithm
cldnn.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
cldnn_mdata = cldnn.fit(data_train, labels_train,
                        validation_data=(data_test, labels_test),
                        epochs=15,
                        batch_size=1024,
                        shuffle=True)
#Print accuracy of model on testing set after training
scores = cldnn.evaluate(data_test, labels_test)
print("Accuracy: %.2f%%" %(scores[1]*100))
#OUTPUT
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv2d_16 (Conv2D) (None, 2, 126, 256) 1024
_________________________________________________________________
dropout_8 (Dropout) (None, 2, 126, 256) 0
_________________________________________________________________
conv2d_17 (Conv2D) (None, 1, 124, 256) 393472
_________________________________________________________________
conv2d_18 (Conv2D) (None, 1, 122, 80) 61520
_________________________________________________________________
dropout_9 (Dropout) (None, 1, 122, 80) 0
_________________________________________________________________
conv2d_19 (Conv2D) (None, 1, 120, 80) 19280
_________________________________________________________________
reshape_3 (Reshape) (None, 120, 80) 0
_________________________________________________________________
lstm_3 (LSTM) (None, 120, 50) 26200
_________________________________________________________________
dense_6 (Dense) (None, 120, 128) 6528
_________________________________________________________________
dense_7 (Dense) (None, 120, 10) 1290
=================================================================
Total params: 509,314
Trainable params: 509,314
Non-trainable params: 0
_________________________________________________________________
Epoch 1/15
WARNING:tensorflow:Model was constructed with shape (None, 2, 128, 1) for input KerasTensor(type_spec=TensorSpec(shape=(None, 2, 128, 1), dtype=tf.float32, name='input_5'), name='input_5', description="created by layer 'input_5'"), but it was called on an input with incompatible shape (None, 28, 28, 1).
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-9-0e0ee3a37c65> in <module>()
52 epochs=15,
53 batch_size=1024,
---> 54 shuffle=True)
55
56 #Print accuracy of model on testing set after training
9 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
992 except Exception as e: # pylint:disable=broad-except
993 if hasattr(e, "ag_error_metadata"):
--> 994 raise e.ag_error_metadata.to_exception(e)
995 else:
996 raise
ValueError: in user code:
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:853 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:842 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:835 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:787 train_step
y_pred = self(x, training=True)
/usr/local/lib/python3.7/dist-packages/keras/engine/base_layer.py:1037 __call__
outputs = call_fn(inputs, *args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/engine/sequential.py:369 call
return super(Sequential, self).call(inputs, training=training, mask=mask)
/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py:415 call
inputs, training=training, mask=mask)
/usr/local/lib/python3.7/dist-packages/keras/engine/functional.py:550 _run_internal_graph
outputs = node.layer(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/engine/base_layer.py:1037 __call__
outputs = call_fn(inputs, *args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/layers/core.py:539 call
result.set_shape(self.compute_output_shape(inputs.shape))
/usr/local/lib/python3.7/dist-packages/keras/layers/core.py:530 compute_output_shape
self.target_shape)
/usr/local/lib/python3.7/dist-packages/keras/layers/core.py:518 _fix_unknown_dimension
raise ValueError(msg)
ValueError: total size of new array must be unchanged, input_shape = [27, 20, 80], output_shape = [120, 80]
I have a problem with my machine learning code.
Here is the model:
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=input_shape),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(128, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(512, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='softmax')])
model.summary()
This is my model.summary() result:
Model: "sequential_32"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_128 (Conv2D) (None, 148, 148, 32) 896
_________________________________________________________________
max_pooling2d_128 (MaxPoolin (None, 74, 74, 32) 0
_________________________________________________________________
conv2d_129 (Conv2D) (None, 72, 72, 64) 18496
_________________________________________________________________
max_pooling2d_129 (MaxPoolin (None, 36, 36, 64) 0
_________________________________________________________________
conv2d_130 (Conv2D) (None, 34, 34, 128) 73856
_________________________________________________________________
max_pooling2d_130 (MaxPoolin (None, 17, 17, 128) 0
_________________________________________________________________
conv2d_131 (Conv2D) (None, 15, 15, 512) 590336
_________________________________________________________________
max_pooling2d_131 (MaxPoolin (None, 7, 7, 512) 0
_________________________________________________________________
flatten_32 (Flatten) (None, 25088) 0
_________________________________________________________________
dense_79 (Dense) (None, 128) 3211392
_________________________________________________________________
dense_80 (Dense) (None, 1) 129
=================================================================
Total params: 3,895,105
Trainable params: 3,895,105
Non-trainable params: 0
And this is the compile setting I use:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
And this is the model fitting code:
EPOCH = 100
history = model.fit(train_data,
                    steps_per_epoch=len(train_generator),
                    epochs=EPOCH,
                    validation_data=val_data,
                    validation_steps=len(val_generator),
                    shuffle=True,
                    verbose=1)
For train_data I used tf.data, since I think it is more compatible with tf.keras. This is the tf.data generator function code:
def tf_data_generator(generator, input_shape):
    num_class = generator.num_classes
    tf_generator = tf.data.Dataset.from_generator(
        lambda: generator,
        output_types=(tf.float32, tf.float32),
        output_shapes=([None, input_shape[0], input_shape[1], input_shape[2]],
                       [None, num_class])
    )
    return tf_generator

train_data = tf_data_generator(train_generator, input_shape)
val_data = tf_data_generator(val_generator, input_shape)
I actually got that function from an article on medium.com. But I get an error when I try to train the model; can someone help me solve it? This is the error message:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-16-448faadd058c> in <module>()
6 validation_steps=len(val_generator),
7 shuffle=True,
----> 8 verbose = 1)
9 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
984 except Exception as e: # pylint:disable=broad-except
985 if hasattr(e, "ag_error_metadata"):
--> 986 raise e.ag_error_metadata.to_exception(e)
987 else:
988 raise
ValueError: in user code:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:855 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:845 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1285 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2833 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3608 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:838 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:797 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/compile_utils.py:204 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/losses.py:155 __call__
losses = call_fn(y_true, y_pred)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/losses.py:259 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/losses.py:1644 categorical_crossentropy
y_true, y_pred, from_logits=from_logits)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/backend.py:4862 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/tensor_shape.py:1161 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 3) and (None, 1) are incompatible
Sorry if my question is confusing; I am still new to the machine learning field. Thanks for helping me.
I guess you intend to build a multi-class classifier for 3 classes. If that is the case, you wrongly gave the last Dense layer a size of 1. You can solve the issue by replacing this line:
tf.keras.layers.Dense(1, activation='softmax')])
by this:
tf.keras.layers.Dense(3, activation='softmax')])
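As a sanity check, here is how the pieces line up after the fix (my own sketch; the error message already shows that y_true has shape (None, 3), i.e. one-hot labels for 3 classes):
# y_true comes in as one-hot vectors of shape (None, 3), so the final
# layer must emit 3 values per sample for categorical_crossentropy:
tf.keras.layers.Dense(3, activation='softmax')   # y_pred shape: (None, 3)

# the compile call can then stay exactly as it was:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])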
I'm trying to isolate some user-specific parameters by having a matrix of parameters, where each row would learn the parameters specific to one user.
I want to index the matrix using the user id and concatenate the looked-up parameters to the other features.
Lastly, I want some fully-connected layers to get the desirable outcome.
However, I keep getting this error on the last line of the code.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-1-93de3591ccf0> in <module>
20 # combined = tf.keras.layers.Concatenate(axis=-1)([le_param, le])
21
---> 22 net = tf.keras.layers.Dense(128)(combined)
~/anaconda3/envs/tam-env/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
793 # framework.
794 if build_graph and base_layer_utils.needs_keras_history(inputs):
--> 795 base_layer_utils.create_keras_history(inputs)
796
797 # Clear eager losses on top level model call.
~/anaconda3/envs/tam-env/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py in create_keras_history(tensors)
182 keras_tensors: The Tensors found that came from a Keras Layer.
183 """
--> 184 _, created_layers = _create_keras_history_helper(tensors, set(), [])
185 return created_layers
186
~/anaconda3/envs/tam-env/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py in _create_keras_history_helper(tensors, processed_ops, created_layers)
229 constants[i] = backend.function([], op_input)([])
230 processed_ops, created_layers = _create_keras_history_helper(
--> 231 layer_inputs, processed_ops, created_layers)
232 name = op.name
233 node_def = op.node_def.SerializeToString()
~/anaconda3/envs/tam-env/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py in _create_keras_history_helper(tensors, processed_ops, created_layers)
229 constants[i] = backend.function([], op_input)([])
230 processed_ops, created_layers = _create_keras_history_helper(
--> 231 layer_inputs, processed_ops, created_layers)
232 name = op.name
233 node_def = op.node_def.SerializeToString()
~/anaconda3/envs/tam-env/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py in _create_keras_history_helper(tensors, processed_ops, created_layers)
227 else:
228 with ops.init_scope():
--> 229 constants[i] = backend.function([], op_input)([])
230 processed_ops, created_layers = _create_keras_history_helper(
231 layer_inputs, processed_ops, created_layers)
~/anaconda3/envs/tam-env/lib/python3.6/site-packages/tensorflow_core/python/keras/backend.py in __call__(self, inputs)
3746 return nest.pack_sequence_as(
3747 self._outputs_structure,
-> 3748 [x._numpy() for x in outputs], # pylint: disable=protected-access
3749 expand_composites=True)
3750
~/anaconda3/envs/tam-env/lib/python3.6/site-packages/tensorflow_core/python/keras/backend.py in <listcomp>(.0)
3746 return nest.pack_sequence_as(
3747 self._outputs_structure,
-> 3748 [x._numpy() for x in outputs], # pylint: disable=protected-access
3749 expand_composites=True)
3750
ValueError: Cannot convert a Tensor of dtype resource to a NumPy array.
Code to reproduce the error:
import tensorflow as tf
num_uids = 50
input_uid = tf.keras.layers.Input(shape=(1,), dtype=tf.int32)
params = tf.Variable(tf.random.normal((num_uids, 9)), trainable=True)
param = tf.gather_nd(params, input_uid)
input_shared_features = tf.keras.layers.Input(shape=(128,), dtype=tf.float32)
combined = tf.concat([param, input_shared_features], axis=-1)
net = tf.keras.layers.Dense(128)(combined)
There are a few things I've tried:
I tried to use tf.keras.layers.Lambda to encapsulate tf.gather_nd and tf.concat (roughly as sketched below).
I tried replacing tf.concat with tf.keras.layers.Concatenate.
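A hypothetical reconstruction of that Lambda attempt (the names refer to the reproduction snippet below; it still failed with the same error for me):
# Hypothetical reconstruction of the Lambda attempt described above;
# wrapping the raw ops in Lambda layers still raised the same
# "Cannot convert a Tensor of dtype resource" error for me.
param = tf.keras.layers.Lambda(lambda x: tf.gather_nd(params, x))(input_uid)
combined = tf.keras.layers.Lambda(lambda t: tf.concat(t, axis=-1))([param, input_shared_features])
net = tf.keras.layers.Dense(128)(combined)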
Oddly enough if I specify the number of items and replace Input with tf.Variable, the code would work as expected:
import tensorflow as tf
num_uids = 50
input_uid = tf.Variable(tf.ones((32, 1), dtype=tf.int32))
params = tf.Variable(tf.random.normal((num_uids, 9)), trainable=True)
param = tf.gather_nd(params, input_uid)
input_shared_features = tf.Variable(tf.ones((32, 128), dtype=tf.float32))
combined = tf.concat([param, input_shared_features], axis=-1)
net = tf.keras.layers.Dense(128)(combined)
I'm using TensorFlow 2.1 with Python 3.6.10.
I faced a similar issue when I was trying to use a TensorFlow table lookup (tf.lookup.StaticHashTable) in TensorFlow 2.x. I ended up solving it by keeping it inside a custom Keras layer. The same solution seems to have worked for this problem as well, at least up to the version mentioned in the question. (I tried TensorFlow 2.0, 2.1, and 2.2, and it worked in all of them.)
import tensorflow as tf

num_uids = 50
input_uid = tf.keras.Input(shape=(1,), dtype=tf.int32)
input_shared_features = tf.keras.layers.Input(shape=(128,), dtype=tf.float32)

class CustomLayer(tf.keras.layers.Layer):
    def __init__(self, num_uids):
        super(CustomLayer, self).__init__(trainable=True, dtype=tf.int64)
        self.num_uids = num_uids

    def build(self, input_shape):
        # create the trainable parameter matrix inside the layer
        # (use self.num_uids, not the global, so the layer is self-contained)
        self.params = tf.Variable(tf.random.normal((self.num_uids, 9)), trainable=True)
        self.built = True

    def call(self, input_uid, input_shared_features):
        param = tf.gather_nd(self.params, input_uid)
        combined = tf.concat([param, input_shared_features], axis=-1)
        return combined

    def get_config(self):
        config = super(CustomLayer, self).get_config()
        config.update({'num_uids': self.num_uids})
        return config

combined = CustomLayer(num_uids)(input_uid, input_shared_features)
net = tf.keras.layers.Dense(128)(combined)
model = tf.keras.Model(inputs={'input_uid': input_uid, 'input_shared_features': input_shared_features}, outputs=net)
model.summary()
Here's what model summary looked like:
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 1)] 0
__________________________________________________________________________________________________
input_2 (InputLayer) [(None, 128)] 0
__________________________________________________________________________________________________
custom_layer (CustomLayer) (None, 137) 450 input_1[0][0]
__________________________________________________________________________________________________
dense (Dense) (None, 128) 17664 custom_layer[0][0]
==================================================================================================
Total params: 18,114
Trainable params: 18,114
Non-trainable params: 0
For more info you can refer to the tf.keras.layers.Layer documentation.
In case you want to refer to the table lookup problem and solution, here are the links:
git issue
Sample working code for TensorFlow lookup
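For reference, a minimal sketch of the same keep-it-inside-a-layer pattern applied to the table-lookup case (my own illustration with made-up keys and values, not the code behind the links):
import tensorflow as tf

class LookupLayer(tf.keras.layers.Layer):
    def __init__(self, keys, values, default_value=-1, **kwargs):
        super(LookupLayer, self).__init__(**kwargs)
        # building the table inside the layer keeps its resource handle out
        # of the functional graph, avoiding the dtype-resource conversion
        initializer = tf.lookup.KeyValueTensorInitializer(keys, values)
        self.table = tf.lookup.StaticHashTable(initializer, default_value=default_value)

    def call(self, inputs):
        return self.table.lookup(inputs)

ids = tf.keras.Input(shape=(1,), dtype=tf.int64)
mapped = LookupLayer(keys=tf.constant([1, 2, 3], dtype=tf.int64),
                     values=tf.constant([10, 20, 30], dtype=tf.int64))(ids)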
While Jithin Jees's answer is very clear, shown below is a slightly different workaround using a Gather layer with the Concatenate operation:
import tensorflow as tf

num_uids = 50
#input_uid = tf.keras.layers.Input(shape=(1,), dtype=tf.int32, batch_size = 32)
#input_uid = tf.keras.layers.Input(shape=(1,), dtype=tf.int32)
#params = tf.Variable(tf.random.normal((num_uids, 9)), trainable=True)
#param = tf.gather_nd(params, input_uid)
indices = tf.keras.layers.Input(name='indices', shape=(), dtype='int32')
# define the parameter matrix directly as a tf.Variable
params = tf.Variable(tf.random.normal((num_uids, 9)), trainable=True)

class GatherLayer(tf.keras.layers.Layer):
    def call(self, indices, params):
        return tf.gather(params, indices)

output = GatherLayer()(indices, params)

#input_shared_features = tf.keras.layers.Input(shape=(128,), dtype=tf.float32, batch_size = 32)
input_shared_features = tf.keras.layers.Input(shape=(128,), dtype=tf.float32)
combined = tf.concat([output, input_shared_features], axis=-1)
net = tf.keras.layers.Dense(128)(combined)
For more details, please refer to this GitHub issue.
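A quick way to sanity-check the workaround (my own hypothetical usage with dummy data, not part of the original answer):
model = tf.keras.Model(inputs=[indices, input_shared_features], outputs=net)

# run a dummy batch of 4 user ids and 4 feature vectors through the graph
dummy_ids = tf.constant([0, 1, 2, 3], dtype=tf.int32)
dummy_features = tf.random.normal((4, 128))
print(model([dummy_ids, dummy_features]).shape)   # expect (4, 128)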
I'm trying to implement a sequence-to-sequence model with an attention mechanism for building a chatbot, but I'm stuck with the error below. Can anyone tell me where I went wrong?
Below is the code for the attention mechanism (Bahdanau attention), inspired by the TensorFlow examples.
#Bahdanau attention
#parameters to pass to this attention
'''
1. Encoder states, i.e. state_c, state_h
2. encoder_outputs
3. decoder_embedding, which is in the decoder part
4. you will get a context vector named "input_to_decoder"; pass this as input to the decoder LSTM layer
'''
def B_Attention_layer(state_h, state_c, encoder_outputs, decoder_embedding):
    d0 = tf.keras.layers.Dense(1024, name='dense_layer_1')
    d1 = tf.keras.layers.Dense(1024, name='dense_layer_2')
    d2 = tf.keras.layers.Dense(1024, name='dense_layer_3')
    hidden_with_time_axis_1 = tf.keras.backend.expand_dims(state_h, 1)
    #hidden_with_time_axis_1 = state_h
    hidden_with_time_axis_2 = tf.keras.backend.expand_dims(state_c, 1)
    #hidden_with_time_axis_2 = state_c
    #hidden_states = tf.keras.layers.concatenate([state_h, state_c], axis=-1)
    #all_states = tf.keras.layers.concatenate()
    score = d0(tf.keras.activations.tanh(encoder_outputs) + d1(hidden_with_time_axis_1) + d2(hidden_with_time_axis_2))
    attention_weights = tf.keras.activations.softmax(score, axis=1)
    #attention = Dense(38)(attention_weights)
    context_vector = attention_weights * encoder_outputs
    context_vector = tf.keras.backend.sum(context_vector, axis=1)
    context_vector = tf.keras.backend.expand_dims(context_vector, 1)
    context_vector = tf.keras.backend.reshape(context_vector, [-1, 38, 1024])
    input_to_decoder = tf.keras.layers.Concatenate(axis=-1)([context_vector, decoder_embedding])
    return input_to_decoder
Below is the encoder-decoder model, i.e. the seq2seq model:
#Encoder inputs
encoder_inputs = tf.keras.layers.Input(shape=(38,),name='encoder_input_layer')
encoder_embedding = tf.keras.layers.Embedding(vocab_size, 1024, mask_zero=True,name='encoder_embedding_layer')(encoder_inputs)
encoder_outputs , state_h , state_c = tf.keras.layers.LSTM(1024, return_state=True,return_sequences=True)(encoder_embedding)
# Keep the states (encoder_outputs is also used by the attention layer below).
encoder_states = [state_h, state_c]
# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = tf.keras.layers.Input(shape=(38,),name='decoder_input_layer')
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
decoder_embedding = tf.keras.layers.Embedding(vocab_size, 1024, mask_zero=True,name='decoder_embedding_layer')(decoder_inputs)
decoder_lstm = tf.keras.layers.LSTM(1024, return_state=True, return_sequences=True)
#Attention layer, which is defined in the function above
attention_layer = B_Attention_layer(state_h, state_c, encoder_outputs, decoder_embedding)
decoder_outputs, _, _ = decoder_lstm(attention_layer, initial_state=encoder_states)
decoder_dense = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(vocab_size, activation='softmax'))
output = decoder_dense(decoder_outputs)
# Define the model that will turn `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = tf.keras.models.Model([encoder_inputs, decoder_inputs], output)
#compiling the model
model.compile(optimizer=tf.keras.optimizers.Adam(), loss='categorical_crossentropy')
#model summary
model.summary()
Below is my model summary:
Model: "model_11"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
encoder_input_layer (InputLayer [(None, 38)] 0
__________________________________________________________________________________________________
encoder_embedding_layer (Embedd (None, 38, 1024) 5796864 encoder_input_layer[0][0]
__________________________________________________________________________________________________
lstm_32 (LSTM) [(None, 38, 1024), ( 8392704 encoder_embedding_layer[0][0]
__________________________________________________________________________________________________
tf_op_layer_ExpandDims_30 (Tens [(None, 1, 1024)] 0 lstm_32[0][1]
__________________________________________________________________________________________________
tf_op_layer_Tanh_9 (TensorFlowO [(None, 38, 1024)] 0 lstm_32[0][0]
__________________________________________________________________________________________________
dense_layer_2 (Dense) (None, 1, 1024) 1049600 tf_op_layer_ExpandDims_30[0][0]
__________________________________________________________________________________________________
tf_op_layer_ExpandDims_31 (Tens [(None, 1, 1024)] 0 lstm_32[0][2]
__________________________________________________________________________________________________
tf_op_layer_add_19 (TensorFlowO [(None, 38, 1024)] 0 tf_op_layer_Tanh_9[0][0]
dense_layer_2[0][0]
__________________________________________________________________________________________________
dense_layer_3 (Dense) (None, 1, 1024) 1049600 tf_op_layer_ExpandDims_31[0][0]
__________________________________________________________________________________________________
tf_op_layer_add_20 (TensorFlowO [(None, 38, 1024)] 0 tf_op_layer_add_19[0][0]
dense_layer_3[0][0]
__________________________________________________________________________________________________
dense_layer_1 (Dense) (None, 38, 1024) 1049600 tf_op_layer_add_20[0][0]
__________________________________________________________________________________________________
tf_op_layer_Max_7 (TensorFlowOp [(None, 1, 1024)] 0 dense_layer_1[0][0]
__________________________________________________________________________________________________
tf_op_layer_sub_7 (TensorFlowOp [(None, 38, 1024)] 0 dense_layer_1[0][0]
tf_op_layer_Max_7[0][0]
__________________________________________________________________________________________________
tf_op_layer_Exp_7 (TensorFlowOp [(None, 38, 1024)] 0 tf_op_layer_sub_7[0][0]
__________________________________________________________________________________________________
tf_op_layer_Sum_14 (TensorFlowO [(None, 1, 1024)] 0 tf_op_layer_Exp_7[0][0]
__________________________________________________________________________________________________
tf_op_layer_truediv_7 (TensorFl [(None, 38, 1024)] 0 tf_op_layer_Exp_7[0][0]
tf_op_layer_Sum_14[0][0]
__________________________________________________________________________________________________
tf_op_layer_mul_9 (TensorFlowOp [(None, 38, 1024)] 0 tf_op_layer_truediv_7[0][0]
lstm_32[0][0]
__________________________________________________________________________________________________
tf_op_layer_Sum_15 (TensorFlowO [(None, 1024)] 0 tf_op_layer_mul_9[0][0]
__________________________________________________________________________________________________
tf_op_layer_ExpandDims_32 (Tens [(None, 1, 1024)] 0 tf_op_layer_Sum_15[0][0]
__________________________________________________________________________________________________
decoder_input_layer (InputLayer [(None, 38)] 0
__________________________________________________________________________________________________
tf_op_layer_Reshape_17 (TensorF [(None, 38, 1024)] 0 tf_op_layer_ExpandDims_32[0][0]
__________________________________________________________________________________________________
decoder_embedding_layer (Embedd (None, 38, 1024) 5796864 decoder_input_layer[0][0]
__________________________________________________________________________________________________
concatenate_13 (Concatenate) (None, 38, 2048) 0 tf_op_layer_Reshape_17[0][0]
decoder_embedding_layer[0][0]
__________________________________________________________________________________________________
lstm_33 (LSTM) [(None, 38, 1024), ( 12587008 concatenate_13[0][0]
lstm_32[0][1]
lstm_32[0][2]
__________________________________________________________________________________________________
time_distributed_11 (TimeDistri (None, 38, 5661) 5802525 lstm_33[0][0]
==================================================================================================
Total params: 41,524,765
Trainable params: 41,524,765
Non-trainable params: 0
__________________________________________________________________________________________________
When I fit the model, I'm facing the tensor shape error below; can anyone help me understand what exactly it means?
model.fit([encoder_input_data, decoder_input_data], decoder_output_data, batch_size=86, epochs=10, validation_split=0.2)
Error:
Train on 4644 samples, validate on 1162 samples
Epoch 1/10
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-78-781d7ca43c98> in <module>()
----> 1 get_ipython().run_cell_magic('time', '', 'model.fit([encoder_input_data, decoder_input_data], decoder_output_data, batch_size=86, epochs=10, validation_split=0.2) ')
7 frames
/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2115 magic_arg_s = self.var_expand(line, stack_depth)
2116 with self.builtin_trap:
-> 2117 result = fn(magic_arg_s, cell)
2118 return result
2119
</usr/local/lib/python3.6/dist-packages/decorator.py:decorator-gen-60> in time(self, line, cell, local_ns)
/usr/local/lib/python3.6/dist-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
186 # but it's overkill for just that one bit of state.
187 def magic_deco(arg):
--> 188 call = lambda f, *a, **k: f(*a, **k)
189
190 if callable(arg):
/usr/local/lib/python3.6/dist-packages/IPython/core/magics/execution.py in time(self, line, cell, local_ns)
1187 if mode=='eval':
1188 st = clock2()
-> 1189 out = eval(code, glob, local_ns)
1190 end = clock2()
1191 else:
<timed eval> in <module>()
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
725 max_queue_size=max_queue_size,
726 workers=workers,
--> 727 use_multiprocessing=use_multiprocessing)
728
729 def evaluate(self,
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_arrays.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
673 validation_steps=validation_steps,
674 validation_freq=validation_freq,
--> 675 steps_name='steps_per_epoch')
676
677 def evaluate(self,
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq, mode, validation_in_fit, prepared_feed_values_from_dataset, steps_name, **kwargs)
392
393 # Get outputs.
--> 394 batch_outs = f(ins_batch)
395 if not isinstance(batch_outs, list):
396 batch_outs = [batch_outs]
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/backend.py in __call__(self, inputs)
3474
3475 fetched = self._callable_fn(*array_vals,
-> 3476 run_metadata=self.run_metadata)
3477 self._call_fetch_callbacks(fetched[-len(self._fetches):])
3478 output_structure = nest.pack_sequence_as(
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py in __call__(self, *args, **kwargs)
1470 ret = tf_session.TF_SessionRunCallable(self._session._session,
1471 self._handle, args,
-> 1472 run_metadata_ptr)
1473 if run_metadata:
1474 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
InvalidArgumentError: Input to reshape is a tensor with 88064 values, but the requested shape requires a multiple of 38912
[[{{node Reshape_17}}]]
Can anyone help me? I don't understand where I went wrong.
The error is on the line
context_vector = tf.keras.backend.reshape(context_vector,[-1,38,1024])
You are trying to reshape a tensor with 88064 values into a shape whose total size has to be a multiple of 38912 (38 * 1024 = 38912), and 88064 is not, hence the error.
I'm not sure about the shape of decoder_embedding in
input_to_decoder = tf.keras.layers.Concatenate(axis=-1)([context_vector,decoder_embedding])
or about your future plans, but maybe reshaping the vector with the right values will do the trick.
88064 / 1024 = 86, so you could do:
context_vector = tf.keras.backend.reshape(context_vector,[-1,86,1024])
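Note that 86 is just your batch size (batch_size=86 in the fit call) leaking into the shape, so hard-coding it will break for any other batch size. An alternative sketch (my suggestion, untested on your data) is to tile the single-timestep context vector across the 38 decoder steps instead of reshaping it:
# context_vector has shape (batch, 1, 1024) after expand_dims; tiling it
# 38 times along the time axis yields (batch, 38, 1024), which lines up
# with decoder_embedding for the Concatenate at any batch size.
context_vector = tf.keras.backend.tile(context_vector, [1, 38, 1])
input_to_decoder = tf.keras.layers.Concatenate(axis=-1)([context_vector, decoder_embedding])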
Hi, I am trying to do multi-class classification using an embedding layer and stacking Conv1D with a bidirectional LSTM. Here is my script:
embed_dim = 100
lstm_out = 128
max_features = 5000
model8 = Sequential()
model8.add(Embedding(max_features, embed_dim, input_length = X.shape[1]))
model8.add(Dropout(0.2))
model8.add(Conv1D(filters=100, kernel_size=3, padding='same', activation='relu'))
model8.add(MaxPooling1D(pool_size=2))
model8.add(Bidirectional(LSTM(lstm_out)))
model8.add(Dense(124,activation='softmax'))
model8.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
print model8.summary()
I got the error message below:
TypeErrorTraceback (most recent call last)
<ipython-input-51-6c831fc4581f> in <module>()
9 model8.add(Embedding(max_features, embed_dim))
10 model8.add(Dropout(0.2))
---> 11 model8.add(Conv1D(filters=100, kernel_size=3, padding='same', activation='relu'))
12 model8.add(MaxPooling1D(pool_size=2))
13 model8.add(Bidirectional(LSTM(lstm_out)))
/jupyter/local/lib/python2.7/site-packages/tensorflow/python/training/checkpointable/base.pyc in _method_wrapper(self, *args, **kwargs)
362 self._setattr_tracking = False # pylint: disable=protected-access
363 try:
--> 364 method(self, *args, **kwargs)
365 finally:
366 self._setattr_tracking = previous_value # pylint: disable=protected-access
/jupyter/local/lib/python2.7/site-packages/tensorflow/python/keras/engine/sequential.pyc in add(self, layer)
128 raise TypeError('The added layer must be '
129 'an instance of class Layer. '
--> 130 'Found: ' + str(layer))
131 self.built = False
132 if not self._layers:
TypeError: The added layer must be an instance of class Layer. Found: <keras.layers.convolutional.Conv1D object at 0x7f62907f8590>
What did I do wrong? Thanks!
The error comes from mixing layers from the standalone keras package with a tensorflow.keras Sequential model; the traceback shows tensorflow/python/keras rejecting a keras.layers.convolutional.Conv1D object. Importing everything from the same package fixes it:
from keras.layers import Dense, Embedding, Dropout, LSTM
from keras.models import Sequential
from keras.layers import Bidirectional
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
embed_dim = 100
lstm_out = 128
max_features = 5000
model8 = Sequential()
model8.add(Embedding(max_features, embed_dim, input_length = X.shape[1]))
model8.add(Dropout(0.2))
model8.add(Conv1D(filters=100, kernel_size=3, padding='same', activation='relu'))
model8.add(MaxPooling1D(pool_size=2))
model8.add(Bidirectional(LSTM(lstm_out)))
model8.add(Dense(124,activation='softmax'))
model8.compile(loss = 'categorical_crossentropy', optimizer='adam', metrics = ['accuracy'])
print(model8.summary())
Prints the model summary without any error:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_8 (Embedding) (None, 100, 100) 500000
_________________________________________________________________
dropout_5 (Dropout) (None, 100, 100) 0
_________________________________________________________________
conv1d_3 (Conv1D) (None, 100, 100) 30100
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 50, 100) 0
_________________________________________________________________
bidirectional_7 (Bidirection (None, 256) 234496
_________________________________________________________________
dense_7 (Dense) (None, 124) 31868
=================================================================
Total params: 796,464
Trainable params: 796,464
Non-trainable params: 0
_________________________________________________________________
None