I'm using Keras (tf.keras) in TensorFlow 2.0.0.
I have a network whose input is an image and whose output is also an image. I want to use a combination of MSE, MSE in VGG feature space, and some other losses that depend on intermediate layer outputs. I'm defining a custom loss function. I'm able to build the model and compile it with the custom loss, but when I train using fit_generator, I get a SymbolicException saying "Inputs to eager execution function cannot be Keras symbolic tensors".
Full Code:
Train File:
def __init__(self, gray_images: bool, verbose: bool = True):
    """Initialise the instance.

    Forwards ``gray_images`` and ``verbose`` to the parent initialiser, sets
    ``self.model`` to ``None`` and stores a VGG16 feature extractor on
    ``self.vgg_feature_extractor``.
    """
    super().__init__(gray_images, verbose)
    self.model = None
    # Indices into the VGG16 layer list whose outputs are extracted.
    vgg_layer_indices = [3, 6, 10]
    self.vgg_feature_extractor = VggFeaturesExtractor(
        model_name='vgg16',
        layers=vgg_layer_indices,
    )
def build_model(self):
    """Build the image-to-image model and compile it with the VGG loss.

    The resulting ``Model`` is stored on ``self.model``. An extra loss term
    computed from ``out2`` is attached with ``add_loss`` before compiling with
    the Adam optimizer and ``self.vgg_loss``.

    The method takes ``self`` because the body reads and writes instance
    attributes (``self.model``, ``self.build_out1_model``, ``self.loss2``, ...).
    """
    # NOTE(review): num_input_channels, num_filters, depth_t, depth_n,
    # use_bnorm and loss_weights are not defined in the visible code;
    # presumably they come from the enclosing scope - confirm.
    image_input = Input(shape=(None, None, num_input_channels))
    out1 = self.build_out1_model(image_input, num_filters, depth_t)
    out2 = self.build_out2_model(image_input, num_filters, depth_n, use_bnorm)
    enhanced_image = ...  # Some function of image_input, out1 and out2
    self.model = Model(inputs=image_input, outputs=enhanced_image)
    # Loss term that depends on an intermediate output rather than on the
    # model's final output, hence add_loss instead of the compile-time loss.
    self.model.add_loss(loss_weights[1] * self.loss2(out2))
    self.model.compile(optimizer='adam', loss=self.vgg_loss)
def vgg_loss(self, gt_image, est_image):
    """Return the summed mean-squared error between VGG feature maps.

    Args:
        gt_image: Ground-truth images, passed to the feature extractor.
        est_image: Estimated images, passed to the feature extractor.

    Returns:
        The sum, over every feature map returned by the extractor, of the
        mean squared difference between ground-truth and estimated features.
    """
    # flatten() yields a list whether the extractor returns one tensor or a
    # list of tensors, so the loss follows the extractor's configured layers
    # instead of assuming exactly three outputs.
    gt_features = tf.nest.flatten(
        self.vgg_feature_extractor.extract_features(gt_image))
    est_features = tf.nest.flatten(
        self.vgg_feature_extractor.extract_features(est_image))
    per_layer_losses = [
        tf.reduce_mean(tf.square(gt_feat - est_feat))
        for gt_feat, est_feat in zip(gt_features, est_features)
    ]
    return tf.add_n(per_layer_losses)
VggFeatures.py:
class VggFeaturesExtractor:
    """Expose the outputs of selected layers of a Keras VGG16/VGG19 network."""

    def __init__(self, model_name: str, layers: List[int]):
        """Create the feature-extraction model.

        Args:
            model_name: Either ``'vgg16'`` or ``'vgg19'``.
            layers: Indices into the VGG model's ``layers`` list whose outputs
                become the outputs of the extractor.

        Raises:
            RuntimeError: If ``model_name`` is neither ``'vgg16'`` nor ``'vgg19'``.
        """
        self.model_name = model_name
        self.layers = layers

        # Reject unsupported names up front, before importing any backbone.
        if model_name not in ('vgg16', 'vgg19'):
            raise RuntimeError(f'Unknown Model: {model_name}')

        # Import lazily so only the requested architecture is loaded.
        if model_name == 'vgg16':
            from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
            backbone = VGG16(include_top=False)
        else:
            from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
            backbone = VGG19(include_top=False)
        self.preprocess_input = preprocess_input

        tapped_outputs = [backbone.layers[index].output for index in layers]
        self.feature_extractor = keras.Model(inputs=backbone.input, outputs=tapped_outputs)

    def extract_features(self, images: numpy.ndarray):
        """Preprocess ``images`` and return the extractor's outputs for them."""
        return self.feature_extractor(self.preprocess_input(images))
Stack trace:
Epoch 1/1000
Traceback (most recent call last):
File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py", line 61, in quick_execute
num_outputs)
TypeError: An op outside of the function building code is being passed
a "Graph" tensor. It is possible to have Graph tensors
leak out of the function building context by including a
tf.init_scope in your function building code.
For example, the following function will fail:
#tf.function
def has_init_scope():
my_constant = tf.constant(1.)
with tf.init_scope():
added = my_constant * 2
The graph tensor has name: StridedSliceGrad:0
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/media/nagabhushan/Data02/SNB/IISc/Research/.../Workspace/Ideas/01_Supervised/src/N09.py", line 363, in <module>
main()
File "/media/nagabhushan/Data02/SNB/IISc/Research/.../Workspace/Ideas/01_Supervised/src/N09.py", line 343, in main
args.save_interval)
File "/media/nagabhushan/Data02/SNB/IISc/Research/.../Workspace/Ideas/01_Supervised/src/N09.py", line 92, in train_model
verbose=self.verbose)
File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 1297, in fit_generator
steps_name='steps_per_epoch')
File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_generator.py", line 265, in model_iteration
batch_outs = batch_function(*batch_data)
File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 973, in train_on_batch
class_weight=class_weight, reset_metrics=reset_metrics)
File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py", line 264, in train_on_batch
output_loss_metrics=model._output_loss_metrics)
File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py", line 311, in train_on_batch
output_loss_metrics=output_loss_metrics))
File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py", line 268, in _process_single_batch
grads = tape.gradient(scaled_total_loss, trainable_weights)
File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/backprop.py", line 1014, in gradient
unconnected_gradients=unconnected_gradients)
File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/imperative_grad.py", line 76, in imperative_grad
compat.as_str(unconnected_gradients.value))
File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 911, in _backward_function_wrapper
processed_args, remapped_captures)
File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 1224, in _call_flat
ctx, args, cancellation_manager=cancellation_manager)
File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 511, in call
ctx=ctx)
File "/media/nagabhushan/Data02/SoftwareFiles/Anaconda/anaconda3/envs/.../lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py", line 75, in quick_execute
"tensors, but found {}".format(keras_symbolic_tensors))
tensorflow.python.eager.core._SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'StridedSliceGrad:0' shape=(16, 64, 64, 3) dtype=float32>]
Process finished with exit code 1
Note:
1. If I replace self.model.compile(optimizer='adam', loss=self.vgg_loss) with self.model.compile(optimizer='adam', loss='mse'), code works fine, which implies the other part of code is working correctly.
2. Almost every question I found on SO regarding VGG loss advises to append VGG network to the main network, set trainable=False for VGG network and then train with MSE loss. But I can't do that, since I have many components in my loss function.
I was able to fix this issue by disabling eager execution. In tensorflow 2.0, eager execution is enabled by default.
tf.compat.v1.disable_eager_execution()
I don't understand why this fixes the issue, though. If anybody stumbles on a similar problem, try disabling eager execution.
Related
I am trying to figure out sentiment classification on movie reviews using BERT, transformers and tensorflow. This is the code I currently have:
def read_dataset(filename, model_name="bert-base-cased"):
    """Read a tab-separated dataset and return tokenized sentences and labels.

    Every line must contain exactly three tab-separated fields: the text, a
    label field containing ``=`` (the part after ``=`` is compared with
    ``"POS"``), and a third field that is ignored.

    Args:
        filename: Path of the UTF-8 encoded dataset file.
        model_name: Checkpoint name for ``BertTokenizer.from_pretrained``. It
            must name the same checkpoint as the model consuming the token
            IDs; the default matches ``BertMLP``'s default. Cased and uncased
            vocabularies differ in size, so mixing them yields token IDs
            outside the model's embedding table.

    Returns:
        A ``(encodings, labels)`` pair: ``encodings`` is a dict built from the
        tokenizer output (padded, truncated, TensorFlow tensors) and
        ``labels`` is an integer array of shape ``(num_lines, 1)`` holding 1
        for ``"POS"`` and 0 otherwise.
    """
    tokenizer = BertTokenizer.from_pretrained(model_name)
    with open(filename, "r", encoding="utf-8") as f:
        lines = f.readlines()
    # preallocate memory for the data
    sents, labels = [], np.empty((len(lines), 1), dtype=int)
    for i, line in enumerate(lines):
        text, str_label, _ = line.split("\t")
        labels[i] = int(str_label.split("=")[1] == "POS")
        sents.append(text)
    return dict(tokenizer(sents, padding=True, truncation=True, return_tensors="tf")), labels
class BertMLP(tf.keras.Model):
    """BERT encoder followed by a dense classification head with one sigmoid output."""

    def __init__(self, embed_batch_size=100, model_name="bert-base-cased"):
        """Load the BERT encoder and build the classification head.

        Args:
            embed_batch_size: Stored as ``self.bs``; not used elsewhere in
                this class.
            model_name: Checkpoint name for ``TFBertModel.from_pretrained``.
                The tokenizer that produced the inputs must use the same
                checkpoint.
        """
        super().__init__()
        self.bs = embed_batch_size
        self.model = TFBertModel.from_pretrained(model_name)
        self.classification_head = tf.keras.models.Sequential(
            layers=[
                tf.keras.Input(shape=(self.model.config.hidden_size,)),
                tf.keras.layers.Dense(350, activation="tanh"),
                tf.keras.layers.Dense(200, activation="tanh"),
                tf.keras.layers.Dense(50, activation="tanh"),
                tf.keras.layers.Dense(1, activation="sigmoid", use_bias=False),
            ]
        )

    def call(self, inputs, training=False):
        """Encode ``inputs`` with BERT and return the head's sigmoid scores.

        Args:
            inputs: Tokenizer output accepted by ``TFBertModel``.
            training: Forwarded to the encoder and to the head.

        Returns:
            The classification head applied to the encoder's pooled output.
        """
        encoder_outputs = self.model(inputs, training=training)
        # The head was previously built but never applied, so the model
        # returned raw BERT outputs instead of one score per example.
        pooled = encoder_outputs.pooler_output
        return self.classification_head(pooled, training=training)
def evaluate(model, inputs, labels, loss_func):
    """Run one forward pass over ``inputs`` and report loss and accuracy.

    Args:
        model: Callable applied to ``inputs`` to obtain predictions.
        inputs: Model inputs, passed through unchanged.
        labels: Targets compared against the predictions.
        loss_func: Callable invoked as ``loss_func(labels, predictions)``.

    Returns:
        A ``(loss, accuracy_percent)`` pair: the result of a ``Mean`` metric
        fed with the loss value, and the ``BinaryAccuracy`` result times 100.
    """
    loss_tracker = tf.keras.metrics.Mean(name="train_loss")
    accuracy_tracker = tf.keras.metrics.BinaryAccuracy(name="train_accuracy")

    # The whole input is processed in a single call; there is no batching here.
    predicted = model(inputs)
    loss_value = loss_func(labels, predicted)
    loss_tracker(loss_value)
    accuracy_tracker(labels, predicted)

    loss_result = loss_tracker.result()
    accuracy_percent = accuracy_tracker.result() * 100
    return loss_result, accuracy_percent
if __name__ == "__main__":
    # Load the three splits; only train and dev are used below.
    train = read_dataset("datasets/rt-polarity.train.vecs")
    dev = read_dataset("datasets/rt-polarity.dev.vecs")
    test = read_dataset("datasets/rt-polarity.test.vecs")

    mlp = BertMLP()
    sgd = tf.keras.optimizers.SGD(learning_rate=0.01)
    mlp.compile(sgd, loss='mse')

    # Each split is an (inputs, labels) pair.
    dev_inputs, dev_labels = dev
    train_inputs, train_labels = train

    # Baseline on the dev split before any training step.
    dev_loss, dev_acc = evaluate(mlp, dev_inputs, dev_labels, tf.keras.losses.MeanSquaredError())
    print("Before training:", f"Dev Loss: {dev_loss}, Dev Acc: {dev_acc}")

    mlp.fit(train_inputs, train_labels, epochs=10, batch_size=10)

    dev_loss, dev_acc = evaluate(mlp, dev_inputs, dev_labels, tf.keras.losses.MeanSquaredError())
    print("After training:", f"Dev Loss: {dev_loss}, Dev Acc: {dev_acc}")
However, when I run this code, I get an error:
Traceback (most recent call last):
File "C:\Users\home\anaconda3\lib\site-packages\spyder_kernels\py3compat.py", line 356, in compat_exec
exec(code, globals, locals)
File "c:\users\home\downloads\mlp.py", line 60, in <module>
dev_loss, dev_acc = evaluate(mlp, *dev, tf.keras.losses.MeanSquaredError())
File "c:\users\home\downloads\mlp.py", line 46, in evaluate
predictions = model(inputs)
File "C:\Users\home\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "c:\users\home\downloads\mlp.py", line 39, in call
outputs = self.model(inputs)
File "C:\Users\home\anaconda3\lib\site-packages\transformers\modeling_tf_utils.py", line 409, in run_call_with_unpacked_inputs
return func(self, **unpacked_inputs)
File "C:\Users\home\anaconda3\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 1108, in call
outputs = self.bert(
File "C:\Users\home\anaconda3\lib\site-packages\transformers\modeling_tf_utils.py", line 409, in run_call_with_unpacked_inputs
return func(self, **unpacked_inputs)
File "C:\Users\home\anaconda3\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 781, in call
embedding_output = self.embeddings(
File "C:\Users\home\anaconda3\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 203, in call
inputs_embeds = tf.gather(params=self.weight, indices=input_ids)
InvalidArgumentError: Exception encountered when calling layer "embeddings" (type TFBertEmbeddings).
indices[1174,8] = 29550 is not in [0, 28996) [Op:ResourceGather]
Call arguments received:
• input_ids=tf.Tensor(shape=(1599, 73), dtype=int32)
• position_ids=None
• token_type_ids=tf.Tensor(shape=(1599, 73), dtype=int32)
• inputs_embeds=None
• past_key_values_length=0
• training=False
I googled for a while, and I can't find anything conclusive. I am pretty sure it has something to do with this part:
# Excerpt of the model's call method, as quoted by the asker.
def call(self, inputs):
# Forwards the inputs to the wrapped BERT model and returns its outputs unchanged.
outputs = self.model(inputs)
return outputs
But again, I have tried a lot of different things, including limiting dataset size and installing different versions of transformers and tensorflow, but to no avail. Please let me know what I'm doing wrong. Thank you!
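The OP was using bert-base-cased for the model and bert-base-uncased for the tokenizer. The two checkpoints have different vocabulary sizes, so the tokenizer produced token IDs (e.g. 29550) outside the model's embedding table (valid range [0, 28996)), and the embedding lookup failed on the first forward pass.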
I am saving my model, optimizer, scheduler, and scaler in a general checkpoint.
When I load them, they load without error, but after the first iteration scaler.step(optimizer) throws this error:
Traceback (most recent call last):
File "HistNet/trainloop.py", line 92, in <module>
scaler.step(optimizer)
File "/opt/conda/lib/python3.8/site-packages/torch/cuda/amp/grad_scaler.py", line 333, in step
retval = optimizer.step(*args, **kwargs)
File "/opt/conda/lib/python3.8/site-packages/torch/optim/lr_scheduler.py", line 65, in wrapper
return wrapped(*args, **kwargs)
File "/opt/conda/lib/python3.8/site-packages/torch/optim/optimizer.py", line 89, in wrapper
return func(*args, **kwargs)
File "/opt/conda/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/opt/conda/lib/python3.8/site-packages/torch/optim/adam.py", line 108, in step
F.adam(params_with_grad,
File "/opt/conda/lib/python3.8/site-packages/torch/optim/functional.py", line 86, in adam
exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
RuntimeError: The size of tensor a (32) must match the size of tensor b (64) at non-singleton dimension 0
I don't understand why there would be a shape mismatch, of all things. I'm following the official docs closely; here is a shortened version of my code:
# Shortened training script: builds the optimizer, LR scheduler and AMP gradient
# scaler, optionally restores them from a checkpoint, then trains and saves.
# NOTE(review): indentation was lost in this listing; presumably the inner loop
# runs per batch and scheduler.step()/torch.save(...) run once per epoch - confirm.
dataloader = DataLoader(Dataset)
# NOTE(review): rebinding model1 to an instance shadows the callable of the same name.
model1 = model1()
# NOTE(review): `parameters` is built elsewhere; its order must be the same on
# every run, otherwise restored optimizer state is paired with the wrong tensors.
optimizer = optim.Adam(parameters, lr, betas)
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: decay_rate**epoch)
scaler = amp.GradScaler()
# NOTE(review): epoch_resume is only assigned when `resume` is truthy.
if resume: epoch_resume = load_checkpoint(path, model1, optimizer, scheduler, scaler)
for epoch in trange(epoch_resume, config['epochs']+1, desc='Epochs'):
for content_image, style_image in tqdm(dataloader, desc='Dataloader'):
content_image, style_image = content_image.to(device), style_image.to(device)
with amp.autocast():
content_image = TF.rgb_to_grayscale(content_image)
s = TF.rgb_to_grayscale(style_image)
deformation_field = model1(s, content_image)
# The deformation field is cast to float before being used as the sampling grid.
output_image = F.grid_sample(content_image, deformation_field.float(), align_corners=False)
loss_after = cost_function(output_image, s, device=device)
# NOTE(review): loss_list is not initialised in the visible code.
loss_list += [loss_after]
# Scale the loss before backward; scaler.step() then calls optimizer.step().
scaler.scale(loss_after).backward()
scaler.step(optimizer)
scaler.update()
optimizer.zero_grad()
scheduler.step()
# Checkpoint holding the epoch and the state of model, optimizer, scheduler and scaler.
torch.save({
'epoch': epoch,
'model1_state_dict': model1.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'scheduler_state_dict': scheduler.state_dict(),
'scaler_state_dict': scaler.state_dict(),
}, path)
def load_checkpoint(checkpoint_path, model1, optimizer, scheduler, scaler, map_location=None):
    """Restore training state from a checkpoint and return the epoch to resume at.

    Args:
        checkpoint_path: Path (or file-like object) passed to ``torch.load``.
        model1: Receives ``checkpoint['model1_state_dict']`` and is then put
            into training mode with ``train()``.
        optimizer: Receives ``checkpoint['optimizer_state_dict']``.
        scheduler: Receives ``checkpoint['scheduler_state_dict']``.
        scaler: Receives ``checkpoint['scaler_state_dict']``.
        map_location: Forwarded to ``torch.load``. ``None`` (the default)
            keeps ``torch.load``'s default device mapping; pass e.g. ``"cpu"``
            to remap tensors to another device.

    Returns:
        The epoch stored in the checkpoint plus one.
    """
    checkpoint = torch.load(checkpoint_path, map_location=map_location)

    model1.load_state_dict(checkpoint['model1_state_dict'])
    model1.train()

    # The optimizer must have been constructed with its parameters in the same
    # order as when the checkpoint was written; state is matched by position.
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    scaler.load_state_dict(checkpoint['scaler_state_dict'])

    return checkpoint['epoch'] + 1
For anyone with a similar issue:
It boiled down to my use of two models with one optimizer. I did:
# Original (problematic) version: merges the parameters of every network into a set.
# A set has no defined iteration order, so the order handed to the optimizer
# differed between runs.
parameters = set()
for net in nets:
parameters |= set(net.parameters())
which resulted in an unordered collection of parameters whose order, unsurprisingly, differed on each resume, so the restored optimizer state no longer lined up with the parameters.
I have now changed it to:
# Collect the parameters network by network; a list keeps them in a stable order.
parameters = [param for net in nets for param in net.parameters()]
This works, but I haven't seen a list used this way in other code so far, whereas I have seen a set used, so be wary of potential unwanted behavior. As far as I understand, the only thing you lose is deduplication: a list can contain the same tensor more than once. With two different models, though, I don't see how that could affect the optimizer. If you know more than I do, please correct me.
I am trying to use the PyTorch grouped Conv2d operator on very large images (10k x 10k pixels). I get a "RuntimeError: offset is too big" error when applying a grouped convolution in the network. Does anyone know how to circumvent this?
Code for reproducibility:
import torch
import torch.nn as nn
import pdb
def create_img(size, batch_size=1, channels=3):
    """Return a random square image batch filled by ``uniform_(-1, 1)``.

    Args:
        size: Height and width of each image.
        batch_size: Number of images in the batch.
        channels: Number of channels per image.

    Returns:
        A float32 tensor of shape ``(batch_size, channels, size, size)``.
    """
    # torch.empty with an explicit dtype replaces the legacy
    # torch.FloatTensor(sizes...) constructor; uniform_ then fills it in place.
    return torch.empty(batch_size, channels, size, size, dtype=torch.float32).uniform_(-1, 1)
class TestModel(nn.Module):
    """Minimal module holding one grouped 3x3 convolution (64 groups, no bias)."""

    def __init__(self):
        super().__init__()
        # groups equals the channel count, so each channel has its own filter.
        self.conv1 = nn.Conv2d(
            in_channels=64,
            out_channels=64,
            kernel_size=(3, 3),
            stride=(1, 1),
            groups=64,
            bias=False,
        )

    def forward(self, x):
        """Apply the grouped convolution to ``x`` and return the result."""
        return self.conv1(x)
# Reproduction entry point: one forward pass of TestModel on a large random input.
if __name__ == '__main__':
model = TestModel()
# 64-channel 5002x5002 input; per the report the error does not occur with size 502.
data = create_img(5002, channels=64)
out = model(data)
# Drop into the debugger after the forward pass.
pdb.set_trace()
and the error:
Traceback (most recent call last):
File "test.py", line 26, in <module>
out = model(data)
File ".../pipenv/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "test.py", line 17, in forward
out = self.conv1(x)
File ".../pipenv/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File ".../pipenv/lib/python3.6/site-packages/torch/nn/modules/conv.py", line 320, in forward
self.padding, self.dilation, self.groups)
RuntimeError: offset is too big
I am using Python 3.6 and PyTorch 1.0.0. The strange thing is that this works with smaller images: for example, change the image size from 5002 to 502.
Solved by updating Pytorch to 1.3.0
I have code that mixes TensorFlow Probability (requires TF 2.0.0) with Keras pruning: it prunes the weights of a first Dense layer and feeds that layer's output to TF Probability, so both Keras and TF code live in the same model. The code:
from tensorflow_model_optimization.sparsity import keras as sparsity
from tensorflow.python import keras
import numpy as np
# Script: trains a pruned Dense layer feeding a Normal distribution layer, then
# evaluates mean and standard deviation on a slice of the data.
# NOTE(review): `tf`, `tfp`, `x1`, `y`, `dataframe`, `pruning_params` and
# `pruning_callbacks` are not defined in the visible lines; presumably they are
# set up elsewhere - confirm.
# NOTE(review): indentation was lost; the later sess.run(...) calls need the
# session, so presumably everything below `with tf.Session()` is inside it.
tf.disable_v2_behavior()
epochs = 50
num_train_samples = x1.shape[0]
end_step = 500
print('End step: ' + str(end_step))
tfd = tfp.distributions
input_shape=x1.shape
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
# Model: a pruning-wrapped Dense(1, relu) whose output is the mean of a unit-scale Normal.
pruned_model = tf.keras.Sequential([
sparsity.prune_low_magnitude(
tf.keras.layers.Dense(1, activation='relu'),**pruning_params),
tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=t, scale=1))
])
# Loss: negative log-likelihood of the target under the predicted distribution.
negloglik = lambda x, rv_x: -rv_x.log_prob(x)
pruned_model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.0001), loss=negloglik)
# NOTE(review): log_dir is a non-raw Windows path; `\P` and `\l` are not
# recognised escapes so the backslashes survive, but a raw string would be safer.
callbacks = [
pruning_callbacks.UpdatePruningStep(),
pruning_callbacks.PruningSummaries(log_dir="D:\Python\logs2", profile_batch=0)]
# ERROR HERE IN .fit()
pruned_model.fit(x1,y, epochs=50, verbose=True, batch_size=16,callbacks=callbacks)
# Predict on rows 650..799 of the second dataframe column, reshaped to a float32 column vector.
yhat2 = pruned_model(np.array(dataframe.iloc[:,1]).T.astype(np.float32).reshape(-1,1)[650:800])
mean02 = tf.convert_to_tensor(yhat2)
mean2 = sess.run(mean02)
stddev2 = yhat2.stddev()
# NOTE(review): the names say plus/minus 2 std, but the expressions compute
# mean - 3*std and mean + 3*std respectively - confirm the intent.
mean_plus_2_std2 = sess.run(mean2 - 3. * stddev2)
mean_minus_2_std2 = sess.run(mean2 + 3. * stddev2)
Details of the error:
File "<ipython-input-129-a0ad4118e99e>", line 1, in <module>
pruned_model.fit(x1,y, epochs=50, verbose=True, batch_size=16,callbacks=callbacks)
File "C:\Users\Rubens\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py", line 806, in fit
shuffle=shuffle)
File "C:\Users\Rubens\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py", line 2503, in _standardize_user_data
self._set_inputs(cast_inputs)
File "C:\Users\Rubens\Anaconda3\lib\site-packages\tensorflow\python\training\tracking\base.py", line 456, in _method_wrapper
result = method(self, *args, **kwargs)
File "C:\Users\Rubens\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py", line 2773, in _set_inputs
outputs = self.call(inputs, training=training)
File "C:\Users\Rubens\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\sequential.py", line 256, in call
outputs = layer(inputs, **kwargs)
File "C:\Users\Rubens\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 594, in __call__
self._maybe_build(inputs)
File "C:\Users\Rubens\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 1713, in _maybe_build
self.build(input_shapes)
File "C:\Users\Rubens\Anaconda3\lib\site-packages\tensorflow_model_optimization\python\core\sparsity\keras\pruning_wrapper.py", line 175, in build
self.prunable_weights = self.layer.get_prunable_weights()
File "C:\Users\Rubens\Anaconda3\lib\site-packages\tensorflow_model_optimization\python\core\sparsity\keras\prune_registry.py", line 169, in get_prunable_weights
return [getattr(layer, weight) for weight in cls._weight_names(layer)]
File "C:\Users\Rubens\Anaconda3\lib\site-packages\tensorflow_model_optimization\python\core\sparsity\keras\prune_registry.py", line 169, in <listcomp>
return [getattr(layer, weight) for weight in cls._weight_names(layer)]
AttributeError: 'Dense' object has no attribute 'kernel'
My question is: how can I convert a Keras layer (prune_low_magnitude) to TensorFlow, or convert a TensorFlow Probability layer (tfp.layers.DistributionLambda) to Keras, so that the model trains properly?
The notebook uses Keras==2.2.4 and Tensorflow==2.0.0a0
I found the solution. I installed:
! pip install --upgrade tfp-nightly
! pip install tf_nightly
! pip install tf_estimator_nightly
I want to apply Spatial Pyramid Pooling before the Dense layer in a CNN.
I used Keras for the implementation,
with TensorFlow as the backend.
However, I got an error.
What's wrong with my code? Thank you.
Traceback (most recent call last):
File "<pyshell#25>", line 1, in <module>
model.add(SpatialPyramidPooling(pooling_regions, input_shape=Input(shape = (None,None,None,3))))
File "C:\Program Files\Python36\lib\site-packages\spp\SpatialPyramidPooling.py", line 33, in __init__
super(SpatialPyramidPooling, self).__init__(**kwargs)
File "C:\Program Files\Python36\lib\site-packages\keras\engine\topology.py", line 311, in __init__
batch_input_shape = (batch_size,) + tuple(kwargs['input_shape'])
File "C:\Program Files\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 439, in __iter__
"Tensor objects are not iterable when eager execution is not "
TypeError: Tensor objects are not iterable when eager execution is not enabled. To iterate over this tensor use tf.map_fn.
Here is the code:
from keras.engine.topology import Layer
from keras.models import Sequential
import keras.backend as K
import numpy as np
model = Sequential()
# `input_shape` must be given by keyword as a plain tuple of dimensions. The
# layer's __init__ accepts only `pool_list` positionally, and Keras converts
# `input_shape` with tuple(...), which fails on an `Input(...)` tensor.
# NOTE(review): SpatialPyramidPooling must be defined before this statement runs.
model.add(SpatialPyramidPooling((1, 2, 4), input_shape=(None, None, None, 3)))
class SpatialPyramidPooling(Layer):
    """Skeleton of a spatial-pyramid-pooling layer.

    Only the configuration is implemented; ``call`` currently returns a
    placeholder variable rather than pooled features.
    """

    def __init__(self, pool_list, **kwargs):
        """Store the pooling configuration.

        Args:
            pool_list: Iterable of integers; ``num_outputs_per_channel`` is
                the sum of their squares.
            **kwargs: Forwarded to the base ``Layer`` initialiser
                (for example ``input_shape``).
        """
        self.dim_ordering = K.image_dim_ordering()
        assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
        self.pool_list = pool_list
        self.num_outputs_per_channel = sum(i * i for i in pool_list)
        super(SpatialPyramidPooling, self).__init__(**kwargs)

    def call(self, x, mask=None):
        """Print the symbolic input shape and return a placeholder output."""
        input_shape = K.shape(x)
        print(input_shape)
        # K.eval(input_shape) was removed: evaluating the shape needs actual
        # input data, which does not exist while the layer is being built.
        # NOTE(review): placeholder output unrelated to `x`; the real pooling
        # logic still has to be written.
        outputs = K.variable(value=np.random.random((3, 4)))
        return outputs
I'm pretty sure you should be using input_shape=(None, None, None, 3) instead of input_shape=Input(shape=(None, None, None, 3)).
Also, you cannot use any function in the call method that requires actual data to be present. You're using K.shape and K.eval; both will raise errors when the model is built.
If you need information about the input shape, obtain it in the def build(self, input_shape): method.