I want to reduce the size of an object detection model. To that end, I tried optimising a Faster R-CNN object detection model with the pytorch-mobile optimiser, but the generated .pt zip file is the same size as the original model.
I used the code mentioned below:
import torch
import torchvision
from torch.utils.mobile_optimizer import optimize_for_mobile
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()
script_model = torch.jit.script(model)
from torch.utils.mobile_optimizer import optimize_for_mobile
script_model_vulkan = optimize_for_mobile(script_model, backend='Vulkan')
torch.jit.save(script_model_vulkan, "frcnn.pth")
You have to quantize your model first.
Follow these steps here, and then use these methods:
from torch.utils.mobile_optimizer import optimize_for_mobile
script_model_vulkan = optimize_for_mobile(script_model, backend='Vulkan')
torch.jit.save(script_model_vulkan, "frcnn.pth")
EDIT:
Quantization process for a resnet50 model:
import torchvision
model = torchvision.models.resnet50(pretrained=True)
import os
import torch
def print_model_size(mdl):
    torch.save(mdl.state_dict(), "tmp.pt")
    print("%.2f MB" %(os.path.getsize("tmp.pt")/1e6))
    os.remove('tmp.pt')
print_model_size(model) # will print original model size
backend = "qnnpack"
model.qconfig = torch.quantization.get_default_qconfig(backend)
torch.backends.quantized.engine = backend
model_static_quantized = torch.quantization.prepare(model, inplace=False)
model_static_quantized = torch.quantization.convert(model_static_quantized, inplace=False)
print_model_size(model_static_quantized) ## will print quantized model size
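Once the model is quantized, you can push it through the same mobile-optimization path shown above. The snippet below is only a minimal sketch, under the assumption that the quantized model scripts cleanly; the output file name is arbitrary:
import torch
from torch.utils.mobile_optimizer import optimize_for_mobile

# script the quantized model, optimize it for mobile (CPU backend by default) and save it
script_quantized = torch.jit.script(model_static_quantized)
optimized_quantized = optimize_for_mobile(script_quantized)
torch.jit.save(optimized_quantized, "resnet50_quantized_mobile.pt")
print_model_size(model_static_quantized)  # the size reduction comes from the quantization step, not from optimize_for_mobile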
I am facing a perplexing issue while attempting to convert a vanilla tensorflow/keras workflow into a tensorflow extended pipeline.
In short: the datasets generated using tfx’s ExampleGen component have different shapes from those created manually using tf.data.Dataset.from_tensor_slices() from the same data, and cannot be fed into a keras model.
Reproducible example
1. Data generation
Let’s assume we create a sample dataset using:
import pandas as pd
import random
df = pd.DataFrame({
    'a': [float(x) for x in range(100)],
    'b': [float(x + 1) for x in range(100)],
    'c': [float(x**2) for x in range(100)],
    'target': [random.randint(0, 2) for _ in range(100)],
})
df.to_parquet({my_path})
2. Model generation
Let's use a dummy dense model for simplicity's sake.
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
def build_model():
    model = Sequential()
    model.add(Dense(8, input_shape=(3,), activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(
        optimizer=SGD(),
        loss="sparse_categorical_crossentropy",
        metrics=["sparse_categorical_accuracy"],
    )
    return model
3. What works: manual dataset creation
This parquet file can then be loaded back into a pandas df and converted into a tensorflow dataset using:
import tensorflow as tf
_BATCH_SIZE = 4
dataset = tf.data.Dataset.from_tensor_slices((
    tf.cast(df[['a', 'b', 'c']].values, tf.float32),
    tf.cast(df['target'].values, tf.int32),
)).batch(_BATCH_SIZE, drop_remainder=True)
This gives a dataset with cardinality() = <tf.Tensor: shape=(), dtype=int64, numpy=25>, which can be fed to the toy model above.
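As a quick sanity check (just a sketch, reusing the names defined above), this dataset can be passed straight to fit():
model = build_model()
model.fit(dataset, epochs=10)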
4. What doesn't work: making a tensorflow extended pipeline
I have tried to replicate those results by applying a slightly modified tfx starter pipeline:
from tfx_bsl.tfxio import dataset_options
from tfx.components import SchemaGen
from tfx.components import StatisticsGen
from tfx.components import Trainer
from tfx.dsl.components.base import executor_spec
from tfx.components.example_gen.component import FileBasedExampleGen
from tfx.components.example_gen.custom_executors import parquet_executor
from tfx.components.trainer.executor import GenericExecutor
from tfx.orchestration import metadata
from tfx.orchestration import pipeline
from tfx.proto import trainer_pb2
from tfx.proto import example_gen_pb2
from tfx.utils.io_utils import parse_pbtxt_file
from tensorflow_metadata.proto.v0 import schema_pb2  # needed by run_fn below; missing from the original snippet
import tensorflow as tf  # for the tf.data.Dataset annotation in _input_fn
_BATCH_SIZE = 4
_LABEL_KEY = 'target'
_EPOCHS = 10
def _input_fn(file_pattern, data_accessor, schema) -> tf.data.Dataset:
    dataset = data_accessor.tf_dataset_factory(
        file_pattern,
        dataset_options.TensorFlowDatasetOptions(
            batch_size=_BATCH_SIZE,
            label_key=_LABEL_KEY,
            num_epochs=_EPOCHS,
        ),
        schema,
    )
    return dataset
def build_model():
    """Same as above"""
    ...
    return model
def run_fn(fn_args):
    schema = parse_pbtxt_file(fn_args.schema_file, schema_pb2.Schema())
    train_dataset = _input_fn(
        fn_args.train_files,
        fn_args.data_accessor,
        schema,
    )
    eval_dataset = _input_fn(
        fn_args.eval_files,
        fn_args.data_accessor,
        schema,
    )
    model = build_model()
    model.fit(
        train_dataset,
        steps_per_epoch=fn_args.train_steps,
        validation_data=eval_dataset,
        validation_steps=fn_args.eval_steps,
        epochs=_EPOCHS,
    )
    model.save(fn_args.serving_model_dir, save_format='tf')
def _create_pipeline(
    pipeline_name: str,
    pipeline_root: str,
    data_root: str,
    module_file: str,
    metadata_path: str,
    split: dict,
) -> pipeline.Pipeline:
    split_config = example_gen_pb2.SplitConfig(
        splits=[
            example_gen_pb2.SplitConfig.Split(name=name, hash_buckets=buckets)
            for name, buckets in split.items()
        ]
    )
    example_gen = FileBasedExampleGen(
        input_base=data_root,
        custom_executor_spec=executor_spec.ExecutorClassSpec(parquet_executor.Executor),
        output_config=example_gen_pb2.Output(split_config=split_config),
    )
    statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
    infer_schema = SchemaGen(statistics=statistics_gen.outputs['statistics'])
    trainer = Trainer(
        module_file=module_file,
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        examples=example_gen.outputs['examples'],
        schema=infer_schema.outputs['schema'],
        train_args=trainer_pb2.TrainArgs(),
        eval_args=trainer_pb2.EvalArgs(),
    )
    components = [example_gen, statistics_gen, infer_schema, trainer]
    metadata_config = metadata.sqlite_metadata_connection_config(metadata_path)
    _pipeline = pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=components,
        metadata_connection_config=metadata_config,
    )
    return _pipeline
However, the dataset generated by ExampleGen has cardinality tf.Tensor(-2, shape=(), dtype=int64), and gives the following error message when fed to the same model:
ValueError: Layer sequential expects 1 inputs, but it received 3 input tensors. Inputs received: [<tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f40353373d0>, <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f4035337710>, <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f40352e3190>]
Importantly: the problem persists even when the data are stored as a csv file and read using CsvExampleGen, which makes the issue very unlikely to arise from the data themselves.
Also, batching the tfx output dataset has no effect on the results.
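The mismatch seems to come from the tfx dataset yielding a dict of per-feature SparseTensors rather than a single (batch, 3) tensor. One of the things I experimented with (only a sketch, with a hypothetical helper name, and assuming each feature comes out as a (batch, 1) SparseTensor) is mapping the features into one dense tensor:
def _densify(features, label):
    # hypothetical helper: stack the three scalar features into a single dense (batch, 3) tensor
    dense = tf.stack(
        [tf.sparse.to_dense(features[key])[:, 0] for key in ('a', 'b', 'c')],
        axis=1,
    )
    return dense, label

train_dataset = train_dataset.map(_densify)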
I’ve tried everything I could think of, to no avail. The relative obscurity of what's happening under tfx's hood doesn't help with the debugging either. Does anyone have any idea what the problem is?
Edit 1
Two points have come to my attention since writing this question:
data_accessor.tf_dataset_factory() doesn't actually output a tensorflow.python.data.ops.dataset_ops.TensorSliceDataset, but a tensorflow.python.data.ops.dataset_ops.PrefetchDataset instead.
There are actually a few as-yet-unanswered questions that look somewhat related to my problem and discuss the pains of working with PrefetchDatasets:
TFDS Audio Preprocessing PrefetchDataset Problems
How to feed tf.prefetch dataset into LSTM?
Change PrefetchDataset shapes
Considering none of those questions have found an answer, and that the crux of the problem seems to be the lack of documentation regarding PrefetchDatasets and how to use them, I'll open an issue on tfx's board and see how it goes if this doesn't get answered here within a few days.
Edit 2: version and environment details
As requested by TensorFlow Support, here are the details regarding the versions of all my TensorFlow-related installs:
Core components:
tensorflow==2.3.0
tfx==0.25.0
tfx-bsl==0.25.0
TensorFlow-related stuff:
tensorflow-cloud==0.1.7
tensorflow-data-validation==0.25.0
tensorflow-datasets==3.0.0
tensorflow-estimator==2.3.0
tensorflow-hub==0.9.0
tensorflow-io==0.15.0
tensorflow-metadata==0.25.0
tensorflow-model-analysis==0.25.0
tensorflow-probability==0.11.0
tensorflow-serving-api==2.3.0
tensorflow-transform==0.25.0
Environment and other miscellaneous details:
Python version: 3.7.9
OS: Debian GNU/Linux 10 (buster)
Running from an N1 GCP instance
from keras_multi_head import MultiHeadAttention
import keras
from keras.layers import Dense,Input,Multiply
from keras import backend as K
from keras.layers.core import Dropout, Layer
from keras.models import Sequential,Model
import numpy as np
import tensorflow as tf
from self_attention_layer import Encoder
## multi source attention
class Multi_source_attention(keras.Model):
    def __init__(self, read_n, embed_dim, num_heads, ff_dim, num_layers):
        super().__init__()
        self.read_n = read_n
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.num_layers = num_layers
        self.get_weights = Dense(49, activation='relu', name="get_weights")

    def compute_output_shape(self, input_shape):
        # ([batch,7,7,256],[1,256])
        return input_shape

    def call(self, inputs):
        ## weights matrix
        # (1,49)
        weights_res = self.get_weights(inputs[1])
        # (1,7,7)
        weights = tf.reshape(weights_res, (1, 7, 7))
        # (256,7,7)
        weights = tf.tile(weights, [256, 1, 1])
        ## img from mobilenet
        img = tf.reshape(inputs[0], [-1, 7, 7])
        inter_res = tf.multiply(img, weights)
        inter_res = tf.reshape(inter_res, (-1, 256, 49))
        print(inter_res.shape)
        att = Encoder(self.embed_dim, self.num_heads, self.ff_dim, self.num_layers)(inter_res)
        return att
I am trying to construct a network that implements the part circled in the image. The output from the LSTM is (1, 256) and the output from the preceding MobileNet is (batch, 7, 7, 256). The LSTM output is then transformed into a weights matrix of shape (7, 7).
The problem is that the output from MobileNet has a batch dimension, and I have no idea how to deal with "batch" or how to set up a parameter to constrain the batch size.
Could someone give me a tip?
Also, if I remove the compute_output_shape() function, a NotImplementedError occurs, even though the official Keras docs tell me I don't need to override that function.
Could someone explain that to me?
compute_output_shape() is crucial when customizing a layer. If summary() is called, the corresponding graph is built and the input and output shapes are shown for every layer; compute_output_shape() is what reports the output shape.
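For illustration, here is a minimal sketch (not the questioner's actual layer) of a custom layer that overrides compute_output_shape() so that summary() can report its output shape:
import tensorflow as tf
from tensorflow import keras

class ScaleLayer(keras.layers.Layer):
    """Hypothetical layer: multiplies its input by a single learnable scalar."""
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.scale = self.add_weight(name="scale", shape=(), initializer="ones")

    def call(self, inputs):
        return inputs * self.scale

    def compute_output_shape(self, input_shape):
        # the output has exactly the same shape as the input
        return input_shape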
I want to use the trained model from https://www.kaggle.com/janeresh/imageprocess. I copied the notebook and tested it, and it works perfectly. But if I download model.h5 and use it in Google Colab to predict masked or non-masked images, it always returns 0.
I use this code to predict:
from tensorflow.keras.models import load_model
from keras.preprocessing import image
import numpy as np
import cv2
model = load_model('model.h5',compile=True)
model.summary()
img = cv2.imread('masked_image.png')
img = cv2.resize(img,(75,75))
img = np.reshape(img,[1,75,75,3])
prediction = model.predict_classes(img)
prediction
and the return is
array([[0]], dtype=int32)
Thanks
Target: I want to use the pretrained Faster-RCNN model to extract features from an image.
What I have tried: I used the code below to build the model:
import torchvision.models as models
from PIL import Image
import torchvision.transforms as T
import torch
# download the pretrained fasterrcnn model
model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()
model.cuda()
# remove [2:] layers
modules = list(model.children())[:2]
model_t=torch.nn.Sequential(*modules)
# load image and extract features
img = Image.open('data/person.jpg')
transform = T.Compose([T.ToTensor()])
img_t = transform(img)
batch_t = torch.unsqueeze(img_t, 0).cuda()
ft = model_t(batch_t)
Error: But I got the following error: TypeError: conv2d(): argument 'input' (position 1) must be Tensor, not tuple
Please help! Thank you!
Use print(model.modules) to get the layer names, then delete a layer with:
del model.my_layer_name
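If the goal is only to get feature maps, another option (a sketch, not part of the original answer) is to call the detector's backbone directly instead of wrapping its children in nn.Sequential; for fasterrcnn_resnet50_fpn the backbone accepts a plain image batch and returns an OrderedDict of FPN feature maps:
import torch
import torchvision.models as models

model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()

# the backbone (ResNet-50 + FPN) takes a normal (N, 3, H, W) tensor,
# avoiding the tuple returned by the detector's transform module
with torch.no_grad():
    features = model.backbone(torch.rand(1, 3, 224, 224))
for name, fmap in features.items():
    print(name, fmap.shape)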
I have converted the .pb file to a tflite file using bazel. Now I want to load this tflite model in my Python script, just to test whether it gives me the correct output or not.
You can use the TensorFlow Lite Python interpreter to load the tflite model in a Python shell and test it with your input data.
The code will be like this:
import numpy as np
import tensorflow as tf
# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="converted_model.tflite")
interpreter.allocate_tensors()
# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Test model on random input data.
input_shape = input_details[0]['shape']
input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()
# The function `get_tensor()` returns a copy of the tensor data.
# Use `tensor()` in order to get a pointer to the tensor.
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data)
The above code is from the official TensorFlow Lite guide; for more detailed information, read this.
Using TensorFlow lite models in Python:
The verbosity of TensorFlow Lite is powerful because it allows you more control, but in many cases you just want to pass input and get an output, so I made a class that wraps this logic:
The following works with classification models from tfhub.dev, for example: https://tfhub.dev/tensorflow/lite-model/mobilenet_v2_1.0_224/1/metadata/1
# Usage
model = TensorflowLiteClassificationModel("path/to/model.tflite", labels=["label_0", "label_1"])  # placeholder labels, in the model's output order
(label, probability) = model.run_from_filepath("path/to/image.jpeg")[0]  # most probable class first
import tensorflow as tf
import numpy as np
from PIL import Image
class TensorflowLiteClassificationModel:
    def __init__(self, model_path, labels, image_size=224):
        self.interpreter = tf.lite.Interpreter(model_path=model_path)
        self.interpreter.allocate_tensors()
        self._input_details = self.interpreter.get_input_details()
        self._output_details = self.interpreter.get_output_details()
        self.labels = labels
        self.image_size = image_size

    def run_from_filepath(self, image_path):
        input_data_type = self._input_details[0]["dtype"]
        image = np.array(Image.open(image_path).resize((self.image_size, self.image_size)), dtype=input_data_type)
        if input_data_type == np.float32:
            image = image / 255.
        # convert grayscale images to three channels
        if image.ndim == 2:
            image = np.stack([image] * 3, axis=-1)
        # add the batch dimension the interpreter expects: (1, image_size, image_size, 3)
        image = np.expand_dims(image, axis=0)
        return self.run(image)

    def run(self, image):
        """
        args:
          image: a (1, image_size, image_size, 3) np.array

        Returns a list of [label, probability] pairs, sorted by descending probability.
        """
        self.interpreter.set_tensor(self._input_details[0]["index"], image)
        self.interpreter.invoke()
        tflite_interpreter_output = self.interpreter.get_tensor(self._output_details[0]["index"])
        probabilities = np.array(tflite_interpreter_output[0])

        # create a list of ["label", probability] pairs, ordered by descending probability
        label_to_probabilities = []
        for i, probability in enumerate(probabilities):
            label_to_probabilities.append([self.labels[i], float(probability)])
        return sorted(label_to_probabilities, key=lambda element: element[1], reverse=True)
Caution
However, you'll need to modify this to support different use cases, since I am passing images as input and getting classification ([label, probability]) output. If you need text input (NLP) or a different kind of output (object detection yields bounding boxes, labels and probabilities; plain classification yields just labels, etc.), you will have to adapt this logic.
Also, if you are expecting image inputs of different sizes, you'd have to change the input size and reallocate the model (self.interpreter.allocate_tensors()). This is slow (inefficient). It's better to use the platform's resizing functionality (e.g. the Android graphics library) instead of a TensorFlow Lite model to do the resizing. Alternatively, you could resize with a separate model, which would be much quicker to allocate_tensors() for.
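For reference, the reallocation path mentioned above would look roughly like this (a sketch using the interpreter and input_details variables from the first answer, and assuming the model's input tensor actually supports being resized):
# resize the input tensor to a new, hypothetical image size, then reallocate
new_size = 320
interpreter.resize_tensor_input(input_details[0]['index'], [1, new_size, new_size, 3])
interpreter.allocate_tensors()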