Currently I am implementing a TensorFlow model in Android Studio using TensorFlow Lite. I have already checked the TensorFlow model using the TensorFlow interpreter, and it gives the correct result in Python. The problem is that when I input an image from Android Studio, it gives a wrong classification. Here is the code I use to predict in Python:
image1 = cv2.imread("image")
image_fromarray = Image.fromarray(image1,'RGB')
resize_image = image_fromarray.resize((100, 100))
expand_input = np.expand_dims(resize_image,axis=0)
input_data = np.array(expand_input)
input_data = input_data/255
pred = loaded_model.predict(input_data)
result = pred.argmax()
result
And here is the code from Android Studio that gets the image from the imageView and predicts:
public void onClick(View view) {
    if (img == null) {
        Toast.makeText(MainActivity.this, "No image selected", Toast.LENGTH_SHORT).show();
        return;
    }
    try {
        // resize image to (100, 100)
        img = Bitmap.createScaledBitmap(img, imgsize, imgsize, false);
        // get pixels from the bitmap
        int[] intValues = new int[imgsize * imgsize];
        img.getPixels(intValues, 0, img.getWidth(), 0, 0, img.getWidth(), img.getHeight());
        // convert pixels to float values in [0, 1]
        float[] floatValues = new float[intValues.length * 3];
        for (int i = 0; i < intValues.length; i++) {
            final int val = intValues[i];
            floatValues[i * 3] = ((val >> 16) & 0xFF) / 255.f;
            floatValues[i * 3 + 1] = ((val >> 8) & 0xFF) / 255.f;
            floatValues[i * 3 + 2] = (val & 0xFF) / 255.f;
        }
        TensorBuffer inputBuffer = TensorBuffer.createFixedSize(new int[]{1, imgsize, imgsize, 3}, DataType.FLOAT32);
        inputBuffer.loadArray(floatValues);
        Model2 model = Model2.newInstance(getApplicationContext());
        Model2.Outputs outputs = model.process(inputBuffer);
        TensorBuffer outputFeature0 = outputs.getOutputFeature0AsTensorBuffer();
        // release model resources when no longer used
        model.close();
        float[] confidence = outputFeature0.getFloatArray();
        int maxPos = -1;
        float maxConfidence = -1;
        for (int i = 0; i < confidence.length; i++) {
            if (confidence[i] > maxConfidence) {
                maxConfidence = confidence[i];
                maxPos = i;
            }
        }
        String[] classes = {"Ripe Braeburn", "Ripe Red Apple", "Ripe Red Delicious", "Rotten"};
        tv.setText(classes[maxPos]);
    } catch (IOException e) {
        // Model2.newInstance can throw IOException if the model fails to load
    }
}
The problem is that it gives only one result for any picture I choose from the test set. How can I modify the Android Studio code so that it gives the same result as in Python?
I'm trying to write an inference program with a YOLO model in C++. I've searched for some info about Darknet, but it has to use a .cfg file to import the model structure (which is a bit too complicated for me...), so I want to do the program with TensorFlow.
(My model weights are converted from .hdf5, used in Python, to .pb, used in C++.)
I've found some examples written in Python; it seems like they have done some work before the inference process... Source
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=50,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes."""
    num_layers = len(yolo_outputs)
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]  # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    for l in range(num_layers):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
            anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
I've printed out the return values, and they look like this:
boxes-> Tensor("concat_11:0", shape=(?, 4), dtype=float32)
scores-> Tensor("concat_12:0", shape=(?,), dtype=float32)
classes-> Tensor("concat_13:0", shape=(?,), dtype=int32)
The original output of my YOLO model (.hdf5) is the following (I got this by printing out model.output):
tf.Tensor 'conv2d_59_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32
tf.Tensor 'conv2d_67_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32
tf.Tensor 'conv2d_75_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32
And the inference part of the Python code is:
out_boxes, out_scores, out_classes = sess.run(
    [boxes, scores, classes],
    feed_dict={
        yolo_model.input: image_data,
        input_image_shape: [image.size[1], image.size[0]],
        K.learning_phase(): 0
    })
Compared to the Python version of the inference code, the C++ part is... (Reference)
int main()
{
    string image = "test.jpg";
    string graph = "yolo_weight.pb";
    string labels = "coco.names";
    int32 input_width = 416;
    int32 input_height = 416;
    float input_mean = 0;
    float input_std = 255;
    string input_layer = "input_1:0";
    std::vector<std::string> output_layer = { "conv2d_59/BiasAdd:0", "conv2d_67/BiasAdd:0", "conv2d_75/BiasAdd:0" };

    std::unique_ptr<tensorflow::Session> session;
    string graph_path = tensorflow::io::JoinPath(root_dir, graph);
    Status load_graph_status = LoadGraph(graph_path, &session);

    std::vector<Tensor> resized_tensors;
    string image_path = tensorflow::io::JoinPath(root_dir, image);
    Status read_tensor_status = ReadTensorFromImageFile(image_path, input_height, input_width,
                                                        input_mean, input_std, &resized_tensors);

    Tensor inpTensor = Tensor(DT_FLOAT, TensorShape({ 1, input_height, input_width, 3 }));
    std::vector<Tensor> outputs;
    cv::Mat srcImage = cv::imread(image);
    cv::resize(srcImage, srcImage, cv::Size(input_width, input_height));
    srcImage.convertTo(srcImage, CV_32FC3);
    srcImage = srcImage / 255;
    string ty = type2str(srcImage.type());
    float *p = (&inpTensor)->flat<float>().data();
    cv::Mat tensorMat(input_height, input_width, CV_32FC3, p);
    srcImage.convertTo(tensorMat, CV_32FC3);

    Status run_status = session->Run({{ input_layer, inpTensor }}, output_layer, {}, &outputs);

    int cc = 1;
    auto output_detection_class = outputs[0].tensor<float, 4>();
    std::cout << "detection scores" << std::endl;
    std::cout << "typeid(output_detection_class).name->" << typeid(output_detection_class).name() << std::endl;
    for (int i = 0; i < 13; ++i)
    {
        for (int j = 0; j < 13; ++j)
        {
            for (int k = 0; k < 21; ++k)
            {
                // using (index_1, index_2, index_3) to access the element in a tensor
                printf("i->%d, j->%d, k->%d\t", i, j, k);
                std::cout << output_detection_class(1, i, j, k) << "\t";
                cc += 1;
                if (cc % 4 == 0)
                {
                    std::cout << "\n";
                }
            }
        }
        std::cout << std::endl;
    }
    return 0;
}
The output of the C++ inference part is
outputs.size()-> 3
outputs[0].shape()-> [1,13,13,21]
outputs[1].shape()-> [1,26,26,21]
outputs[2].shape()-> [1,52,52,21]
But the output I get is pretty weird...
(The output values of outputs[0] don't seem like any of score, class, or coordinates...)
So I'm wondering: is it because I missed the part written in Python before its inference? Or am I using the wrong way to get my output data?
I've checked some related questions and answers...
1. Yolo v3 model output clarification with keras
2. Convert YoloV3 output to coordinates of bounding box, label and confidence
3. How to access tensorflow::Tensor C++
But I still can't figure out how to make it work :(
I also found a repo which might be helpful.
I've taken a look at its yolo.cpp, but its model output tensor's shape is different from mine, and I'm not sure whether I can adapt the code directly; its output tensor is
tf.Tensor 'import/output:0' shape=(?, 735) dtype = float32
Any help or advice is appreciated...
In case you're still struggling with this, I don't see where you are applying the Sigmoid and Exp to the output layer values.
You might look at this article, which describes how to handle the output:
https://medium.com/analytics-vidhya/yolo-v3-theory-explained-33100f6d193
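For a concrete idea of what that post-processing involves, here is a minimal C++ sketch of decoding one grid cell of raw YOLOv3 output; the value layout and anchor handling are assumptions from the standard YOLOv3 formulation, not taken from the model above:
#include <cmath>

struct Box { float x, y, w, h, objectness; };

inline float sigmoid(float x) { return 1.f / (1.f + std::exp(-x)); }

// Decode the box part of the (5 + num_classes) raw values for one anchor at
// grid cell (row, col); grid is 13/26/52, input_size e.g. 416, anchors in pixels.
Box decode_cell(const float* raw, int row, int col, int grid, int input_size,
                float anchor_w, float anchor_h)
{
    Box b;
    b.x = (col + sigmoid(raw[0])) / grid * input_size; // box center x
    b.y = (row + sigmoid(raw[1])) / grid * input_size; // box center y
    b.w = std::exp(raw[2]) * anchor_w;                 // box width
    b.h = std::exp(raw[3]) * anchor_h;                 // box height
    b.objectness = sigmoid(raw[4]);                    // object confidence
    // per-class scores are sigmoid(raw[5 + c]) * b.objectness; threshold them, then run NMS
    return b;
}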
As Bryan said, there are still some operations that need to be applied to the output layers.
So in my case (according to this repo), I added this to the YOLO class (in yolo.py) to bake that post-processing into the graph when saving the model:
def output_pb(self, out_dir, out_pb):
    out_bx = self.boxes.name.split(":")[0]
    out_sc = self.scores.name.split(":")[0]
    out_cs = self.classes.name.split(":")[0]
    print(out_bx, out_sc, out_cs)
    frozen_graph = tf.graph_util.remove_training_nodes(
        tf.graph_util.convert_variables_to_constants(
            self.sess, self.sess.graph.as_graph_def(), [out_bx, out_sc, out_cs]))
    tf.io.write_graph(frozen_graph, out_dir, out_pb, as_text=False)
    print("===== FINISH saving new pb file =====")
When saving the model, I called the function like this:
yolo = YOLO(**config)
yolo.output_pb(output_dir, output_pb_name)
And when doing inference in C++, the whole process goes like this:
// initialize model
YOLO* YOLO_data = (YOLO*)Init_DllODM_object(config);
// do some stuff to set data in YOLO_data
cv::Mat input_pic = cv::imread("whatever_pic.png");
predict(YOLO_data, input_pic, YOLO_data->bbox_res, YOLO_data->score_res, YOLO_data->class_res);
// draw result on pic
cv::Mat res = show_result(YOLO_data, input_pic);
Detailed code is here:
// yolo_cpp.h
struct YOLO
{
    float score_thres;
    std::vector<int> class_res;
    std::vector<float> bbox_res, score_res;
    std::string inp_tensor_name;
    std::string placeholder_name;
    std::vector<std::string> out_tensors;
    Session* session;
    Tensor t, inpTensor;
    std::vector<tensorflow::Tensor> outTensor;
    std::vector<int> MD_size;
    std::vector<int> inp_pic_size;
    std::vector<std::string> md_class_list;
    std::vector<cv::Scalar> color_list;
    int show_score;
    int score_type;
    int return_origin;
};
// yolo_cpp.cpp
void* Init_DllODM_object(json config)
{
    std::string model_path = config["model"].get<std::string>();
    YOLO* YOLO_data = new YOLO();
    auto options = tensorflow::SessionOptions();
    GraphDef graphdef;
    // loading model to graph
    Status status_load = ReadBinaryProto(Env::Default(), model_path, &graphdef);
    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(0.7);
    options.config.mutable_gpu_options()->set_allow_growth(true);
    int node_count = graphdef.node_size();
    for (int i = 0; i < node_count; i++)
    {
        auto n = graphdef.node(i);
        if (n.name().find("input_") != string::npos)
        {
            YOLO_data->inp_tensor_name = n.name();
        }
        else if (n.name().find("Placeholder_") != string::npos)
        {
            YOLO_data->placeholder_name = n.name();
        }
        else if (i == node_count - 5)
        {
            YOLO_data->out_tensors.push_back(n.name());
        }
        else if (i == node_count - 3)
        {
            YOLO_data->out_tensors.push_back(n.name());
        }
        else if (i == node_count - 1)
        {
            YOLO_data->out_tensors.push_back(n.name());
        }
    }
    if (!status_load.ok()) {
        std::cout << "ERROR: Loading model failed..." << std::endl;
        std::cout << model_path << status_load.ToString() << "\n";
    }
    std::vector<int> MD_size_ = config["input_size"];
    YOLO_data->MD_size = MD_size_;
    std::vector<int> inp_pic_size_ = config["input_pic_size"];
    YOLO_data->inp_pic_size = inp_pic_size_;
    YOLO_data->inpTensor = Tensor(DT_FLOAT, TensorShape({ 1, YOLO_data->MD_size[0], YOLO_data->MD_size[1], 3 })); // input tensor
    YOLO_data->t = Tensor(DT_FLOAT, TensorShape({ 2 }));
    // ref: https://stackoverflow.com/questions/36804714/define-a-feed-dict-in-c-for-tensorflow-models
    auto t_matrix = YOLO_data->t.tensor<float, 1>();
    t_matrix(0) = YOLO_data->inp_pic_size[0];
    t_matrix(1) = YOLO_data->inp_pic_size[1];
    // create session
    Status status_newsess = NewSession(options, &YOLO_data->session); // for the usage of gpu setting
    Status status_create = YOLO_data->session->Create(graphdef);
    if (!status_create.ok()) {
        std::cout << "ERROR: Creating graph in session failed.." << status_create.ToString() << std::endl;
    }
    else {
        std::cout << "----------- Successfully created session and load graph -------------" << std::endl;
    }
    return YOLO_data;
}
int predict(YOLO* YOLO_, cv::Mat srcImage, std::vector<float>& bbox_res, std::vector<float>& score_res, std::vector<int>& class_res)
{
    // read image -> input image
    if (srcImage.empty()) // check if image can open correctly
    {
        std::cout << "can't open the image!!!!!!!" << std::endl;
        int res = -1;
        return res;
    }
    // ref: https://ppt.cc/f7ERNx
    std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
        { YOLO_->inp_tensor_name, YOLO_->inpTensor },
        { YOLO_->placeholder_name, YOLO_->t },
    };
    srcImage = letterbox_image(srcImage, YOLO_->MD_size[0], YOLO_->MD_size[1]);
    convertCVMatToTensor(YOLO_, srcImage);
    Status status_run = YOLO_->session->Run({ inputs }, { YOLO_->out_tensors }, {}, &YOLO_->outTensor);
    if (!status_run.ok()) {
        std::cout << "ERROR: RUN failed..." << std::endl;
        std::cout << status_run.ToString() << "\n";
        int res = -1;
        return res;
    }
    TTypes<float>::Flat pp1 = YOLO_->outTensor[0].flat<float>();
    TTypes<float>::Flat pp2 = YOLO_->outTensor[1].flat<float>();
    TTypes<int>::Flat pp3 = YOLO_->outTensor[2].flat<int>();
    int pp1_idx;
    for (int i = 0; i < pp2.size(); i++)
    {
        pp1_idx = i * 4;
        bbox_res.push_back(pp1(pp1_idx));
        bbox_res.push_back(pp1(pp1_idx + 1));
        bbox_res.push_back(pp1(pp1_idx + 2));
        bbox_res.push_back(pp1(pp1_idx + 3));
        score_res.push_back(pp2(i));
        class_res.push_back(pp3(i));
    }
    return 0;
}
cv::Mat show_result(YOLO* inf_obj, cv::Mat inp_pic)
{
    int bbox_idx;
    std::string plot_str;
    bool under_thresh = false;
    std::vector<int> del_idx;
    for (int i = 0; i < inf_obj->class_res.size(); i++)
    {
        int y_min, y_max, x_min, x_max;
        bbox_idx = i * 4;
        y_min = std::max(0, (int)floor(inf_obj->bbox_res[bbox_idx] + 0.5));
        x_min = std::max(0, (int)floor(inf_obj->bbox_res[bbox_idx + 1] + 0.5));
        y_max = std::max(0, (int)floor(inf_obj->bbox_res[bbox_idx + 2] + 0.5));
        x_max = std::max(0, (int)floor(inf_obj->bbox_res[bbox_idx + 3] + 0.5));
        //std::cout << md_class_list[class_res[i]] << ", ";
        //std::cout << score_res[i] << ",";
        //std::cout << "[" << x_min << ", " << y_min << ", " << x_max << ", " << y_max << "]\n";
        if (inf_obj->show_score)
        {
            if (inf_obj->score_type)
                plot_str = inf_obj->md_class_list[inf_obj->class_res[i]] + ", " + std::to_string(rounding(inf_obj->score_res[i] * 100, 2)).substr(0, 5) + "%";
            else
                plot_str = inf_obj->md_class_list[inf_obj->class_res[i]] + ", " + std::to_string(rounding(inf_obj->score_res[i], 2)).substr(0, 4);
        }
        else
            plot_str = inf_obj->md_class_list[inf_obj->class_res[i]];
        if (inf_obj->score_res[i] >= inf_obj->score_thres)
        {
            inp_pic = plot_one_box(inp_pic, x_min, y_min, x_max, y_max, plot_str, inf_obj->color_list[inf_obj->class_res[i]]);
        }
        else
        {
            //std::cout << "score_res[i]->" << score_res[i] << " under thresh!!" << std::endl;
            under_thresh = true;
            del_idx.push_back(i);
        }
    }
    if (under_thresh)
    {
        //std::cout << "*** deleting element" << std::endl;
        for (int x = 0; x < del_idx.size(); x++)
        {
            bbox_idx = (del_idx[x] - x) * 4;
            inf_obj->bbox_res.erase(inf_obj->bbox_res.begin() + bbox_idx + 3);
            inf_obj->bbox_res.erase(inf_obj->bbox_res.begin() + bbox_idx + 2);
            inf_obj->bbox_res.erase(inf_obj->bbox_res.begin() + bbox_idx + 1);
            inf_obj->bbox_res.erase(inf_obj->bbox_res.begin() + bbox_idx);
            inf_obj->score_res.erase(inf_obj->score_res.begin() + del_idx[x] - x);
            inf_obj->class_res.erase(inf_obj->class_res.begin() + del_idx[x] - x);
        }
        del_idx.clear();
    }
    return inp_pic;
}
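One helper not shown above is convertCVMatToTensor. As a rough sketch of what it has to do (assuming float32 input scaled to [0, 1] and a BGR-to-RGB swap, neither of which is confirmed by the code above):
#include <cstring> // std::memcpy

// Hypothetical helper matching the call in predict(): copy a letterboxed
// 8-bit BGR cv::Mat into the preallocated float input tensor.
void convertCVMatToTensor(YOLO* YOLO_, cv::Mat srcImage)
{
    cv::Mat rgb;
    cv::cvtColor(srcImage, rgb, cv::COLOR_BGR2RGB); // assumption: model expects RGB
    rgb.convertTo(rgb, CV_32FC3, 1.0 / 255.0);      // assumption: inputs scaled to [0, 1]
    float* dst = YOLO_->inpTensor.flat<float>().data();
    for (int r = 0; r < rgb.rows; ++r)              // Mat rows may not be contiguous
        std::memcpy(dst + (size_t)r * rgb.cols * 3, rgb.ptr<float>(r),
                    rgb.cols * 3 * sizeof(float));
}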
Since my code is used in a DLL, I arranged it this way.
There is still some redundant code I didn't delete,
but I think the whole process can be done with the code provided so far.
Hope this helps :D
I have trained a model for semantic segmentation using this repo, got good results, and tried to use this net in a small library written with the TensorFlow C API. I turned my Keras model into a protobuf file using this repo and run the session using this code:
typedef struct model_t {
    TF_Graph* graph;
    TF_Session* session;
    TF_Status* status;
    TF_Output input, target, output;
    TF_Operation *init_op, *train_op, *save_op, *restore_op;
    TF_Output checkpoint_file;
} model_t;

typedef struct NetProperties {
    int width;
    int height;
    int border;
    int classes;
    int inputSize;
} NetProperties;

static model_t * model;
static NetProperties * properties;
extern "C" EXPORT int ModelCreate(const char* nnFilename, const char* inputName, const char* outputName, int pictureWidth, int pictureHeight, int border, int classes) {
ModelDestroy();
model = (model_t*)malloc(sizeof(model_t));;
model->status = TF_NewStatus();
model->graph = TF_NewGraph();
properties = (NetProperties*)malloc(sizeof(NetProperties));
properties->width = pictureWidth;
properties->height = pictureHeight;
properties->border = border;
properties->classes = classes;
properties->inputSize = (pictureWidth + border * 2) * (pictureHeight + border * 2) * 3;
{
// Create the session.
TF_SessionOptions* opts = TF_NewSessionOptions();
model->session = TF_NewSession(model->graph, opts, model->status);
TF_DeleteSessionOptions(opts);
if (!Okay(model->status)) return 0;
}
TF_Graph* g = model->graph;
{
// Import the graph.
TF_Buffer* graph_def = read_file(nnFilename);
if (graph_def == NULL) return 0;
printf("Read GraphDef of %zu bytes\n", graph_def->length);
TF_ImportGraphDefOptions* opts = TF_NewImportGraphDefOptions();
TF_GraphImportGraphDef(g, graph_def, opts, model->status);
TF_DeleteImportGraphDefOptions(opts);
TF_DeleteBuffer(graph_def);
if (!Okay(model->status)) return 0;
}
// Handles to the interesting operations in the graph.
model->input.oper = TF_GraphOperationByName(g, inputName);
model->input.index = 0;
model->target.oper = TF_GraphOperationByName(g, "target");
model->target.index = 0;
model->output.oper = TF_GraphOperationByName(g, outputName);
model->output.index = 0;
model->init_op = TF_GraphOperationByName(g, "init");
model->train_op = TF_GraphOperationByName(g, "train");
model->save_op = TF_GraphOperationByName(g, "save/control_dependency");
model->restore_op = TF_GraphOperationByName(g, "save/restore_all");
model->checkpoint_file.oper = TF_GraphOperationByName(g, "save/Const");
model->checkpoint_file.index = 0;
// first prediction is slow
unsigned char * randomData = (unsigned char*)malloc(properties->inputSize * sizeof(unsigned char));
for (int i = 0; i < properties->inputSize; i++) {
randomData[i] = (unsigned char)100;
}
ModelPredict(randomData);
free(randomData);
return 1;
}
extern "C" EXPORT void ModelDestroy() {
if (model == nullptr) return;
TF_DeleteSession(model->session, model->status);
Okay(model->status);
TF_DeleteGraph(model->graph);
TF_DeleteStatus(model->status);
free(model);
}
extern "C" EXPORT unsigned char* ModelPredict(unsigned char * batch1) {
if (model == NULL) return NULL;
const int64_t dims[4] = { 1, properties->height + properties->border * 2, properties->width + properties->border * 2, 3 };
size_t nbytes = properties->inputSize;
// can be faster
float * arrayOfFloats = (float*)malloc(nbytes * sizeof(float));
//float sumUp = 0;
for (int i = 0; i < properties->inputSize; i++) {
arrayOfFloats[i] = batch1[i] * (1.f / 255.f);
//sumUp += arrayOfFloats[i];
}
//std::cout << sumUp << std::endl;
// removed due to jdehesa answer
//float ** inputFloats = (float**)malloc(nbytes * sizeof(float*));
//inputFloats[0] = arrayOfFloats;
// Optionally, you can check that your input_op and input tensors are correct
//// by using some of the functions provided by the C API.
//std::cout << "Input op info: " << TF_OperationNumOutputs(input_op) << "\n";
//std::cout << "Input data info: " << TF_Dim(input, 0) << "\n";
std::vector<TF_Output> inputs;
std::vector<TF_Tensor*> input_values;
TF_Operation* input_op = model->input.oper;
TF_Output input_opout = { input_op, 0 };
inputs.push_back(input_opout);
// reworked due to jdehesa answer
//TF_Tensor* input = TF_NewTensor(TF_FLOAT, dims, 4, (void*)inputFloats, //nbytes * sizeof(float), &Deallocator, NULL);
TF_Tensor* input = TF_NewTensor(TF_FLOAT, dims, 4, (void*)arrayOfFloats, nbytes * sizeof(float), &Deallocator, NULL);
input_values.push_back(input);
int outputSize = properties->width * properties->height * properties->classes;
int64_t out_dims[] = { 1, properties->height, properties->width, properties->classes };
// Create vector to store graph output operations
std::vector<TF_Output> outputs;
TF_Operation* output_op = model->output.oper;
TF_Output output_opout = { output_op, 0 };
outputs.push_back(output_opout);
// Create TF_Tensor* vector
//std::vector<TF_Tensor*> output_values(outputs.size(), nullptr);
// Similar to creating the input tensor, however here we don't yet have the
// output values, so we use TF_AllocateTensor()
TF_Tensor* output_value = TF_AllocateTensor(TF_FLOAT, out_dims, 4, outputSize * sizeof(float));
//output_values.push_back(output_value);
//// As with inputs, check the values for the output operation and output tensor
//std::cout << "Output: " << TF_OperationName(output_op) << "\n";
//std::cout << "Output info: " << TF_Dim(output_value, 0) << "\n";
TF_SessionRun(model->session, NULL,
&inputs[0], &input_values[0], inputs.size(),
&outputs[0], &output_value, outputs.size(),
/* No target operations to run */
NULL, 0, NULL, model->status);
if (!Okay(model->status)) return NULL;
TF_DeleteTensor(input_values[0]);
// memory allocations take place here
float* prediction = (float*)TF_TensorData(output_value);
//float* prediction = (float*)malloc(sizeof(float) * properties->inputSize / 3 * properties->classes);
//memcpy(prediction, TF_TensorData(output_value), sizeof(float) * properties->inputSize / 3 * properties->classes);
unsigned char * charPrediction = new unsigned char[outputSize * sizeof(unsigned char)];
sumUp = 0;
for (int i = 0; i < outputSize; i++) {
charPrediction[i] = (unsigned char)((prediction[i] * 255));
//sumUp += prediction[i];
}
//std::cout << sumUp << std::endl << std::endl;
//free(prediction);
TF_DeleteTensor(output_value);
return charPrediction;
}
The problem is that the prediction result is always the same. I tried to pass random data and real images, but the result was equal. However, different trained models give different prediction results, but for each model it is always the same. As you can see in the code snippet, I checked that I pass different data and get the same prediction every time:
// first is float sum of passed picture, second is the float sum of answer
724306
22982.6
692004
22982.6
718490
22982.6
692004
22982.6
720861
22982.6
692004
22982.6
I tried to write my own Keras-to-TensorFlow .pb converter, but the result was the same.
import os, argparse
import tensorflow as tf
from tensorflow.keras.utils import get_custom_objects
from segmentation_models.losses import bce_dice_loss, dice_loss, cce_dice_loss
from segmentation_models.metrics import iou_score

# some custom functions from segmentation_models
get_custom_objects().update({
    'dice_loss': dice_loss,
    'bce_dice_loss': bce_dice_loss,
    'cce_dice_loss': cce_dice_loss,
    'iou_score': iou_score,
})

def freeze_keras(model_name):
    tf.keras.backend.set_learning_phase(0)
    model = tf.keras.models.load_model(model_name)
    sess = tf.keras.backend.get_session()
    constant_graph = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph.as_graph_def(), [out.op.name for out in model.outputs])
    tf.train.write_graph(constant_graph, './', 'saved_model.pb', as_text=False)

freeze_keras('best-weights.hdf5')
Help me figure out how to fix the prediction result in the C API.
UPDATE 1: Reworked input array as jdehesa suggested
UPDATE 2: Added definition of model and NetProperties
I think you are not setting the input data correctly. Let's see.
float * arrayOfFloats1 = (float*)malloc(nbytes * sizeof(float));
float sumUp = 0;
Here you create arrayOfFloats1 to hold all the image data.
for (int i = 0; i < properties->inputSize; i++) {
    arrayOfFloats1[i] = batch1[i] * (1.f / 255.f);
    sumUp += arrayOfFloats1[i];
}
std::cout << sumUp << std::endl;
Here you set arrayOfFloats1 to the image data. This is all fine.
But then:
float ** inputFloats = (float**)malloc(nbytes * sizeof(float*));
Here you have inputFloats, which has space for nbytes float pointers. First, you probably would want to allocate space for float values, not float pointers (which probably do not have the same size). And then:
inputFloats[0] = arrayOfFloats1;
Here you are setting the first of those nbytes pointers to the pointer arrayOfFloats1. And then inputFloats is used as input to the model. But the remaining nbytes - 1 pointers have not been set to anything. Although it is not required, they are probably all set to zero.
If you just want to make an "array of arrays of floats" with arrayOfFloats1 you don't need to allocate any memory, you can simply do:
float ** inputFloats = &arrayOfFloats1;
But then you actually use inputFloats like this:
TF_Tensor* input = TF_NewTensor(
TF_FLOAT, dims, 4, (void*)inputFloats, nbytes * sizeof(float), &Deallocator, NULL);
So here you are saying that input is made up of the data in inputFloats, which will be a pointer to arrayOfFloats1 and then uninitialized memory. Probably you actually want something like:
TF_Tensor* input = TF_NewTensor(
TF_FLOAT, dims, 4, (void*)arrayOfFloats1, nbytes * sizeof(float), &Deallocator, NULL);
Which means input will be a tensor made up of the data in arrayOfFloats1 that you copied before. In fact, I don't think your code needs inputFloats at all.
Otherwise, from what I can tell the rest of the code seems correct. You should ensure that all allocated memory is properly freed in all cases (e.g. when you do if (!Okay(model->status)) return NULL; you should probably delete the input and output tensors before returning), but that is a different issue.
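For instance, a sketch of the error-path cleanup suggested above (names as in your ModelPredict):
// free both tensors before bailing out of ModelPredict on error
if (!Okay(model->status)) {
    TF_DeleteTensor(input_values[0]);
    TF_DeleteTensor(output_value);
    return NULL;
}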
The issue was in the model. I had trained it on non-normalized data from images (pixel values between 0.0 and 255.0) and tried to run inference on normalized data (I divided each pixel value by 255, arrayOfFloats[i] = batch1[i] * (1.f / 255.f);, to get values between 0.0 and 1.0), so my model thought it was getting black images every time and gave me similar answers. So I removed the normalization and the model started to predict.
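In code terms, the fix was just dropping the scaling in the conversion loop of ModelPredict (a sketch matching the loop above):
// feed raw pixel values (0..255), matching the data the model was trained on
for (int i = 0; i < properties->inputSize; i++) {
    arrayOfFloats[i] = (float)batch1[i];
}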
I am trying to rewrite the Python code for mnist_client in C++. Since I am new to TensorFlow and TF Serving, I am having some difficulties. I went through the tutorials and the C++ client example (inception_client).
The Python mnist_client works without any problems, but when I run my C++ client, it gives me an "In[0] is not a matrix" error:
gRPC call return code: 3: In[0] is not a matrix
[[Node: MatMul = MatMul[T=DT_FLOAT, _output_shapes=[[?,10]], transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_x_0_0, Variable/read)]]
I trained the model as in the tutorial, and I've checked that the MNIST data I read is OK.
From this:
tensorflow Invalid argument: In[0] is not a matrix,
I understand that MatMul needs at least 2-dimensional data. But I went through the C++ code for inception_client and the Python mnist_client, and both read the image data into a 1-dim char array...
What am I missing here?
The code for inception_client: https://github.com/tensorflow/serving/blob/master/tensorflow_serving/example/inception_client.cc
Any help would be much appreciated. :)
class ServingClient {
public:
    ServingClient(std::shared_ptr<Channel> channel) : stub_(PredictionService::NewStub(channel)) {}

    tensorflow::string callPredict(const tensorflow::string &model_name,
                                   const tensorflow::string &model_signature,
                                   const int num_tests) {
        PredictRequest request;
        PredictResponse response;
        ClientContext context;
        int image_size;
        int image_offset = 16;
        int label_offset = 8;

        request.mutable_model_spec()->set_name(model_name);
        request.mutable_model_spec()->set_signature_name(model_signature);

        google::protobuf::Map<tensorflow::string, tensorflow::TensorProto> &inputs = *request.mutable_inputs();

        std::fstream imageFile("t10k-images-idx3-ubyte", std::ios::binary | std::ios::in);
        std::fstream labelFile("t10k-labels-idx1-ubyte", std::ios::binary | std::ios::in);
        labelFile.seekp(0);
        imageFile.seekp(0);

        uint32_t magic_number_images;
        uint32_t nImages;
        uint32_t magic_number_labels;
        uint32_t rowsI = 0;
        uint32_t rowsL = 0;
        uint32_t colsI = 0;
        uint32_t colsL = 0;

        imageFile.read((char *)&magic_number_images, sizeof(magic_number_images));
        imageFile.read((char *)&nImages, sizeof(nImages));
        imageFile.read((char *)(&rowsI), sizeof(rowsI));
        imageFile.read((char *)&colsI, sizeof(colsI));
        image_size = ReverseInt(rowsI) * ReverseInt(colsI);

        labelFile.read((char *)&magic_number_labels, sizeof(magic_number_labels));
        labelFile.read((char *)&rowsL, sizeof(rowsL));

        for (int i = 0; i < num_tests; i++) {
            tensorflow::TensorProto proto;
            labelFile.seekp(label_offset);
            imageFile.seekp(image_offset);

            // read mnist image
            char *img = new char[image_size]();
            char label = 0;
            imageFile.read((char *)img, image_size);
            image_offset += image_size;

            // read label
            labelFile.read(&label, 1);
            label_offset++;

            // predict
            proto.set_dtype(tensorflow::DataType::DT_STRING);
            proto.add_string_val(img, image_size);
            proto.mutable_tensor_shape()->add_dim()->set_size(1);
            inputs["images"] = proto;

            Status status = stub_->Predict(&context, request, &response);
            delete[] img;

            if (status.ok()) {
                std::cout << "status OK." << std::endl;
                OutMap &map_outputs = *response.mutable_outputs();
                OutMap::iterator iter;
                int output_index = 0;
                for (iter = map_outputs.begin(); iter != map_outputs.end(); ++iter) {
                    tensorflow::TensorProto &result_tensor_proto = iter->second;
                    tensorflow::Tensor tensor;
                    // check if response converted successfully
                    bool converted = tensor.FromProto(result_tensor_proto);
                    if (converted) {
                        std::cout << "the result tensor[" << output_index << "] is:" << std::endl
                                  << tensor.SummarizeValue(10) << std::endl;
                    }
                    else {
                        std::cout << "the result tensor[" << output_index
                                  << "] convert failed." << std::endl;
                    }
                    ++output_index;
                }
            }
            else {
                std::cout << "gRPC call return code: " << status.error_code() << ": "
                          << status.error_message() << std::endl;
            }
        }
        imageFile.close();
        labelFile.close();
        return "Done."; // added: callPredict must return a tensorflow::string
    }

private:
    std::unique_ptr<PredictionService::Stub> stub_;
};
EDIT 1: I assume the problem must be in how the model was created and in the dimensions of the data the client sends.
I used the provided Python program that trains and exports the model, which sets the dimensions:
feature_configs = {'x': tf.FixedLenFeature(shape=[784], dtype=tf.float32),}
tf_example = tf.parse_example(serialized_tf_example, feature_configs)
x = tf.identity(tf_example['x'], name='x') # use tf.identity() to assign name
y_ = tf.placeholder('float', shape=[None, 10])
w = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
As expected, the fix was obvious.
All that had to be done was to add another dimension:
proto.mutable_tensor_shape()->add_dim()->set_size(image_size);
to get a [image_size, 1] shape.
I need to send an array (representing an image) through a named FIFO pipe from a python process to a c++ process, and then back the other way (on a Linux system).
The below code works great when using named pipes between two Python processes. It uses numpy's tostring() and fromstring() functions:
Send frames over named pipe (Python)
import cv2
import numpy as np
from time import sleep

##########################################################
FIFO_Images = "./../pipes/images.fifo"
videoName = "./../../videos/videoName.avi"
delim = "break"
##########################################################

def sendImage(h, w, d, pixelarray):
    imageString = pixelarray.tostring()
    with open(FIFO_Images, "w") as f:
        f.write(str(h) + delim + str(w) + delim + str(d) + delim + imageString)
    sleep(.01)
    return

##########################################################
cap = cv2.VideoCapture(videoName)
while(cap.isOpened()):
    ret, frame_rgb = cap.read()
    h, w, d = frame_rgb.shape
    sendImage(h, w, d, frame_rgb)
cap.release()
cv2.destroyAllWindows()
Read frames over named pipe (Python)
import cv2
import numpy as np

##########################################################
FIFO_Images = "./../pipes/images.fifo"
delim = "break"
##########################################################

def getFrame():
    with open(FIFO_Images, "r") as f:
        data = f.read().split(delim)
    # parse incoming string, which has format (height, width, depth, imageData)
    h = int(data[0])
    w = int(data[1])
    d = int(data[2])
    imageString = data[3]
    # convert array string into numpy array
    array = np.fromstring(imageString, dtype=np.uint8)
    # reshape numpy array into the required dimensions
    frame = array.reshape((h, w, d))
    return frame

##########################################################
while(True):
    frame = getFrame()
    cv2.imshow('frame', frame)
    cv2.waitKey(1) & 0xFF
However, I couldn't figure out how to read the entire image from the pipe on the C++ side, since the line-based read automatically treats "\n" as a delimiter.
My workaround was to do a base64 encoding on the "tostring()" image, then send that over the pipe. This works, but the base64 decoding on the other side is much too slow for real-time applications (~0.2 seconds per frame). Code:
Send base64-encoded images over named pipe (Python)
import cv2
import numpy as np
from time import time
from time import sleep
import base64

##########################################################
FIFO_Images = "./../pipes/images.fifo"
videoName = "./../../videos/videoName.avi"
delim = ";;"
##########################################################

def sendImage(h, w, d, pixelarray):
    flat = pixelarray.flatten()
    imageString = base64.b64encode(pixelarray.tostring())
    fullString = str(h) + delim + str(w) + delim + str(d) + delim + imageString + delim + "\n"
    with open(FIFO_Images, "w") as f:
        f.write(fullString)
    return

##########################################################
cap = cv2.VideoCapture(videoName)
count = 0
while(cap.isOpened()):
    ret, frame_rgb = cap.read()
    h, w, d = frame_rgb.shape
    frame_gbr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
    sendImage(h, w, d, frame_rgb)
cap.release()
cv2.destroyAllWindows()
Read base64-encoded images over named pipe (C++)
#include "opencv2/opencv.hpp"
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <linux/stat.h>
#include <ctime>
using namespace std;
using namespace cv;
#define FIFO_FILE "./../../../pipes/images.fifo"
#define MAX_BUF 10000000
FILE *fp;
char readbuf[MAX_BUF + 1]; // add 1 to the expected size to accommodate the mysterious "extra byte", which I think signals the end of the line.
/************************ BASE64 decoding *********************************************/
std::string base64_encode(unsigned char const* , unsigned int len);
std::string base64_decode(std::string const& s);

static const std::string base64_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

static inline bool is_base64(unsigned char c) {
    return (isalnum(c) || (c == '+') || (c == '/'));
}
std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
    std::string ret;
    int i = 0;
    int j = 0;
    unsigned char char_array_3[3];
    unsigned char char_array_4[4];
    while (in_len--) {
        char_array_3[i++] = *(bytes_to_encode++);
        if (i == 3) {
            char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
            char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
            char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
            char_array_4[3] = char_array_3[2] & 0x3f;
            for (i = 0; (i < 4); i++)
                ret += base64_chars[char_array_4[i]];
            i = 0;
        }
    }
    if (i)
    {
        for (j = i; j < 3; j++)
            char_array_3[j] = '\0';
        char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
        char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
        char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
        char_array_4[3] = char_array_3[2] & 0x3f;
        for (j = 0; (j < i + 1); j++)
            ret += base64_chars[char_array_4[j]];
        while ((i++ < 3))
            ret += '=';
    }
    return ret;
}
std::string base64_decode(std::string const& encoded_string) {
    int in_len = encoded_string.size();
    int i = 0;
    int j = 0;
    int in_ = 0;
    unsigned char char_array_4[4], char_array_3[3];
    std::string ret;
    while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
        char_array_4[i++] = encoded_string[in_]; in_++;
        if (i == 4) {
            for (i = 0; i < 4; i++)
                char_array_4[i] = base64_chars.find(char_array_4[i]);
            char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
            char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
            char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
            for (i = 0; (i < 3); i++)
                ret += char_array_3[i];
            i = 0;
        }
    }
    if (i) {
        for (j = i; j < 4; j++)
            char_array_4[j] = 0;
        for (j = 0; j < 4; j++)
            char_array_4[j] = base64_chars.find(char_array_4[j]);
        char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
        char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
        char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
        for (j = 0; (j < i - 1); j++) ret += char_array_3[j];
    }
    return ret;
}
/*********************************************************************/
int stringToInt(string str)
{
    int num;
    if (!(istringstream(str) >> num)) num = 0;
    return num;
}
/*********************************************************************/
bool timerOn = 0;
clock_t timerStart;
void Timer(string process)
{
    if (!timerOn)
    {
        timerStart = clock();
        timerOn = true;
    }
    else if (timerOn)
    {
        double duration = (clock() - timerStart) / (double) CLOCKS_PER_SEC;
        cout << "Time to complete: ";
        printf("%.2f", duration);
        cout << ": " << process << endl;
        timerOn = false;
    }
}
/*********************************************************************/
void getFrame()
{
    string fullString;
    string delimiter = ";;";
    size_t pos = 0;
    string token;
    int h;
    int w;
    int d;
    string imgString;
    int fifo;
    bool cont(true);

    /***************************
    Read from the pipe
    www.tldp.org/LDP/lpg/node18.html
    ***************************/
    Timer("Read from pipe");
    fp = fopen(FIFO_FILE, "r");
    fgets(readbuf, MAX_BUF + 1, fp); // stops when MAX_BUF characters are read, the newline character ("\n") is read, or the EOF (end of file) is reached
    string line(readbuf);
    fclose(fp);
    Timer("Read from pipe");

    ////// parse the string into components
    Timer("Parse string");
    int counter = 0;
    while ((pos = line.find(delimiter)) != string::npos)
    {
        token = line.substr(0, pos);
        if (counter == 0)
        {
            h = stringToInt(token);
        }
        else if (counter == 1)
        {
            w = stringToInt(token);
        }
        else if (counter == 2)
        {
            d = stringToInt(token);
        }
        else if (counter == 3)
        {
            imgString = token;
            //cout << imgString[0] << endl;
        }
        else
        {
            cout << "ERROR: Too many parameters passed" << endl;
            return;
        }
        line.erase(0, pos + delimiter.length());
        counter++;
    }
    if (counter == 3)
    {
        imgString = token;
    }
    if (counter < 3)
    {
        cout << "ERROR: Not enough parameters passed: " << counter << endl;
        //return;
    }
    Timer("Parse string");

    /***************************
    Convert from Base64
    ***************************/
    Timer("Decode Base64");
    std::string decoded = base64_decode(imgString);
    Timer("Decode Base64");

    /***************************
    Convert to vector of ints
    ***************************/
    Timer("Convert to vector of ints");
    std::vector<uchar> imgVector;
    for (int i = 0; i < decoded.length(); i = i + 1) // + 4)
    {
        int temp = (char(decoded[i]));
        imgVector.push_back(temp);
    }
    Timer("Convert to vector of ints");

    ////// convert the vector into a matrix
    Mat frame = Mat(imgVector).reshape(d, h);
    namedWindow("Frame", WINDOW_AUTOSIZE);
    imshow("Frame", frame);
    waitKey(1);
}
int main()
{
    /* Create the FIFO if it does not exist */
    umask(0);
    mknod(FIFO_FILE, S_IFIFO | 0666, 0);
    while (1)
    {
        getFrame();
    }
    return 0;
}
There must be a more efficient way to accomplish this. Can anyone make a recommendation? While I'm happy to hear suggestions for other methods to accomplish this, I am constrained to using named pipes for now.
This is overcomplicated. If you need to send binary data, send its length first, then a newline (\n), and then the data itself (raw, no base64). Receive it on the other side by reading a line, parsing the number, and then reading a block of data of the given length.
Example - writing binary data to a FIFO (or file) in Python:
#!/usr/bin/env python3
import os

fifo_name = 'fifo'

def main():
    data = b'blob\n\x00 123'
    try:
        os.mkfifo(fifo_name)
    except FileExistsError:
        pass
    with open(fifo_name, 'wb') as f:
        # b for binary mode
        f.write('{}\n'.format(len(data)).encode())
        f.write(data)

if __name__ == '__main__':
    main()
Reading binary data from FIFO in C++:
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <sys/stat.h>

int main(int argc, char *argv[]) {
    const char *fifo_name = "fifo";
    mknod(fifo_name, S_IFIFO | 0666, 0);
    std::ifstream f(fifo_name);
    std::string line;
    getline(f, line);
    auto data_size = std::stoi(line);
    std::cout << "Size: " << data_size << std::endl;
    std::string data;
    {
        std::vector<char> buf(data_size);
        f.read(buf.data(), data_size);
        // writing into vector data is valid since C++11
        data.assign(buf.data(), buf.size());
    }
    if (!f.good()) {
        std::cerr << "Read failed" << std::endl;
    }
    std::cout << "Data size: " << data.size() << " content: " << data << std::endl;
}
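To adapt this to the image case from the question, the Python side could write an "h w d" header line followed by the raw frame bytes, and the C++ side could read a frame back like this (a sketch under that assumed format):
#include <opencv2/opencv.hpp>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>

// Read one frame: a text header "h w d\n" followed by h*w*d raw bytes.
cv::Mat readFrame(std::ifstream& f)
{
    std::string header;
    std::getline(f, header);
    std::istringstream iss(header);
    int h, w, d;
    iss >> h >> w >> d;
    std::vector<char> buf((size_t)h * w * d);
    f.read(buf.data(), buf.size());                      // raw pixels, no base64 round-trip
    return cv::Mat(h, w, CV_8UC(d), buf.data()).clone(); // clone: buf is about to go out of scope
}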