Related
I have following code, where 'Snap.JPG' is a RGB format type.
import cv2
img = cv2.imread("./Snap.JPG")
img[:,:,:2] = 255
cv2.imshow("Img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
I want to convert this code into c++. What is the fastest way to implement img[:,:,:2] = 255 part of the code? Channel splitting and merging is one of the options i know, but is there any smarter way to do slicing in c++?
Edit:
Apologies, i should have mentioned what i want in the output. I need a fading effect, because i wanted to overlay a drawing on top of it.
This is an example of how to change pixels:
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
int main(int argc, char** argv) {
cv::Mat src_image = cv::imread("image.jpg", CV_LOAD_IMAGE_COLOR);
if(!src_image.data) {
std::cout << "Error: the image wasn't correctly loaded." << std::endl;
return -1;
}
cv::Mat image = src_image.clone();
// We iterate over all pixels of the image
for(int r = 0; r < image.rows; r++) {
// We obtain a pointer to the beginning of row r
cv::Vec3b* ptr = image.ptr<cv::Vec3b>(r);
for(int c = 0; c < image.cols; c++) {
ptr[c] = cv::Vec3b(255, 255, ptr[c][2]);
}
}
cv::imshow("Inverted Image", image);
cv::waitKey();
return 0;
}
Thanks for #Manuel's reply, it works quite well. but i could achieve the same result with faster speeds. I have added my code snippets inline your code.
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>
#include <iomanip>
int main(int argc, char** argv) {
clock_t start, end;
cv::Mat src_image = cv::imread("Snap.JPG", CV_LOAD_IMAGE_COLOR);
if(!src_image.data) {
std::cout << "Error: the image wasn't correctly loaded." << std::endl;
return -1;
}
/* 1st method */
cv::Mat image = src_image.clone();
start = clock();
// We iterate over all pixels of the image
for(int r = 0; r < image.rows; r++) {
// We obtain a pointer to the beginning of row r
cv::Vec3b* ptr = image.ptr<cv::Vec3b>(r);
for(int c = 0; c < image.cols; c++) {
ptr[c] = cv::Vec3b(255, 255, ptr[c][2]);
}
}
end = clock();
double time_taken = double(end - start) / double(CLOCKS_PER_SEC);
std::cout << "Time taken by 1st method : " << std::fixed << time_taken << std::setprecision(5);
std::cout << " sec " << std::endl;
/* 2nd Method */
start = clock();
src_image = src_image | cv::Scalar(255, 255, 0);
end = clock();
time_taken = double(end - start) / double(CLOCKS_PER_SEC);
std::cout << "Time taken by 2nd method : " << std::fixed << time_taken << std::setprecision(5);
std::cout << " sec " << std::endl;
bool isEqual = (sum(src_image != image) == cv::Scalar(0,0,0,0));
if (isEqual)
{
std::cout << "\nIdentical Mats !" << std::endl;
}
cv::imshow("Inverted Image", image);
cv::waitKey();
return 0;
}
output is following:
Time taken by 1st method : 0.001765 sec
Time taken by 2nd method : 0.00011 sec
Identical Mats !
I'm trying to write an inference program with YOLO model in C++. I've searched for some info about darknet, but it has to use .cfg file to import the model structure(which is a bit too complicated for me...), thus I want to do the program with tensorflow.
(My model weight is converted from .hdf5(used in python) to .pb(used in C++))
I've found some examples written in python, it seems like they have done some work before the inference process... Source
def yolo_eval(yolo_outputs,
anchors,
num_classes,
image_shape,
max_boxes=50,
score_threshold=.6,
iou_threshold=.5):
"""Evaluate YOLO model on given input and return filtered boxes."""
num_layers = len(yolo_outputs)
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] # default setting
input_shape = K.shape(yolo_outputs[0])[1:3] * 32
boxes = []
box_scores = []
for l in range(num_layers):
_boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
boxes.append(_boxes)
box_scores.append(_box_scores)
boxes = K.concatenate(boxes, axis=0)
box_scores = K.concatenate(box_scores, axis=0)
mask = box_scores >= score_threshold
max_boxes_tensor = K.constant(max_boxes, dtype='int32')
boxes_ = []
scores_ = []
classes_ = []
for c in range(num_classes):
# TODO: use keras backend instead of tf.
class_boxes = tf.boolean_mask(boxes, mask[:, c])
class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
nms_index = tf.image.non_max_suppression(
class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
class_boxes = K.gather(class_boxes, nms_index)
class_box_scores = K.gather(class_box_scores, nms_index)
classes = K.ones_like(class_box_scores, 'int32') * c
boxes_.append(class_boxes)
scores_.append(class_box_scores)
classes_.append(classes)
boxes_ = K.concatenate(boxes_, axis=0)
scores_ = K.concatenate(scores_, axis=0)
classes_ = K.concatenate(classes_, axis=0)
return boxes_, scores_, classes_
I've printed out the return value
and it looks like this
boxes-> Tensor("concat_11:0", shape=(?, 4), dtype=float32)
scores-> Tensor("concat_12:0", shape=(?,), dtype=float32)
classes-> Tensor("concat_13:0", shape=(?,), dtype=int32)
the original output of my YOLO model(.hdf5) is (I got this by printed out model.output)
tf.Tensor 'conv2d_59_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32
tf.Tensor 'conv2d_67_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32
tf.Tensor 'conv2d_75_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32
And the inference part of the python code is
out_boxes, out_scores, out_classes = sess.run(
[boxes, scores, classes],
feed_dict={
yolo_model.input: image_data,
input_image_shape: [image.size[1], image.size[0]],
K.learning_phase(): 0
})
Compare to the python version of inference code,
C++ part is... (Reference)
int main()
{
string image = "test.jpg";
string graph = "yolo_weight.pb";
string labels = "coco.names";
int32 input_width = 416;
int32 input_height = 416;
float input_mean = 0;
float input_std = 255;
string input_layer = "input_1:0";
std::vector<std::string> output_layer = {"conv2d_59/BiasAdd:0", "conv2d_67/BiasAdd:0", "conv2d_75/BiasAdd:0" };
std::unique_ptr<tensorflow::Session> session;
string graph_path = tensorflow::io::JoinPath(root_dir, graph);
Status load_graph_status = LoadGraph(graph_path, &session);
std::vector<Tensor> resized_tensors;
string image_path = tensorflow::io::JoinPath(root_dir, image);
Status read_tensor_status = ReadTensorFromImageFile(image_path, input_height, input_width,
input_mean, input_std, &resized_tensors);
Tensor inpTensor = Tensor(DT_FLOAT, TensorShape({ 1, input_height, input_width, 3 }));
std::vector<Tensor> outputs;
cv::Mat srcImage = cv::imread(image);
cv::resize(srcImage, srcImage, cv::Size(input_width, input_height));
srcImage.convertTo(srcImage, CV_32FC3);
srcImage = srcImage / 255;
string ty = type2str(srcImage.type());
float *p = (&inpTensor)->flat<float>().data();
cv::Mat tensorMat(input_height, input_width, CV_32FC3, p);
srcImage.convertTo(tensorMat, CV_32FC3);
Status run_status = session->Run({{ input_layer, inpTensor }}, { output_layer }, {}, &outputs);
int cc = 1;
auto output_detection_class = outputs[0].tensor<float, 4>();
std::cout << "detection scores" << std::endl;
std::cout << "typeid(output_detection_scoreclass).name->" << typeid(output_detection_class).name() << std::endl;
for (int i = 0; i < 13; ++i)
{
for (int j = 0; j < 13; ++j)
{
for (int k = 0; k < 21; ++k)
{
// using (index_1, index_2, index_3) to access the element in a tensor
printf("i->%d, j->%d, k->%d\t", i, j, k);
std::cout << output_detection_class(1, i, j, k) << "\t";
cc += 1;
if (cc % 4 == 0)
{
std::cout << "\n";
}
}
}
std::cout << std::endl;
}
return 0;
}
The output of c++ version inference part is
outputs.size()-> 3
outputs[0].shape()-> [1,13,13,21]
outputs[1].shape()-> [1,26,26,21]
outputs[2].shape()-> [1,52,52,21]
But the output I get is pretty weird...
(The output value of outputs[0] doesn't seems like any one of score, class, or coordinates...)
So I'm wondering is it because I miss the part written in python before its inference? Or I use the wrong way to get my output data?
I've checked some related questions and answers...
1.Yolo v3 model output clarification with keras
2.Convert YoloV3 output to coordinates of bounding box, label and confidence
3.How to access tensorflow::Tensor C++
But I still can't figure out how to make it :(
I also found a repo which might be helpful,
I've taken a look at its yolo.cpp, but its model output tensor's shape is different from mine, I'm not sure if I can revise the code directly, its output tensor is
tf.Tensor 'import/output:0' shape=(?, 735) dtype = float32
Any help or advice is appreciated...
In case you're still struggling with this, I don't see where you are applying the Sigmoid and Exp to the output layer values.
You might look at this paper, which describes how to handle the output.
https://medium.com/analytics-vidhya/yolo-v3-theory-explained-33100f6d193
As Bryan said, there're still some actions need to be done with the output layer.
So in my case (according to this repo), I add this to the YOLO class (at file yolo.py) for adding those post-processing when saving model:
def output_pb(self, out_dir, out_pb):
out_bx = self.boxes.name.split(":")[0]
out_sc = self.scores.name.split(":")[0]
out_cs = self.classes.name.split(":")[0]
print(out_bx, out_sc, out_cs)
frozen_graph = tf.graph_util.remove_training_nodes(tf.graph_util.convert_variables_to_constants(self.sess, self.sess.graph.as_graph_def(), [out_bx, out_sc, out_cs]))
tf.io.write_graph(frozen_graph, out_dir, out_pb, as_text=False)
print("===== FINISH saving new pb file =====")
When saving model, I called the function like this:
yolo = YOLO(**config)
yolo.output_pb(output_dir, output_pb_name)
And when doing inference in C++,
the whole process goes like this:
// initialize model
YOLO* YOLO_data = (YOLO*)Init_DllODM_object(config);
// do some stuff to set data in YOLO_data
cv::Mat input_pic = "whatever_pic.png";
predict(YOLO_data, input_pic, YOLO_data ->bbox_res, YOLO_data ->score_res, YOLO_data ->class_res);
// draw result on pic
cv::Mat res = show_result(YOLO_data, input_pic);
Detailed code is here:
// yolo_cpp.h
struct YOLO
{
float score_thres;
std::vector<int> class_res;
std::vector<float> bbox_res, score_res;
std::string inp_tensor_name;
std::string placeholder_name;
std::vector<std::string> out_tensors;
Session* session;
Tensor t, inpTensor;
std::vector<tensorflow::Tensor> outTensor;
std::vector<int> MD_size;
std::vector<int> inp_pic_size;
std::vector<std::string> md_class_list;
std::vector<cv::Scalar> color_list;
int show_score;
int score_type;
int return_origin;
};
// yolo_cpp.cpp
void* Init_DllODM_object(json config)
{
std::string model_path = config["model"].get<std::string>();
YOLO* YOLO_data = new YOLO();
auto options = tensorflow::SessionOptions();
GraphDef graphdef;
// loading model to graph
Status status_load = ReadBinaryProto(Env::Default(), model_path, &graphdef);
options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(0.7);
options.config.mutable_gpu_options()->set_allow_growth(true);
int node_count = graphdef.node_size();
for (int i = 0; i < node_count; i++)
{
auto n = graphdef.node(i);
if (n.name().find("input_") != string::npos)
{
YOLO_data->inp_tensor_name = n.name();
}
else if (n.name().find("Placeholder_") != string::npos)
{
YOLO_data->placeholder_name = n.name();
}
else if (i == node_count - 5)
{
YOLO_data->out_tensors.push_back(n.name());
}
else if (i == node_count - 3)
{
YOLO_data->out_tensors.push_back(n.name());
}
else if (i == node_count - 1)
{
YOLO_data->out_tensors.push_back(n.name());
}
}
if (!status_load.ok()) {
std::cout << "ERROR: Loading model failed..." << std::endl;
std::cout << model_path << status_load.ToString() << "\n";
}
std::vector<int> MD_size_ = config["input_size"];
YOLO_data->MD_size = MD_size_;
std::vector<int> inp_pic_size_ = config["input_pic_size"];
YOLO_data->inp_pic_size = inp_pic_size_;
YOLO_data->inpTensor = Tensor(DT_FLOAT, TensorShape({ 1, YOLO_data->MD_size[0], YOLO_data->MD_size[1], 3 })); // input tensor
YOLO_data->t = Tensor(DT_FLOAT, TensorShape({ 2 }));
//ref: https://stackoverflow.com/questions/36804714/define-a-feed-dict-in-c-for-tensorflow-models
auto t_matrix = YOLO_data->t.tensor<float, 1>();
t_matrix(0) = YOLO_data->inp_pic_size[0];
t_matrix(1) = YOLO_data->inp_pic_size[1];
// create session
Status status_newsess = NewSession(options, &YOLO_data->session); //for the usage of gpu setting
Status status_create = YOLO_data->session->Create(graphdef);
if (!status_create.ok()) {
std::cout << "ERROR: Creating graph in session failed.." << status_create.ToString() << std::endl;
}
else {
std::cout << "----------- Successfully created session and load graph -------------" << std::endl;
}
return YOLO_data;
}
int predict(YOLO* YOLO_, cv::Mat srcImage, std::vector<float>& bbox_res, std::vector<float>& score_res, std::vector<int>& class_res)
{
// read image -> input image
if (srcImage.empty()) // check if image can open correctly
{
std::cout << "can't open the image!!!!!!!" << std::endl;
int res = -1;
return res;
}
// ref: https://ppt.cc/f7ERNx
std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
{ YOLO_->inp_tensor_name, YOLO_->inpTensor },
{ YOLO_->placeholder_name, YOLO_->t },
};
srcImage = letterbox_image(srcImage, YOLO_->MD_size[0], YOLO_->MD_size[1]);
convertCVMatToTensor(YOLO_, srcImage);
Status status_run = YOLO_->session->Run({ inputs }, { YOLO_->out_tensors }, {}, &YOLO_->outTensor);
if (!status_run.ok()) {
std::cout << "ERROR: RUN failed..." << std::endl;
std::cout << status_run.ToString() << "\n";
int res = -1;
return res;
}
TTypes<float>::Flat pp1 = YOLO_->outTensor[0].flat<float>();
TTypes<float>::Flat pp2 = YOLO_->outTensor[1].flat<float>();
TTypes<int>::Flat pp3 = YOLO_->outTensor[2].flat<int>();
int pp1_idx;
for (int i = 0; i < pp2.size(); i++)
{
pp1_idx = i * 4;
bbox_res.push_back(pp1(pp1_idx));
bbox_res.push_back(pp1(pp1_idx + 1));
bbox_res.push_back(pp1(pp1_idx + 2));
bbox_res.push_back(pp1(pp1_idx + 3));
score_res.push_back(pp2(i));
class_res.push_back(pp3(i));
}
return 0;
}
cv::Mat show_result(YOLO* inf_obj, cv::Mat inp_pic)
{
int bbox_idx;
std::string plot_str;
bool under_thresh = false;
std::vector<int> del_idx;
for (int i = 0; i < inf_obj->class_res.size(); i++)
{
int y_min, y_max, x_min, x_max;
bbox_idx = i * 4;
y_min = std::max(0, (int)floor(inf_obj->bbox_res[bbox_idx] + 0.5));
x_min = std::max(0, (int)floor(inf_obj->bbox_res[bbox_idx + 1] + 0.5));
y_max = std::max(0, (int)floor(inf_obj->bbox_res[bbox_idx + 2] + 0.5));
x_max = std::max(0, (int)floor(inf_obj->bbox_res[bbox_idx + 3] + 0.5));
//std::cout << md_class_list[class_res[i]] << ", ";
//std::cout << score_res[i] << ",";
//std::cout << "[" << x_min << ", " << y_min << ", " << x_max << ", " << y_max << "]\n";
if (inf_obj->show_score)
{
if (inf_obj->score_type)
plot_str = inf_obj->md_class_list[inf_obj->class_res[i]] + ", " + std::to_string(rounding(inf_obj->score_res[i] * 100, 2)).substr(0, 5) + "%";
else
plot_str = inf_obj->md_class_list[inf_obj->class_res[i]] + ", " + std::to_string(rounding(inf_obj->score_res[i], 2)).substr(0, 4);
}
else
plot_str = inf_obj->md_class_list[inf_obj->class_res[i]];
if (inf_obj->score_res[i] >= inf_obj->score_thres)
{
inp_pic = plot_one_box(inp_pic, x_min, y_min, x_max, y_max, plot_str, inf_obj->color_list[inf_obj->class_res[i]]);
}
else
{
//std::cout << "score_res[i]->" << score_res[i] << "under thresh!!" << std::endl;
under_thresh = true;
del_idx.push_back(i);
}
}
if (under_thresh)
{
//std::cout << "*** deleting element" << std::endl;
for (int x = 0; x < del_idx.size(); x++)
{
bbox_idx = (del_idx[x] - x) * 4;
inf_obj->bbox_res.erase(inf_obj->bbox_res.begin() + bbox_idx + 3);
inf_obj->bbox_res.erase(inf_obj->bbox_res.begin() + bbox_idx + 2);
inf_obj->bbox_res.erase(inf_obj->bbox_res.begin() + bbox_idx + 1);
inf_obj->bbox_res.erase(inf_obj->bbox_res.begin() + bbox_idx);
inf_obj->score_res.erase(inf_obj->score_res.begin() + del_idx[x] - x);
inf_obj->class_res.erase(inf_obj->class_res.begin() + del_idx[x] - x);
}
del_idx.clear();
}
return inp_pic;
}
Since my code is used for dll, I arranged in this way.
There are still some redundant code I didn't delete,
but I think the whole process can be done with these provided code so far.
Hope this help :D
I have trained model for semantic segmentation using this repo, got good results and tried to use this net in small library writen with tensorflow c API. I turned my keras model into protobuf file using this repo and run session using this code:
typedef struct model_t {
TF_Graph* graph;
TF_Session* session;
TF_Status* status;
TF_Output input, target, output;
TF_Operation *init_op, *train_op, *save_op, *restore_op;
TF_Output checkpoint_file;
} model_t;
typedef struct NetProperties {
int width;
int height;
int border;
int classes;
int inputSize;
} NetProperties;
static model_t * model;
static NetProperties * properties;
extern "C" EXPORT int ModelCreate(const char* nnFilename, const char* inputName, const char* outputName, int pictureWidth, int pictureHeight, int border, int classes) {
ModelDestroy();
model = (model_t*)malloc(sizeof(model_t));;
model->status = TF_NewStatus();
model->graph = TF_NewGraph();
properties = (NetProperties*)malloc(sizeof(NetProperties));
properties->width = pictureWidth;
properties->height = pictureHeight;
properties->border = border;
properties->classes = classes;
properties->inputSize = (pictureWidth + border * 2) * (pictureHeight + border * 2) * 3;
{
// Create the session.
TF_SessionOptions* opts = TF_NewSessionOptions();
model->session = TF_NewSession(model->graph, opts, model->status);
TF_DeleteSessionOptions(opts);
if (!Okay(model->status)) return 0;
}
TF_Graph* g = model->graph;
{
// Import the graph.
TF_Buffer* graph_def = read_file(nnFilename);
if (graph_def == NULL) return 0;
printf("Read GraphDef of %zu bytes\n", graph_def->length);
TF_ImportGraphDefOptions* opts = TF_NewImportGraphDefOptions();
TF_GraphImportGraphDef(g, graph_def, opts, model->status);
TF_DeleteImportGraphDefOptions(opts);
TF_DeleteBuffer(graph_def);
if (!Okay(model->status)) return 0;
}
// Handles to the interesting operations in the graph.
model->input.oper = TF_GraphOperationByName(g, inputName);
model->input.index = 0;
model->target.oper = TF_GraphOperationByName(g, "target");
model->target.index = 0;
model->output.oper = TF_GraphOperationByName(g, outputName);
model->output.index = 0;
model->init_op = TF_GraphOperationByName(g, "init");
model->train_op = TF_GraphOperationByName(g, "train");
model->save_op = TF_GraphOperationByName(g, "save/control_dependency");
model->restore_op = TF_GraphOperationByName(g, "save/restore_all");
model->checkpoint_file.oper = TF_GraphOperationByName(g, "save/Const");
model->checkpoint_file.index = 0;
// first prediction is slow
unsigned char * randomData = (unsigned char*)malloc(properties->inputSize * sizeof(unsigned char));
for (int i = 0; i < properties->inputSize; i++) {
randomData[i] = (unsigned char)100;
}
ModelPredict(randomData);
free(randomData);
return 1;
}
extern "C" EXPORT void ModelDestroy() {
if (model == nullptr) return;
TF_DeleteSession(model->session, model->status);
Okay(model->status);
TF_DeleteGraph(model->graph);
TF_DeleteStatus(model->status);
free(model);
}
extern "C" EXPORT unsigned char* ModelPredict(unsigned char * batch1) {
if (model == NULL) return NULL;
const int64_t dims[4] = { 1, properties->height + properties->border * 2, properties->width + properties->border * 2, 3 };
size_t nbytes = properties->inputSize;
// can be faster
float * arrayOfFloats = (float*)malloc(nbytes * sizeof(float));
//float sumUp = 0;
for (int i = 0; i < properties->inputSize; i++) {
arrayOfFloats[i] = batch1[i] * (1.f / 255.f);
//sumUp += arrayOfFloats[i];
}
//std::cout << sumUp << std::endl;
// removed due to jdehesa answer
//float ** inputFloats = (float**)malloc(nbytes * sizeof(float*));
//inputFloats[0] = arrayOfFloats;
// Optionally, you can check that your input_op and input tensors are correct
//// by using some of the functions provided by the C API.
//std::cout << "Input op info: " << TF_OperationNumOutputs(input_op) << "\n";
//std::cout << "Input data info: " << TF_Dim(input, 0) << "\n";
std::vector<TF_Output> inputs;
std::vector<TF_Tensor*> input_values;
TF_Operation* input_op = model->input.oper;
TF_Output input_opout = { input_op, 0 };
inputs.push_back(input_opout);
// reworked due to jdehesa answer
//TF_Tensor* input = TF_NewTensor(TF_FLOAT, dims, 4, (void*)inputFloats, //nbytes * sizeof(float), &Deallocator, NULL);
TF_Tensor* input = TF_NewTensor(TF_FLOAT, dims, 4, (void*)arrayOfFloats, nbytes * sizeof(float), &Deallocator, NULL);
input_values.push_back(input);
int outputSize = properties->width * properties->height * properties->classes;
int64_t out_dims[] = { 1, properties->height, properties->width, properties->classes };
// Create vector to store graph output operations
std::vector<TF_Output> outputs;
TF_Operation* output_op = model->output.oper;
TF_Output output_opout = { output_op, 0 };
outputs.push_back(output_opout);
// Create TF_Tensor* vector
//std::vector<TF_Tensor*> output_values(outputs.size(), nullptr);
// Similar to creating the input tensor, however here we don't yet have the
// output values, so we use TF_AllocateTensor()
TF_Tensor* output_value = TF_AllocateTensor(TF_FLOAT, out_dims, 4, outputSize * sizeof(float));
//output_values.push_back(output_value);
//// As with inputs, check the values for the output operation and output tensor
//std::cout << "Output: " << TF_OperationName(output_op) << "\n";
//std::cout << "Output info: " << TF_Dim(output_value, 0) << "\n";
TF_SessionRun(model->session, NULL,
&inputs[0], &input_values[0], inputs.size(),
&outputs[0], &output_value, outputs.size(),
/* No target operations to run */
NULL, 0, NULL, model->status);
if (!Okay(model->status)) return NULL;
TF_DeleteTensor(input_values[0]);
// memory allocations take place here
float* prediction = (float*)TF_TensorData(output_value);
//float* prediction = (float*)malloc(sizeof(float) * properties->inputSize / 3 * properties->classes);
//memcpy(prediction, TF_TensorData(output_value), sizeof(float) * properties->inputSize / 3 * properties->classes);
unsigned char * charPrediction = new unsigned char[outputSize * sizeof(unsigned char)];
sumUp = 0;
for (int i = 0; i < outputSize; i++) {
charPrediction[i] = (unsigned char)((prediction[i] * 255));
//sumUp += prediction[i];
}
//std::cout << sumUp << std::endl << std::endl;
//free(prediction);
TF_DeleteTensor(output_value);
return charPrediction;
}
The problem is that prediction result is always the same. I tried to pass random data and real images but the result was equal. However, defferent trained models give different prediction result, but for each model it's always same. As you can see in code snippet, I checked that pass different data and get same prediction every time
// first is float sum of passed picture, second is the float sum of answer
724306
22982.6
692004
22982.6
718490
22982.6
692004
22982.6
720861
22982.6
692004
22982.6
I tried to write my own keras to tensorflow .pb converter but result was the same.
import os, argparse
import tensorflow as tf
from tensorflow.keras.utils import get_custom_objects
from segmentation_models.losses import bce_dice_loss,dice_loss,cce_dice_loss
from segmentation_models.metrics import iou_score
# some custom functions from segmentation_models
get_custom_objects().update({
'dice_loss': dice_loss,
'bce_dice_loss': bce_dice_loss,
'cce_dice_loss': cce_dice_loss,
'iou_score': iou_score,
})
def freeze_keras(model_name):
tf.keras.backend.set_learning_phase(0)
model = tf.keras.models.load_model(model_name)
sess = tf.keras.backend.get_session()
constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph.as_graph_def(), [out.op.name for out in model.outputs])
tf.train.write_graph(constant_graph, './', 'saved_model.pb', as_text=False)
freeze_keras('best-weights.hdf5')
Help me to find out how to fix prediction result in c api.
UPDATE 1: Reworked input array as jdehesa suggested
UPDATE 2: Added definition of model and NetProperties
I think you are not setting the input data correctly. Let's see.
float * arrayOfFloats1 = (float*)malloc(nbytes * sizeof(float));
float sumUp = 0;
Here you create arrayOfFloats1 to hold all the image data.
for (int i = 0; i < properties->inputSize; i++) {
arrayOfFloats1[i] = batch1[i] * (1.f / 255.f);
sumUp += arrayOfFloats1[i];
}
std::cout << sumUp << std::endl;
Here you set arrayOfFloats1 to the image data. This is all fine.
But then:
float ** inputFloats = (float**)malloc(nbytes * sizeof(float*));
Here you have inputFloats, which has space for nbytes float pointers. First, you probably would want to allocate space for float values, not float pointers (which probably do not have the same size). And then:
inputFloats[0] = arrayOfFloats1;
Here you are setting the first of those nbytes pointers to the pointer arrayOfFloats1. And then inputFloats is used as input to the model. But the remaining nbytes - 1 pointers have not been set to anything. Although not required, they are probably set all to zero.
If you just want to make an "array of arrays of floats" with arrayOfFloats1 you don't need to allocate any memory, you can simply do:
float ** inputFloats = &arrayOfFloats1;
But then you actually use inputFloats like this:
TF_Tensor* input = TF_NewTensor(
TF_FLOAT, dims, 4, (void*)inputFloats, nbytes * sizeof(float), &Deallocator, NULL);
So here you are saying that input is made up of the data in inputFloats, which will be a pointer to arrayOfFloats1 and then uninitialized memory. Probably you actually want something like:
TF_Tensor* input = TF_NewTensor(
TF_FLOAT, dims, 4, (void*)arrayOfFloats1, nbytes * sizeof(float), &Deallocator, NULL);
Which means input will be a tensor made up of the data in arrayOfFloats1 that you copied before. In fact, I don't think your code needs inputFloats at all.
Otherwise, from what I can tell the rest of the code seems correct. You should ensure that all allocated memory is properly freed in all cases (e.g. when you do if (!Okay(model->status)) return NULL; you should probably delete the input and output tensors before returning), but that is a different issue.
The issue was in the model. I have trained it using not normalized data from images (pixel values are between 0.0 and 255.0) and tried to interfere normilezed data (I devided each pixel value by 255 arrayOfFloats[i] = batch1[i] * (1.f / 255.f); and got values between 0.0 and 1.0) so my model thought that it gets black images every time and gave me similar answers. So I removed normalization and the model started to predict.
I need to send an array (representing an image) through a named FIFO pipe from a python process to a c++ process, and then back the other way (on a Linux system).
The below code works great when using named pipes between two Python processes. It uses numpy's tostring() and fromstring() functions:
Send frames over named pipe (Python)
import cv2
import numpy as np
from time import sleep
##########################################################
FIFO_Images = "./../pipes/images.fifo"
videoName = "./../../videos/videoName.avi"
delim = "break"
##########################################################
def sendImage(h, w, d, pixelarray):
imageString = pixelarray.tostring()
with open(FIFO_Images, "w") as f:
f.write(str(h)+ delim + str(w)+ delim + str(d) + delim + imageString)
sleep(.01)
return
##########################################################
cap = cv2.VideoCapture(videoName)
while(cap.isOpened()):
ret, frame_rgb = cap.read()
h, w, d = frame_rgb.shape
sendImage(h, w, d, frame_rgb)
cap.release()
cv2.destroyAllWindows()
Read frames over named pipe (Python)
import cv2
import numpy as np
##########################################################
FIFO_Images = "./../pipes/images.fifo"
delim = "break"
##########################################################
def getFrame():
with open(FIFO_Images, "r") as f:
data = f.read().split(delim)
#parse incoming string, which has format (height, width, depth, imageData)
h=int(data[0])
w=int(data[1])
d=int(data[2])
imageString = data[3]
#convert array string into numpy array
array = np.fromstring(imageString, dtype=np.uint8)
#reshape numpy array into the required dimensions
frame = array.reshape((h,w,d))
return frame
##########################################################
while(True):
frame = getFrame()
cv2.imshow('frame', frame)
cv2.waitKey(1) & 0xFF
However, I couldn't figure out how to read the entire image from the pipe on the cpp side, since it takes "\n" as a delimiter for the read automatically.
My workaround was to do a base64 encoding on the "tostring()" image, then send that over the pipe. This works, but the base64 decoding on the other slide is much too slow for real-time applications (~0.2 seconds per frame). Code:
Send base64-encoded images over named pipe (Python)
import cv2
import numpy as np
from time import time
from time import sleep
import base64
##########################################################
FIFO_Images = "./../pipes/images.fifo"
videoName = "./../../videos/videoName.avi"
delim = ";;"
##########################################################
def sendImage(h, w, d, pixelarray):
flat = pixelarray.flatten()
imageString = base64.b64encode(pixelarray.tostring())
fullString = str(h)+ delim + str(w)+ delim + str(d)+ delim + imageString + delim + "\n"
with open(FIFO_Images, "w") as f:
f.write(fullString)
return
##########################################################
cap = cv2.VideoCapture(videoName)
count = 0
while(cap.isOpened()):
ret, frame_rgb = cap.read()
h, w, d = frame_rgb.shape
frame_gbr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
sendImage(h, w, d, frame_rgb)
cap.release()
cv2.destroyAllWindows()
Read base64-encoded images over named pipe (C++)
#include "opencv2/opencv.hpp"
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <linux/stat.h>
#include <ctime>
using namespace std;
using namespace cv;
#define FIFO_FILE "./../../../pipes/images.fifo"
#define MAX_BUF 10000000
FILE *fp;
char readbuf[MAX_BUF + 1]; //add 1 to the expected size to accomodate the mysterious "extra byte", which I think signals the end of the line.
/************************BASE64 Decoding*********************************************/
std::string base64_encode(unsigned char const* , unsigned int len);
std::string base64_decode(std::string const& s);
static const std::string base64_chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
static inline bool is_base64(unsigned char c) {
return (isalnum(c) || (c == '+') || (c == '/'));
}
std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
std::string ret;
int i = 0;
int j = 0;
unsigned char char_array_3[3];
unsigned char char_array_4[4];
while (in_len--) {
char_array_3[i++] = *(bytes_to_encode++);
if (i == 3) {
char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
char_array_4[3] = char_array_3[2] & 0x3f;
for(i = 0; (i <4) ; i++)
ret += base64_chars[char_array_4[i]];
i = 0;
}
}
if (i)
{
for(j = i; j < 3; j++)
char_array_3[j] = '\0';
char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
char_array_4[3] = char_array_3[2] & 0x3f;
for (j = 0; (j < i + 1); j++)
ret += base64_chars[char_array_4[j]];
while((i++ < 3))
ret += '=';
}
return ret;
}
std::string base64_decode(std::string const& encoded_string) {
int in_len = encoded_string.size();
int i = 0;
int j = 0;
int in_ = 0;
unsigned char char_array_4[4], char_array_3[3];
std::string ret;
while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
char_array_4[i++] = encoded_string[in_]; in_++;
if (i ==4) {
for (i = 0; i <4; i++)
char_array_4[i] = base64_chars.find(char_array_4[i]);
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (i = 0; (i < 3); i++)
ret += char_array_3[i];
i = 0;
}
}
if (i) {
for (j = i; j <4; j++)
char_array_4[j] = 0;
for (j = 0; j <4; j++)
char_array_4[j] = base64_chars.find(char_array_4[j]);
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (j = 0; (j < i - 1); j++) ret += char_array_3[j];
}
return ret;
}
/*********************************************************************/
int stringToInt(string str)
{
int num;
if (!(istringstream(str) >> num)) num = 0;
return num;
}
/*********************************************************************/
bool timerOn = 0;
clock_t timerStart;
void Timer(string process)
{
if (!timerOn)
{
timerStart = clock();
timerOn = true;
}
else if (timerOn)
{
double duration = (clock() - timerStart) / (double) CLOCKS_PER_SEC;
cout << "Time to complete: ";
printf("%.2f", duration);
cout << ": " << process << endl;
timerOn = false;
}
}
/*********************************************************************/
void getFrame()
{
string fullString;
string delimiter = ";;";
size_t pos = 0;
string token;
int h;
int w;
int d;
string imgString;
int fifo;
bool cont(true);
/***************************
Read from the pipe
www.tldp.org/LDP/lpg/node18.html
***************************/
Timer("Read from pipe");
fp = fopen(FIFO_FILE, "r");
fgets(readbuf, MAX_BUF + 1, fp); // Stops when MAX_BUF characters are read, the newline character ("\n") is read, or the EOF (end of file) is reached
string line(readbuf);
fclose(fp);
Timer("Read from pipe");
//////parse the string into components
Timer("Parse string");
int counter = 0;
while ((pos = line.find(delimiter)) != string::npos)
{
token = line.substr(0,pos);
if (counter == 0)
{
h = stringToInt(token);
}
else if (counter == 1)
{
w = stringToInt(token);
}
else if (counter == 2)
{
d = stringToInt(token);
}
else if (counter == 3)
{
imgString = token;
//cout << imgString[0] << endl;
}
else
{
cout << "ERROR: Too many paramaters passed" << endl;
return;
}
line.erase(0, pos + delimiter.length());
counter ++;
}
if (counter == 3)
{
imgString = token;
}
if (counter < 3)
{
cout << "ERROR: Not enough paramaters passed: " << counter << endl;
//return;
}
Timer("Parse string");
/***************************
Convert from Base64
***************************/
Timer("Decode Base64");
std::string decoded = base64_decode(imgString);
Timer("Decode Base64");
/***************************
Convert to vector of ints
***************************/
Timer("Convert to vector of ints");
std::vector<uchar> imgVector;
for (int i = 0; i < decoded.length(); i = i+1) // + 4)
{
int temp = (char(decoded[i]));
imgVector.push_back(temp);
}
Timer("Convert to vector of ints");
//////convert the vector into a matrix
Mat frame = Mat(imgVector).reshape(d, h);
namedWindow("Frame", WINDOW_AUTOSIZE);
imshow("Frame", frame);
waitKey(1);
}
int main()
{
/* Create the FIFO if it does not exist */
umask(0);
mknod(FIFO_FILE, S_IFIFO|0666, 0);
while(1)
{
getFrame();
}
return 0;
}
There must be a more efficient way to accomplish this. Can anyone make a recommendation? While I'm happy to hear suggestions for other methods to accomplish this, I am constrained to using named pipes for now.
This is overcomplicated. If you need to send binary data, send their length first, then newline (\n), and then the data (raw, no base64). Receive it on the other side by readling a line, parsing the number and then just reading a block of data of given length.
Example - writing binary data to a FIFO (or file) in Python:
#!/usr/bin/env python3
import os
fifo_name = 'fifo'
def main():
data = b'blob\n\x00 123'
try:
os.mkfifo(fifo_name)
except FileExistsError:
pass
with open(fifo_name, 'wb') as f:
# b for binary mode
f.write('{}\n'.format(len(data)).encode())
f.write(data)
if __name__ == '__main__':
main()
Reading binary data from FIFO in C++:
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <sys/stat.h>
int main(int argc, char *argv[]) {
const char *fifo_name = "fifo";
mknod(fifo_name, S_IFIFO | 0666, 0);
std::ifstream f(fifo_name);
std::string line;
getline(f, line);
auto data_size = std::stoi(line);
std::cout << "Size: " << data_size << std::endl;
std::string data;
{
std::vector<char> buf(data_size);
f.read(buf.data(), data_size);
// write to vector data is valid since C++11
data.assign(buf.data(), buf.size());
}
if (!f.good()) {
std::cerr << "Read failed" << std::endl;
}
std::cout << "Data size: " << data.size() << " content: " << data << std::endl;
}
I'm trying to translate the following Python code into C++:
import struct
import binascii
inputstring = ("0000003F" "0000803F" "AD10753F" "00000080")
num_vals = 4
for i in range(num_vals):
rawhex = inputstring[i*8:(i*8)+8]
# <f for little endian float
val = struct.unpack("<f", binascii.unhexlify(rawhex))[0]
print val
# Output:
# 0.5
# 1.0
# 0.957285702229
# -0.0
So it reads 32-bit worth of the hex-encoded string, turns it into a byte-array with the unhexlify method, and interprets it as a little-endian float value.
The following almost works, but the code is kind of crappy (and the last 00000080 parses incorrectly):
#include <sstream>
#include <iostream>
int main()
{
// The hex-encoded string, and number of values are loaded from a file.
// The num_vals might be wrong, so some basic error checking is needed.
std::string inputstring = "0000003F" "0000803F" "AD10753F" "00000080";
int num_vals = 4;
std::istringstream ss(inputstring);
for(unsigned int i = 0; i < num_vals; ++i)
{
char rawhex[8];
// The ifdef is wrong. It is not the way to detect endianness (it's
// always defined)
#ifdef BIG_ENDIAN
rawhex[6] = ss.get();
rawhex[7] = ss.get();
rawhex[4] = ss.get();
rawhex[5] = ss.get();
rawhex[2] = ss.get();
rawhex[3] = ss.get();
rawhex[0] = ss.get();
rawhex[1] = ss.get();
#else
rawhex[0] = ss.get();
rawhex[1] = ss.get();
rawhex[2] = ss.get();
rawhex[3] = ss.get();
rawhex[4] = ss.get();
rawhex[5] = ss.get();
rawhex[6] = ss.get();
rawhex[7] = ss.get();
#endif
if(ss.good())
{
std::stringstream convert;
convert << std::hex << rawhex;
int32_t val;
convert >> val;
std::cerr << (*(float*)(&val)) << "\n";
}
else
{
std::ostringstream os;
os << "Not enough values in LUT data. Found " << i;
os << ". Expected " << num_vals;
std::cerr << os.str() << std::endl;
throw std::exception();
}
}
}
(compiles on OS X 10.7/gcc-4.2.1, with a simple g++ blah.cpp)
Particularly, I'd like to get rid of the BIG_ENDIAN macro stuff, as I'm sure there is a nicer way to do this, as this post discusses.
Few other random details - I can't use Boost (too large a dependency for the project). The string will usually contain between 1536 (83*3) and 98304 float values (323*3), at most 786432 (643*3)
(edit2: added another value, 00000080 == -0.0)
The following is your updated code modified to remove the #ifdef BIG_ENDIAN block. It uses a read technique that should be host byte order independent. It does this by reading the hex bytes (which are little endian in your source string) into a big endian string format compatible with the iostream std::hex operator. Once in this format it should not matter what the host byte order is.
Additionally, it fixes a bug in that rawhex needs to be zero terminated to be inserted into convert without trailing garbage in some cases.
I do not have a big endian system to test on, so please verify on your platform. This was compiled and tested under Cygwin.
#include <sstream>
#include <iostream>
int main()
{
// The hex-encoded string, and number of values are loaded from a file.
// The num_vals might be wrong, so some basic error checking is needed.
std::string inputstring = "0000003F0000803FAD10753F00000080";
int num_vals = 4;
std::istringstream ss(inputstring);
size_t const k_DataSize = sizeof(float);
size_t const k_HexOctetLen = 2;
for (uint32_t i = 0; i < num_vals; ++i)
{
char rawhex[k_DataSize * k_HexOctetLen + 1];
// read little endian string into memory array
for (uint32_t j=k_DataSize; (j > 0) && ss.good(); --j)
{
ss.read(rawhex + ((j-1) * k_HexOctetLen), k_HexOctetLen);
}
// terminate the string (needed for safe conversion)
rawhex[k_DataSize * k_HexOctetLen] = 0;
if (ss.good())
{
std::stringstream convert;
convert << std::hex << rawhex;
uint32_t val;
convert >> val;
std::cerr << (*(float*)(&val)) << "\n";
}
else
{
std::ostringstream os;
os << "Not enough values in LUT data. Found " << i;
os << ". Expected " << num_vals;
std::cerr << os.str() << std::endl;
throw std::exception();
}
}
}
I think the whole istringstring business is an overkill. It's much easier to parse this yourself one digit at a time.
First, create a function to convert a hex digit into an integer:
signed char htod(char c)
{
c = tolower(c);
if(isdigit(c))
return c - '0';
if(c >= 'a' && c <= 'f')
return c - 'a' + 10;
return -1;
}
Then simply convert the string into an integer. The code below doesn't check for errors and assumes big endianness -- but you should be able to fill in the details.
unsigned long t = 0;
for(int i = 0; i < s.length(); ++i)
t |= (t << 4) & htod(s[i]);
Then your float is
float f = * (float *) &t;
This is what we ended up with, OpenColorIO/src/core/FileFormatIridasLook.cpp
(Amardeep's answer with the unsigned uint32_t fix would likely work also)
// convert hex ascii to int
// return true on success, false on failure
bool hexasciitoint(char& ival, char character)
{
if(character>=48 && character<=57) // [0-9]
{
ival = static_cast<char>(character-48);
return true;
}
else if(character>=65 && character<=70) // [A-F]
{
ival = static_cast<char>(10+character-65);
return true;
}
else if(character>=97 && character<=102) // [a-f]
{
ival = static_cast<char>(10+character-97);
return true;
}
ival = 0;
return false;
}
// convert array of 8 hex ascii to f32
// The input hexascii is required to be a little-endian representation
// as used in the iridas file format
// "AD10753F" -> 0.9572857022285461f on ALL architectures
bool hexasciitofloat(float& fval, const char * ascii)
{
// Convert all ASCII numbers to their numerical representations
char asciinums[8];
for(unsigned int i=0; i<8; ++i)
{
if(!hexasciitoint(asciinums[i], ascii[i]))
{
return false;
}
}
unsigned char * fvalbytes = reinterpret_cast<unsigned char *>(&fval);
#if OCIO_LITTLE_ENDIAN
// Since incoming values are little endian, and we're on little endian
// preserve the byte order
fvalbytes[0] = (unsigned char) (asciinums[1] | (asciinums[0] << 4));
fvalbytes[1] = (unsigned char) (asciinums[3] | (asciinums[2] << 4));
fvalbytes[2] = (unsigned char) (asciinums[5] | (asciinums[4] << 4));
fvalbytes[3] = (unsigned char) (asciinums[7] | (asciinums[6] << 4));
#else
// Since incoming values are little endian, and we're on big endian
// flip the byte order
fvalbytes[3] = (unsigned char) (asciinums[1] | (asciinums[0] << 4));
fvalbytes[2] = (unsigned char) (asciinums[3] | (asciinums[2] << 4));
fvalbytes[1] = (unsigned char) (asciinums[5] | (asciinums[4] << 4));
fvalbytes[0] = (unsigned char) (asciinums[7] | (asciinums[6] << 4));
#endif
return true;
}