I'm trying to write an inference program for a YOLO model in C++. I've looked into Darknet, but it needs a .cfg file to import the model structure (which is a bit too complicated for me...), so I want to do the program with TensorFlow instead.
(My model weights were converted from .hdf5, used in Python, to .pb, used in C++.)
I've found some examples written in Python, and it seems they do some work before the inference process... Source
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=50,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes."""
    num_layers = len(yolo_outputs)
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    for l in range(num_layers):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
            anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)
    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)
    return boxes_, scores_, classes_
I've printed out the return values, and they look like this:
boxes-> Tensor("concat_11:0", shape=(?, 4), dtype=float32)
scores-> Tensor("concat_12:0", shape=(?,), dtype=float32)
classes-> Tensor("concat_13:0", shape=(?,), dtype=int32)
The original output of my YOLO model (.hdf5) is the following (I got this by printing model.output):
tf.Tensor 'conv2d_59_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32
tf.Tensor 'conv2d_67_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32
tf.Tensor 'conv2d_75_1/BiasAdd:0' shape=(?, ?, ?, 21) dtype=float32
And the inference part of the Python code is:
out_boxes, out_scores, out_classes = sess.run(
    [boxes, scores, classes],
    feed_dict={
        yolo_model.input: image_data,
        input_image_shape: [image.size[1], image.size[0]],
        K.learning_phase(): 0
    })
Compared to the Python version of the inference code, the C++ part is... (Reference)
int main()
{
    string image = "test.jpg";
    string graph = "yolo_weight.pb";
    string labels = "coco.names";
    int32 input_width = 416;
    int32 input_height = 416;
    float input_mean = 0;
    float input_std = 255;
    string input_layer = "input_1:0";
    std::vector<std::string> output_layer = { "conv2d_59/BiasAdd:0", "conv2d_67/BiasAdd:0", "conv2d_75/BiasAdd:0" };

    std::unique_ptr<tensorflow::Session> session;
    string graph_path = tensorflow::io::JoinPath(root_dir, graph);
    Status load_graph_status = LoadGraph(graph_path, &session);

    std::vector<Tensor> resized_tensors;
    string image_path = tensorflow::io::JoinPath(root_dir, image);
    Status read_tensor_status = ReadTensorFromImageFile(image_path, input_height, input_width,
                                                        input_mean, input_std, &resized_tensors);

    Tensor inpTensor = Tensor(DT_FLOAT, TensorShape({ 1, input_height, input_width, 3 }));
    std::vector<Tensor> outputs;
    cv::Mat srcImage = cv::imread(image);
    cv::resize(srcImage, srcImage, cv::Size(input_width, input_height));
    srcImage.convertTo(srcImage, CV_32FC3);
    srcImage = srcImage / 255;
    string ty = type2str(srcImage.type());
    float *p = (&inpTensor)->flat<float>().data();
    cv::Mat tensorMat(input_height, input_width, CV_32FC3, p);
    srcImage.convertTo(tensorMat, CV_32FC3);

    Status run_status = session->Run({ { input_layer, inpTensor } }, { output_layer }, {}, &outputs);

    int cc = 1;
    auto output_detection_class = outputs[0].tensor<float, 4>();
    std::cout << "detection scores" << std::endl;
    std::cout << "typeid(output_detection_scoreclass).name->" << typeid(output_detection_class).name() << std::endl;
    for (int i = 0; i < 13; ++i)
    {
        for (int j = 0; j < 13; ++j)
        {
            for (int k = 0; k < 21; ++k)
            {
                // using (index_1, index_2, index_3) to access the element in a tensor;
                // the first index is the batch dimension, which has size 1 here, so it must be 0
                printf("i->%d, j->%d, k->%d\t", i, j, k);
                std::cout << output_detection_class(0, i, j, k) << "\t";
                cc += 1;
                if (cc % 4 == 0)
                {
                    std::cout << "\n";
                }
            }
        }
        std::cout << std::endl;
    }
    return 0;
}
The output of the C++ inference code is:
outputs.size()-> 3
outputs[0].shape()-> [1,13,13,21]
outputs[1].shape()-> [1,26,26,21]
outputs[2].shape()-> [1,52,52,21]
But the output I get is pretty weird...
(The values in outputs[0] don't look like scores, classes, or coordinates...)
So I'm wondering: is it because I'm missing the part the Python code does before inference? Or am I using the wrong way to get my output data?
I've checked some related questions and answers...
1. Yolo v3 model output clarification with keras
2. Convert YoloV3 output to coordinates of bounding box, label and confidence
3. How to access tensorflow::Tensor C++
But I still can't figure out how to make it work :(
I also found a repo which might be helpful. I've taken a look at its yolo.cpp, but its model's output tensor shape is different from mine, and I'm not sure whether I can reuse that code directly. Its output tensor is
tf.Tensor 'import/output:0' shape=(?, 735) dtype=float32
Any help or advice is appreciated...
In case you're still struggling with this: I don't see where you are applying the sigmoid and exp to the output layer values.
You might look at this article, which describes how to handle the output:
https://medium.com/analytics-vidhya/yolo-v3-theory-explained-33100f6d193
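For concreteness, here is a minimal sketch (not the question's code, nor any particular repo's) of what that decoding step can look like in C++ for one output scale. It assumes the 21 channels are laid out as 3 anchors x (5 + 2 classes) in NHWC order; the anchor sizes are placeholders you would replace with the ones your model was trained with:

#include <cmath>
#include <vector>

struct Detection { float x, y, w, h, conf; int cls; };

inline float sigmoid(float v) { return 1.0f / (1.0f + std::exp(-v)); }

// Decode one scale of raw YOLOv3 output (shape [1, grid, grid, 21], NHWC).
// "out" points at the tensor data, e.g. outputs[0].flat<float>().data().
std::vector<Detection> decodeScale(const float* out, int grid, int numClasses,
                                   const float anchors[3][2], int inputSize)
{
    const int stride = 5 + numClasses; // tx, ty, tw, th, objectness, classes...
    std::vector<Detection> dets;
    for (int gy = 0; gy < grid; ++gy)
        for (int gx = 0; gx < grid; ++gx)
            for (int a = 0; a < 3; ++a)
            {
                const float* p = out + ((gy * grid + gx) * 3 + a) * stride;
                Detection d;
                d.x = (gx + sigmoid(p[0])) / grid * inputSize;  // sigmoid on the center offsets
                d.y = (gy + sigmoid(p[1])) / grid * inputSize;
                d.w = anchors[a][0] * std::exp(p[2]);           // exp on the size offsets
                d.h = anchors[a][1] * std::exp(p[3]);
                float obj = sigmoid(p[4]);                      // objectness score
                int best = 0;
                float bestp = 0.0f;
                for (int c = 0; c < numClasses; ++c)            // YOLOv3 uses per-class sigmoid, not softmax
                {
                    float pc = sigmoid(p[5 + c]);
                    if (pc > bestp) { bestp = pc; best = c; }
                }
                d.conf = obj * bestp;
                d.cls = best;
                dets.push_back(d);
            }
    return dets;
}

Score thresholding and non-max suppression (the second half of yolo_eval above) still have to run over the collected detections.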
As Bryan said, there are still some operations that need to be applied to the output layers.
So in my case (following this repo), I added this method to the YOLO class (in yolo.py) to bake that post-processing into the graph when saving the model:
def output_pb(self, out_dir, out_pb):
    out_bx = self.boxes.name.split(":")[0]
    out_sc = self.scores.name.split(":")[0]
    out_cs = self.classes.name.split(":")[0]
    print(out_bx, out_sc, out_cs)
    frozen_graph = tf.graph_util.remove_training_nodes(
        tf.graph_util.convert_variables_to_constants(
            self.sess, self.sess.graph.as_graph_def(), [out_bx, out_sc, out_cs]))
    tf.io.write_graph(frozen_graph, out_dir, out_pb, as_text=False)
    print("===== FINISH saving new pb file =====")
When saving the model, I called the function like this:
yolo = YOLO(**config)
yolo.output_pb(output_dir, output_pb_name)
And when doing inference in C++,
the whole process goes like this:
// initialize model
YOLO* YOLO_data = (YOLO*)Init_DllODM_object(config);
// do some stuff to set data in YOLO_data
cv::Mat input_pic = cv::imread("whatever_pic.png");
predict(YOLO_data, input_pic, YOLO_data->bbox_res, YOLO_data->score_res, YOLO_data->class_res);
// draw result on pic
cv::Mat res = show_result(YOLO_data, input_pic);
Detailed code is here:
// yolo_cpp.h
struct YOLO
{
    float score_thres;
    std::vector<int> class_res;
    std::vector<float> bbox_res, score_res;
    std::string inp_tensor_name;
    std::string placeholder_name;
    std::vector<std::string> out_tensors;
    Session* session;
    Tensor t, inpTensor;
    std::vector<tensorflow::Tensor> outTensor;
    std::vector<int> MD_size;
    std::vector<int> inp_pic_size;
    std::vector<std::string> md_class_list;
    std::vector<cv::Scalar> color_list;
    int show_score;
    int score_type;
    int return_origin;
};
// yolo_cpp.cpp
void* Init_DllODM_object(json config)
{
    std::string model_path = config["model"].get<std::string>();
    YOLO* YOLO_data = new YOLO();
    auto options = tensorflow::SessionOptions();
    GraphDef graphdef;

    // loading model to graph
    Status status_load = ReadBinaryProto(Env::Default(), model_path, &graphdef);
    if (!status_load.ok()) {
        std::cout << "ERROR: Loading model failed..." << std::endl;
        std::cout << model_path << status_load.ToString() << "\n";
    }
    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(0.7);
    options.config.mutable_gpu_options()->set_allow_growth(true);

    // pick up the input tensor, the image-shape placeholder, and the three
    // output nodes (boxes, scores, classes) appended by output_pb() above
    int node_count = graphdef.node_size();
    for (int i = 0; i < node_count; i++)
    {
        auto n = graphdef.node(i);
        if (n.name().find("input_") != string::npos)
        {
            YOLO_data->inp_tensor_name = n.name();
        }
        else if (n.name().find("Placeholder_") != string::npos)
        {
            YOLO_data->placeholder_name = n.name();
        }
        else if (i == node_count - 5 || i == node_count - 3 || i == node_count - 1)
        {
            YOLO_data->out_tensors.push_back(n.name());
        }
    }

    std::vector<int> MD_size_ = config["input_size"];
    YOLO_data->MD_size = MD_size_;
    std::vector<int> inp_pic_size_ = config["input_pic_size"];
    YOLO_data->inp_pic_size = inp_pic_size_;

    YOLO_data->inpTensor = Tensor(DT_FLOAT, TensorShape({ 1, YOLO_data->MD_size[0], YOLO_data->MD_size[1], 3 })); // input tensor
    YOLO_data->t = Tensor(DT_FLOAT, TensorShape({ 2 }));

    // ref: https://stackoverflow.com/questions/36804714/define-a-feed-dict-in-c-for-tensorflow-models
    auto t_matrix = YOLO_data->t.tensor<float, 1>();
    t_matrix(0) = YOLO_data->inp_pic_size[0];
    t_matrix(1) = YOLO_data->inp_pic_size[1];

    // create session
    Status status_newsess = NewSession(options, &YOLO_data->session); // for the usage of gpu setting
    Status status_create = YOLO_data->session->Create(graphdef);
    if (!status_create.ok()) {
        std::cout << "ERROR: Creating graph in session failed.." << status_create.ToString() << std::endl;
    }
    else {
        std::cout << "----------- Successfully created session and load graph -------------" << std::endl;
    }
    return YOLO_data;
}
int predict(YOLO* YOLO_, cv::Mat srcImage, std::vector<float>& bbox_res, std::vector<float>& score_res, std::vector<int>& class_res)
{
    // read image -> input image
    if (srcImage.empty()) // check if image can open correctly
    {
        std::cout << "can't open the image!!!!!!!" << std::endl;
        int res = -1;
        return res;
    }
    // ref: https://ppt.cc/f7ERNx
    std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
        { YOLO_->inp_tensor_name, YOLO_->inpTensor },
        { YOLO_->placeholder_name, YOLO_->t },
    };
    srcImage = letterbox_image(srcImage, YOLO_->MD_size[0], YOLO_->MD_size[1]);
    convertCVMatToTensor(YOLO_, srcImage);
    Status status_run = YOLO_->session->Run({ inputs }, { YOLO_->out_tensors }, {}, &YOLO_->outTensor);
    if (!status_run.ok()) {
        std::cout << "ERROR: RUN failed..." << std::endl;
        std::cout << status_run.ToString() << "\n";
        int res = -1;
        return res;
    }
    TTypes<float>::Flat pp1 = YOLO_->outTensor[0].flat<float>();
    TTypes<float>::Flat pp2 = YOLO_->outTensor[1].flat<float>();
    TTypes<int>::Flat pp3 = YOLO_->outTensor[2].flat<int>();
    int pp1_idx;
    for (int i = 0; i < pp2.size(); i++)
    {
        pp1_idx = i * 4;
        bbox_res.push_back(pp1(pp1_idx));
        bbox_res.push_back(pp1(pp1_idx + 1));
        bbox_res.push_back(pp1(pp1_idx + 2));
        bbox_res.push_back(pp1(pp1_idx + 3));
        score_res.push_back(pp2(i));
        class_res.push_back(pp3(i));
    }
    return 0;
}
cv::Mat show_result(YOLO* inf_obj, cv::Mat inp_pic)
{
    int bbox_idx;
    std::string plot_str;
    bool under_thresh = false;
    std::vector<int> del_idx;
    for (int i = 0; i < inf_obj->class_res.size(); i++)
    {
        int y_min, y_max, x_min, x_max;
        bbox_idx = i * 4;
        y_min = std::max(0, (int)floor(inf_obj->bbox_res[bbox_idx] + 0.5));
        x_min = std::max(0, (int)floor(inf_obj->bbox_res[bbox_idx + 1] + 0.5));
        y_max = std::max(0, (int)floor(inf_obj->bbox_res[bbox_idx + 2] + 0.5));
        x_max = std::max(0, (int)floor(inf_obj->bbox_res[bbox_idx + 3] + 0.5));
        //std::cout << md_class_list[class_res[i]] << ", ";
        //std::cout << score_res[i] << ",";
        //std::cout << "[" << x_min << ", " << y_min << ", " << x_max << ", " << y_max << "]\n";
        if (inf_obj->show_score)
        {
            if (inf_obj->score_type)
                plot_str = inf_obj->md_class_list[inf_obj->class_res[i]] + ", " + std::to_string(rounding(inf_obj->score_res[i] * 100, 2)).substr(0, 5) + "%";
            else
                plot_str = inf_obj->md_class_list[inf_obj->class_res[i]] + ", " + std::to_string(rounding(inf_obj->score_res[i], 2)).substr(0, 4);
        }
        else
            plot_str = inf_obj->md_class_list[inf_obj->class_res[i]];
        if (inf_obj->score_res[i] >= inf_obj->score_thres)
        {
            inp_pic = plot_one_box(inp_pic, x_min, y_min, x_max, y_max, plot_str, inf_obj->color_list[inf_obj->class_res[i]]);
        }
        else
        {
            //std::cout << "score_res[i]->" << score_res[i] << "under thresh!!" << std::endl;
            under_thresh = true;
            del_idx.push_back(i);
        }
    }
    if (under_thresh)
    {
        //std::cout << "*** deleting element" << std::endl;
        for (int x = 0; x < del_idx.size(); x++)
        {
            bbox_idx = (del_idx[x] - x) * 4;
            inf_obj->bbox_res.erase(inf_obj->bbox_res.begin() + bbox_idx + 3);
            inf_obj->bbox_res.erase(inf_obj->bbox_res.begin() + bbox_idx + 2);
            inf_obj->bbox_res.erase(inf_obj->bbox_res.begin() + bbox_idx + 1);
            inf_obj->bbox_res.erase(inf_obj->bbox_res.begin() + bbox_idx);
            inf_obj->score_res.erase(inf_obj->score_res.begin() + del_idx[x] - x);
            inf_obj->class_res.erase(inf_obj->class_res.begin() + del_idx[x] - x);
        }
        del_idx.clear();
    }
    return inp_pic;
}
Since my code is used in a DLL, I arranged it this way.
There is still some redundant code I didn't delete, but I think the whole process can be done with the code provided so far.
Hope this helps :D
I'm trying to implement the following C++ code in Python. However, the data the two produce differs when I print it out. I read the post Receiving 16-bit integers in Python and tried both ">h" and "<h" in the unpack function, but neither seems to give me the same data as the C++ code. I do, however, read the width and height correctly. Just the data is wrong. What am I missing?
int16_t* loadDepthImageCompressed( const char* fname ){
    //now read the depth image
    FILE* pFile = fopen(fname, "rb");
    if(!pFile){
        std::cerr << "could not open file " << fname << std::endl;
        return NULL;
    }

    int im_width = 0;
    int im_height = 0;
    bool success = true;

    std::cout << sizeof(int) << std::endl;
    std::cout << sizeof(int16_t) << std::endl;

    success &= ( fread(&im_width,sizeof(int),1,pFile) == 1 ); // read width of depthmap
    success &= ( fread(&im_height,sizeof(int),1,pFile) == 1 ); // read height of depthmap

    int16_t* depth_img = new int16_t[im_width*im_height];

    std::cout << im_width << std::endl;
    std::cout << im_height << std::endl;

    int numempty;
    int numfull;
    int p = 0;

    while(p < im_width*im_height ){
        success &= ( fread( &numempty,sizeof(int),1,pFile) == 1 );
        for(int i = 0; i < numempty; i++)
            depth_img[ p + i ] = 0;

        success &= ( fread( &numfull,sizeof(int), 1, pFile) == 1 );
        success &= ( fread( &depth_img[ p + numempty ], sizeof(int16_t), numfull, pFile) == (unsigned int) numfull );
        p += numempty+numfull;
    }

    fclose(pFile);

    if(success)
        return depth_img;
    else{
        delete [] depth_img;
        return NULL;
    }
}
The Python code:
def loadDepthImageCompressed(fname):
    with open(fname,'rb') as depth_file:
        im_width = struct.unpack('i',depth_file.read(4))[0]
        im_height = struct.unpack('i',depth_file.read(4))[0]
        depth_img = [None] * (im_width * im_height)
        p = 0
        while(p < im_width * im_height):
            numempty = struct.unpack('i',depth_file.read(4))[0]
            for i in range(numempty):
                depth_img[p + i] = 0
            numfull = struct.unpack('i',depth_file.read(4))[0]
            for i in range(numfull):
                depth_img[p+numempty + i] = struct.unpack('>h',depth_file.read(2))[0]
            p += numempty+numfull
        return depth_img
I am trying to rewrite the Python code for mnist_client in C++. Since I am new to TensorFlow and TF Serving, I am having some difficulties. I went through the tutorials and the C++ client example (inception_client).
The Python mnist_client works without any problems, but when I run my C++ client it gives me an "In[0] is not a matrix" error:
gRPC call return code: 3: In[0] is not a matrix
[[Node: MatMul = MatMul[T=DT_FLOAT, _output_shapes=[[?,10]], transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/cpu:0"](_arg_x_0_0, Variable/read)]]
I trained the model as in the tutorial, and I've checked that the MNIST data I read is OK.
From this:
tensorflow Invalid argument: In[0] is not a matrix,
I understand that MatMul needs at least 2-dimensional data. But I went through the C++ code for inception_client and the Python mnist_client, and both read the image data into a 1-dimensional char array...
What am I missing here?
The code for inception_client: https://github.com/tensorflow/serving/blob/master/tensorflow_serving/example/inception_client.cc
Any help would be much appreciated. :)
class ServingClient{
public:
    ServingClient(std::shared_ptr<Channel> channel) : stub_(PredictionService::NewStub(channel)){}

    tensorflow::string callPredict( const tensorflow::string &model_name,
                                    const tensorflow::string &model_signature,
                                    const int num_tests){
        PredictRequest request;
        PredictResponse response;
        ClientContext context;
        int image_size;
        int image_offset = 16;
        int label_offset = 8;

        request.mutable_model_spec()->set_name(model_name);
        request.mutable_model_spec()->set_signature_name(model_signature);

        google::protobuf::Map<tensorflow::string, tensorflow::TensorProto> &inputs = *request.mutable_inputs();

        std::fstream imageFile("t10k-images-idx3-ubyte", std::ios::binary | std::ios::in);
        std::fstream labelFile("t10k-labels-idx1-ubyte", std::ios::binary | std::ios::in);

        labelFile.seekp(0);
        imageFile.seekp(0);

        uint32_t magic_number_images;
        uint32_t nImages;
        uint32_t magic_number_labels;
        uint32_t rowsI = 0;
        uint32_t rowsL = 0;
        uint32_t colsI = 0;
        uint32_t colsL = 0;

        imageFile.read((char *)&magic_number_images, sizeof(magic_number_images));
        imageFile.read((char *)&nImages, sizeof(nImages));
        imageFile.read((char *)(&rowsI), sizeof(rowsI));
        imageFile.read((char *)&colsI, sizeof(colsI));
        image_size = ReverseInt(rowsI) * ReverseInt(colsI);

        labelFile.read((char *)&magic_number_labels, sizeof(magic_number_labels));
        labelFile.read((char *)&rowsL, sizeof(rowsL));

        for(int i=0; i<num_tests; i++){
            tensorflow::TensorProto proto;
            labelFile.seekp(label_offset);
            imageFile.seekp(image_offset);

            //read mnist image
            char *img = new char[image_size]();
            char label = 0;
            imageFile.read((char *)img, image_size);
            image_offset += image_size;

            //read label
            labelFile.read(&label, 1);
            label_offset++;

            //predict
            proto.set_dtype(tensorflow::DataType::DT_STRING);
            proto.add_string_val(img, image_size);
            proto.mutable_tensor_shape()->add_dim()->set_size(1);

            inputs["images"] = proto;

            Status status = stub_->Predict(&context, request, &response);
            delete[] img;

            if(status.ok()){
                std::cout << "status OK." << std::endl;
                OutMap &map_outputs = *response.mutable_outputs();
                OutMap::iterator iter;
                int output_index = 0;

                for(iter = map_outputs.begin(); iter != map_outputs.end(); ++iter){
                    tensorflow::TensorProto &result_tensor_proto = iter->second;
                    tensorflow::Tensor tensor;

                    //check if response converted successfully
                    bool converted = tensor.FromProto(result_tensor_proto);
                    if (converted) {
                        std::cout << "the result tensor[" << output_index << "] is:" << std::endl
                                  << tensor.SummarizeValue(10) << std::endl;
                    }
                    else {
                        std::cout << "the result tensor[" << output_index
                                  << "] convert failed." << std::endl;
                    }
                    ++output_index;
                }
            }
            else{
                std::cout << "gRPC call return code: " << status.error_code() << ": "
                          << status.error_message() << std::endl;
            }
        }
        imageFile.close();
        labelFile.close();
        return "";
    }

private:
    std::unique_ptr<PredictionService::Stub> stub_;
};
EDIT 1: I assume the problem must be in how the model was created and in the dimensions of the data the client sends.
I used the provided Python program that trains and exports the model, which sets the dimensions:
feature_configs = {'x': tf.FixedLenFeature(shape=[784], dtype=tf.float32),}
tf_example = tf.parse_example(serialized_tf_example, feature_configs)
x = tf.identity(tf_example['x'], name='x') # use tf.identity() to assign name
y_ = tf.placeholder('float', shape=[None, 10])
w = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
As expected, the fix was obvious.
All that had to be done was to add another dimension:
proto.mutable_tensor_shape()->add_dim()->set_size(image_size);
to get an [image_size, 1] shape.
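In other words, the relevant lines end up declaring a two-dimensional shape. A minimal sketch (the rest of the loop is unchanged):

// Sketch: declare a 2-D shape so MatMul sees a matrix rather than a vector.
// The dim order shown matches the [image_size, 1] shape described above.
proto.mutable_tensor_shape()->add_dim()->set_size(image_size);
proto.mutable_tensor_shape()->add_dim()->set_size(1);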
I need to send an array (representing an image) through a named FIFO pipe from a Python process to a C++ process, and then back the other way (on a Linux system).
The code below works great when using named pipes between two Python processes. It uses NumPy's tostring() and fromstring() functions:
Send frames over named pipe (Python)
import cv2
import numpy as np
from time import sleep

##########################################################
FIFO_Images = "./../pipes/images.fifo"
videoName = "./../../videos/videoName.avi"
delim = "break"

##########################################################
def sendImage(h, w, d, pixelarray):
    imageString = pixelarray.tostring()
    with open(FIFO_Images, "w") as f:
        f.write(str(h) + delim + str(w) + delim + str(d) + delim + imageString)
    sleep(.01)
    return

##########################################################
cap = cv2.VideoCapture(videoName)

while(cap.isOpened()):
    ret, frame_rgb = cap.read()
    h, w, d = frame_rgb.shape
    sendImage(h, w, d, frame_rgb)

cap.release()
cv2.destroyAllWindows()
Read frames over named pipe (Python)
import cv2
import numpy as np

##########################################################
FIFO_Images = "./../pipes/images.fifo"
delim = "break"

##########################################################
def getFrame():
    with open(FIFO_Images, "r") as f:
        data = f.read().split(delim)
    # parse incoming string, which has format (height, width, depth, imageData)
    h = int(data[0])
    w = int(data[1])
    d = int(data[2])
    imageString = data[3]
    # convert array string into numpy array
    array = np.fromstring(imageString, dtype=np.uint8)
    # reshape numpy array into the required dimensions
    frame = array.reshape((h, w, d))
    return frame

##########################################################
while(True):
    frame = getFrame()
    cv2.imshow('frame', frame)
    cv2.waitKey(1) & 0xFF
However, I couldn't figure out how to read the entire image from the pipe on the C++ side, since the read automatically treats "\n" as a delimiter.
My workaround was to do a base64 encoding on the "tostring()" image, then send that over the pipe. This works, but the base64 decoding on the other side is much too slow for real-time applications (~0.2 seconds per frame). Code:
Send base64-encoded images over named pipe (Python)
import cv2
import numpy as np
from time import time
from time import sleep
import base64

##########################################################
FIFO_Images = "./../pipes/images.fifo"
videoName = "./../../videos/videoName.avi"
delim = ";;"

##########################################################
def sendImage(h, w, d, pixelarray):
    flat = pixelarray.flatten()
    imageString = base64.b64encode(pixelarray.tostring())
    fullString = str(h) + delim + str(w) + delim + str(d) + delim + imageString + delim + "\n"
    with open(FIFO_Images, "w") as f:
        f.write(fullString)
    return

##########################################################
cap = cv2.VideoCapture(videoName)
count = 0

while(cap.isOpened()):
    ret, frame_rgb = cap.read()
    h, w, d = frame_rgb.shape
    frame_gbr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
    sendImage(h, w, d, frame_rgb)

cap.release()
cv2.destroyAllWindows()
Read base64-encoded images over named pipe (C++)
#include "opencv2/opencv.hpp"
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <linux/stat.h>
#include <ctime>
using namespace std;
using namespace cv;
#define FIFO_FILE "./../../../pipes/images.fifo"
#define MAX_BUF 10000000
FILE *fp;
char readbuf[MAX_BUF + 1]; //add 1 to the expected size to accomodate the mysterious "extra byte", which I think signals the end of the line.
/************************BASE64 Decoding*********************************************/
std::string base64_encode(unsigned char const* , unsigned int len);
std::string base64_decode(std::string const& s);
static const std::string base64_chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
static inline bool is_base64(unsigned char c) {
return (isalnum(c) || (c == '+') || (c == '/'));
}
std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
std::string ret;
int i = 0;
int j = 0;
unsigned char char_array_3[3];
unsigned char char_array_4[4];
while (in_len--) {
char_array_3[i++] = *(bytes_to_encode++);
if (i == 3) {
char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
char_array_4[3] = char_array_3[2] & 0x3f;
for(i = 0; (i <4) ; i++)
ret += base64_chars[char_array_4[i]];
i = 0;
}
}
if (i)
{
for(j = i; j < 3; j++)
char_array_3[j] = '\0';
char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
char_array_4[3] = char_array_3[2] & 0x3f;
for (j = 0; (j < i + 1); j++)
ret += base64_chars[char_array_4[j]];
while((i++ < 3))
ret += '=';
}
return ret;
}
std::string base64_decode(std::string const& encoded_string) {
int in_len = encoded_string.size();
int i = 0;
int j = 0;
int in_ = 0;
unsigned char char_array_4[4], char_array_3[3];
std::string ret;
while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
char_array_4[i++] = encoded_string[in_]; in_++;
if (i ==4) {
for (i = 0; i <4; i++)
char_array_4[i] = base64_chars.find(char_array_4[i]);
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (i = 0; (i < 3); i++)
ret += char_array_3[i];
i = 0;
}
}
if (i) {
for (j = i; j <4; j++)
char_array_4[j] = 0;
for (j = 0; j <4; j++)
char_array_4[j] = base64_chars.find(char_array_4[j]);
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (j = 0; (j < i - 1); j++) ret += char_array_3[j];
}
return ret;
}
/*********************************************************************/
int stringToInt(string str)
{
int num;
if (!(istringstream(str) >> num)) num = 0;
return num;
}
/*********************************************************************/
bool timerOn = 0;
clock_t timerStart;
void Timer(string process)
{
if (!timerOn)
{
timerStart = clock();
timerOn = true;
}
else if (timerOn)
{
double duration = (clock() - timerStart) / (double) CLOCKS_PER_SEC;
cout << "Time to complete: ";
printf("%.2f", duration);
cout << ": " << process << endl;
timerOn = false;
}
}
/*********************************************************************/
void getFrame()
{
string fullString;
string delimiter = ";;";
size_t pos = 0;
string token;
int h;
int w;
int d;
string imgString;
int fifo;
bool cont(true);
/***************************
Read from the pipe
www.tldp.org/LDP/lpg/node18.html
***************************/
Timer("Read from pipe");
fp = fopen(FIFO_FILE, "r");
fgets(readbuf, MAX_BUF + 1, fp); // Stops when MAX_BUF characters are read, the newline character ("\n") is read, or the EOF (end of file) is reached
string line(readbuf);
fclose(fp);
Timer("Read from pipe");
//////parse the string into components
Timer("Parse string");
int counter = 0;
while ((pos = line.find(delimiter)) != string::npos)
{
token = line.substr(0,pos);
if (counter == 0)
{
h = stringToInt(token);
}
else if (counter == 1)
{
w = stringToInt(token);
}
else if (counter == 2)
{
d = stringToInt(token);
}
else if (counter == 3)
{
imgString = token;
//cout << imgString[0] << endl;
}
else
{
cout << "ERROR: Too many paramaters passed" << endl;
return;
}
line.erase(0, pos + delimiter.length());
counter ++;
}
if (counter == 3)
{
imgString = token;
}
if (counter < 3)
{
cout << "ERROR: Not enough paramaters passed: " << counter << endl;
//return;
}
Timer("Parse string");
/***************************
Convert from Base64
***************************/
Timer("Decode Base64");
std::string decoded = base64_decode(imgString);
Timer("Decode Base64");
/***************************
Convert to vector of ints
***************************/
Timer("Convert to vector of ints");
std::vector<uchar> imgVector;
for (int i = 0; i < decoded.length(); i = i+1) // + 4)
{
int temp = (char(decoded[i]));
imgVector.push_back(temp);
}
Timer("Convert to vector of ints");
//////convert the vector into a matrix
Mat frame = Mat(imgVector).reshape(d, h);
namedWindow("Frame", WINDOW_AUTOSIZE);
imshow("Frame", frame);
waitKey(1);
}
int main()
{
/* Create the FIFO if it does not exist */
umask(0);
mknod(FIFO_FILE, S_IFIFO|0666, 0);
while(1)
{
getFrame();
}
return 0;
}
There must be a more efficient way to accomplish this. Can anyone make a recommendation? While I'm happy to hear suggestions for other methods, I am constrained to using named pipes for now.
This is overcomplicated. If you need to send binary data, send its length first, then a newline (\n), and then the data itself (raw, no base64). Receive it on the other side by reading a line, parsing the number, and then reading a block of data of the given length.
Example - writing binary data to a FIFO (or file) in Python:
#!/usr/bin/env python3
import os
fifo_name = 'fifo'
def main():
    data = b'blob\n\x00 123'
    try:
        os.mkfifo(fifo_name)
    except FileExistsError:
        pass
    with open(fifo_name, 'wb') as f:
        # b for binary mode
        f.write('{}\n'.format(len(data)).encode())
        f.write(data)

if __name__ == '__main__':
    main()
Reading binary data from FIFO in C++:
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <sys/stat.h>

int main(int argc, char *argv[]) {
    const char *fifo_name = "fifo";
    mknod(fifo_name, S_IFIFO | 0666, 0);

    std::ifstream f(fifo_name);
    std::string line;
    getline(f, line);
    auto data_size = std::stoi(line);
    std::cout << "Size: " << data_size << std::endl;

    std::string data;
    {
        std::vector<char> buf(data_size);
        f.read(buf.data(), data_size);
        // writing to vector data is valid since C++11
        data.assign(buf.data(), buf.size());
    }
    if (!f.good()) {
        std::cerr << "Read failed" << std::endl;
    }
    std::cout << "Data size: " << data.size() << " content: " << data << std::endl;
}
I'm trying to translate the following Python code into C++:
import struct
import binascii
inputstring = ("0000003F" "0000803F" "AD10753F" "00000080")
num_vals = 4
for i in range(num_vals):
    rawhex = inputstring[i*8:(i*8)+8]
    # <f for little endian float
    val = struct.unpack("<f", binascii.unhexlify(rawhex))[0]
    print val
# Output:
# 0.5
# 1.0
# 0.957285702229
# -0.0
So it reads 32 bits' worth of the hex-encoded string, turns it into a byte array with the unhexlify method, and interprets it as a little-endian float value.
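(For comparison, the same decode can be sketched in C++ without any stream machinery; hexToFloatLE is a made-up helper name, and this assumes exactly 8 hex characters per value:)

#include <cstdint>
#include <cstring>
#include <string>

// Equivalent of struct.unpack("<f", binascii.unhexlify(rawhex))[0].
// std::stoul parses the hex digits as a big-endian number, so the bytes
// are swapped before the bits are copied into the float.
float hexToFloatLE(const std::string& rawhex) // e.g. "AD10753F"
{
    uint32_t be = static_cast<uint32_t>(std::stoul(rawhex, nullptr, 16));
    uint32_t le = ((be & 0x000000FFu) << 24) | ((be & 0x0000FF00u) << 8) |
                  ((be & 0x00FF0000u) >> 8)  | ((be & 0xFF000000u) >> 24);
    float f;
    std::memcpy(&f, &le, sizeof f); // bit copy avoids strict-aliasing issues
    return f;
}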
The following almost works, but the code is kind of crappy (and the last 00000080 parses incorrectly):
#include <sstream>
#include <iostream>

int main()
{
    // The hex-encoded string, and number of values are loaded from a file.
    // The num_vals might be wrong, so some basic error checking is needed.
    std::string inputstring = "0000003F" "0000803F" "AD10753F" "00000080";
    int num_vals = 4;

    std::istringstream ss(inputstring);
    for(unsigned int i = 0; i < num_vals; ++i)
    {
        char rawhex[8];

        // The ifdef is wrong. It is not the way to detect endianness (it's
        // always defined)
#ifdef BIG_ENDIAN
        rawhex[6] = ss.get();
        rawhex[7] = ss.get();
        rawhex[4] = ss.get();
        rawhex[5] = ss.get();
        rawhex[2] = ss.get();
        rawhex[3] = ss.get();
        rawhex[0] = ss.get();
        rawhex[1] = ss.get();
#else
        rawhex[0] = ss.get();
        rawhex[1] = ss.get();
        rawhex[2] = ss.get();
        rawhex[3] = ss.get();
        rawhex[4] = ss.get();
        rawhex[5] = ss.get();
        rawhex[6] = ss.get();
        rawhex[7] = ss.get();
#endif

        if(ss.good())
        {
            std::stringstream convert;
            convert << std::hex << rawhex;
            int32_t val;
            convert >> val;
            std::cerr << (*(float*)(&val)) << "\n";
        }
        else
        {
            std::ostringstream os;
            os << "Not enough values in LUT data. Found " << i;
            os << ". Expected " << num_vals;
            std::cerr << os.str() << std::endl;
            throw std::exception();
        }
    }
}
(compiles on OS X 10.7/gcc-4.2.1, with a simple g++ blah.cpp)
In particular, I'd like to get rid of the BIG_ENDIAN macro stuff, as I'm sure there is a nicer way to do this, as this post discusses.
A few other random details: I can't use Boost (too large a dependency for the project). The string will usually contain between 1536 (8^3 * 3) and 98304 (32^3 * 3) float values, at most 786432 (64^3 * 3).
(edit2: added another value, 00000080 == -0.0)
The following is your updated code modified to remove the #ifdef BIG_ENDIAN block. It uses a read technique that should be host byte order independent. It does this by reading the hex bytes (which are little endian in your source string) into a big endian string format compatible with the iostream std::hex operator. Once in this format it should not matter what the host byte order is.
Additionally, it fixes a bug in that rawhex needs to be zero terminated to be inserted into convert without trailing garbage in some cases.
I do not have a big endian system to test on, so please verify on your platform. This was compiled and tested under Cygwin.
#include <sstream>
#include <iostream>

int main()
{
    // The hex-encoded string, and number of values are loaded from a file.
    // The num_vals might be wrong, so some basic error checking is needed.
    std::string inputstring = "0000003F0000803FAD10753F00000080";
    int num_vals = 4;

    std::istringstream ss(inputstring);

    size_t const k_DataSize = sizeof(float);
    size_t const k_HexOctetLen = 2;
    for (uint32_t i = 0; i < num_vals; ++i)
    {
        char rawhex[k_DataSize * k_HexOctetLen + 1];

        // read little endian string into memory array
        for (uint32_t j = k_DataSize; (j > 0) && ss.good(); --j)
        {
            ss.read(rawhex + ((j-1) * k_HexOctetLen), k_HexOctetLen);
        }

        // terminate the string (needed for safe conversion)
        rawhex[k_DataSize * k_HexOctetLen] = 0;

        if (ss.good())
        {
            std::stringstream convert;
            convert << std::hex << rawhex;
            uint32_t val;
            convert >> val;
            std::cerr << (*(float*)(&val)) << "\n";
        }
        else
        {
            std::ostringstream os;
            os << "Not enough values in LUT data. Found " << i;
            os << ". Expected " << num_vals;
            std::cerr << os.str() << std::endl;
            throw std::exception();
        }
    }
}
I think the whole istringstream business is overkill. It's much easier to parse this yourself, one digit at a time.
First, create a function to convert a hex digit into an integer:
signed char htod(char c)
{
    c = tolower(c);
    if(isdigit(c))
        return c - '0';
    if(c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    return -1;
}
Then simply convert the string into an integer. The code below doesn't check for errors and assumes big endianness -- but you should be able to fill in the details.
unsigned long t = 0;
for(int i = 0; i < s.length(); ++i)
    t = (t << 4) | htod(s[i]); // shift in each hex digit from the left
Then your float is
float f = * (float *) &t;
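A note on that last line: the pointer cast type-puns through an incompatible type, which runs afoul of strict aliasing. A safer equivalent, assuming t holds the 32 bits assembled in the loop above:

#include <cstdint>
#include <cstring>

uint32_t bits = static_cast<uint32_t>(t); // keep only the 32 assembled bits
float f;
std::memcpy(&f, &bits, sizeof f);         // copy the bit pattern instead of casting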
This is what we ended up with, in OpenColorIO/src/core/FileFormatIridasLook.cpp.
(Amardeep's answer with the unsigned uint32_t fix would likely work as well.)
// convert hex ascii to int
// return true on success, false on failure
bool hexasciitoint(char& ival, char character)
{
    if(character>=48 && character<=57) // [0-9]
    {
        ival = static_cast<char>(character-48);
        return true;
    }
    else if(character>=65 && character<=70) // [A-F]
    {
        ival = static_cast<char>(10+character-65);
        return true;
    }
    else if(character>=97 && character<=102) // [a-f]
    {
        ival = static_cast<char>(10+character-97);
        return true;
    }

    ival = 0;
    return false;
}

// convert array of 8 hex ascii to f32
// The input hexascii is required to be a little-endian representation
// as used in the iridas file format
// "AD10753F" -> 0.9572857022285461f on ALL architectures
bool hexasciitofloat(float& fval, const char * ascii)
{
    // Convert all ASCII numbers to their numerical representations
    char asciinums[8];
    for(unsigned int i=0; i<8; ++i)
    {
        if(!hexasciitoint(asciinums[i], ascii[i]))
        {
            return false;
        }
    }

    unsigned char * fvalbytes = reinterpret_cast<unsigned char *>(&fval);

#if OCIO_LITTLE_ENDIAN
    // Since incoming values are little endian, and we're on little endian
    // preserve the byte order
    fvalbytes[0] = (unsigned char) (asciinums[1] | (asciinums[0] << 4));
    fvalbytes[1] = (unsigned char) (asciinums[3] | (asciinums[2] << 4));
    fvalbytes[2] = (unsigned char) (asciinums[5] | (asciinums[4] << 4));
    fvalbytes[3] = (unsigned char) (asciinums[7] | (asciinums[6] << 4));
#else
    // Since incoming values are little endian, and we're on big endian
    // flip the byte order
    fvalbytes[3] = (unsigned char) (asciinums[1] | (asciinums[0] << 4));
    fvalbytes[2] = (unsigned char) (asciinums[3] | (asciinums[2] << 4));
    fvalbytes[1] = (unsigned char) (asciinums[5] | (asciinums[4] << 4));
    fvalbytes[0] = (unsigned char) (asciinums[7] | (asciinums[6] << 4));
#endif

    return true;
}
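A quick usage sketch (a hypothetical driver, not part of the OCIO source), using the sample value from the comment above:

#include <cstdio>

int main()
{
    float val = 0.0f;
    if (hexasciitofloat(val, "AD10753F"))
        std::printf("%f\n", val); // expected: 0.957286
    else
        std::printf("parse failed\n");
    return 0;
}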