Data corruption Piping between C++ and Python

Data corruption Piping between C++ and Python - python

I am writing some code that takes binary data from Python, Pipes it to C++, does some processing on the data, (in this case calculating a mutual information metric) and then pipes the results back to python. While testing I have found that everything works fine if the data I send is a set of 2 arrays with dimensions less than 1500 X 1500, but if I send 2 arrays that are 2K X 2K I get back a lot of corrupted nonsense.
I currently believe the algorithmic portion of the code is fine because it provides the expected answers during testing with small (<=1500 X1500) arrays. That leads me to believe that this is an issue with either the stdin or stdout piping. That maybe I’m passing some intrinsic limit somewhere.
The Python Code and C++ code are below.
Python Code:
import subprocess
import struct
import sys
import numpy as np
#set up the variables needed
bytesPerDouble = 8
sizeX = 2000
sizeY = 2000
offset = sizeX*sizeY
totalBytesPerArray = sizeX*sizeY*bytesPerDouble
totalBytes = totalBytesPerArray*2 #the 2 is because we pass 2 different versions of the 2D array
#setup the testing data array
a = np.zeros(sizeX*sizeY*2, dtype='d')
for i in range(sizeX):
for j in range(sizeY):
a[j+i*sizeY] = i
a[j+i*sizeY+offset] = i
if i % 10 == 0:
a[j+i*sizeY+offset] = j
data = a.tobytes('C')
strTotalBytes = str(totalBytes)
strLineBytes = str(sizeY*bytesPerDouble)
#communicate with c++ code
print("starting C++ code")
command = "C:\Python27\PythonPipes.exe"
proc = subprocess.Popen([command, strTotalBytes, strLineBytes, str(sizeY), str(sizeX)], stdin=subprocess.PIPE,stderr=subprocess.PIPE,stdout=subprocess.PIPE)
ByteBuffer = (data)
proc.stdin.write(ByteBuffer)
print("Reading results back from C++")
for i in range(sizeX):
returnvalues = proc.stdout.read(sizeY*bytesPerDouble)
a = buffer(returnvalues)
b = struct.unpack_from(str(sizeY)+'d', a)
print str(b) + " " + str(i)
print('done')
C++ Code:
Main function:
int main(int argc, char **argv) {
int count = 0;
long totalbytes = stoi(argv[argc-4], nullptr,10); //bytes being transfered
long bytechunk = stoi(argv[argc - 3], nullptr, 10); //bytes being transfered at a time
long height = stoi(argv[argc-2], nullptr, 10); //bytes being transfered at a time
long width = stoi(argv[argc-1], nullptr, 10); //bytes being transfered at a time
long offset = totalbytes / sizeof(double) / 2;
data = new double[totalbytes/sizeof(double)];
int columnindex = 0;
//read in data from pipe
while (count<totalbytes) {
fread(&(data[columnindex]), 1, bytechunk, stdin);
columnindex += bytechunk / sizeof(double);
count += bytechunk;
}
//calculate the data transform
MutualInformation MI = MutualInformation();
MI.Initialize(data, height, width, offset);
MI.calcMI();
count = 0;
//*
//write out data to pipe
columnindex = 0;
while (count<totalbytes/2) {
fwrite(&(MI.getOutput()[columnindex]), 1, bytechunk, stdout);
fflush(stdout);
count += bytechunk;
columnindex += bytechunk/sizeof(double);
}
//*/
delete [] data;
return 0;
}
and in case you need it the actual processing code:
double MutualInformation::calcMI(){
double rvalue = 0.0;
std::map<int, map<int, double>> lHistXY = map<int, map<int, double>>();
std::map<int, double> lHistX = map<int, double>();
std::map<int, double> lHistY = map<int, double>();
typedef std::map<int, std::map<int, double>>::iterator HistXY_iter;
typedef std::map<int, double>::iterator HistY_iter;
//calculate Entropys and MI
double MI = 0.0;
double Hx = 0.0;
double Hy = 0.0;
double Px = 0.0;
double Py = 0.0;
double Pxy = 0.0;
//scan through the image
int ip = 0;
int jp = 0;
int chipsize = 3;
//setup zero array
double * zeros = new double[this->mHeight];
for (int j = 0; j < this->mHeight; j++){
zeros[j] = 0.0;
}
//zero out Output array
for (int i = 0; i < this->mWidth; i++){
memcpy(&(this->mOutput[i*this->mHeight]), zeros, this->mHeight*8);
}
double index = 0.0;
for (int ioutter = chipsize; ioutter < (this->mWidth - chipsize); ioutter++){
//write out processing status
//index = (double)ioutter;
//fwrite(&index, 8, 1, stdout);
//fflush(stdout);
//*
for (int j = chipsize; j < (this->mHeight - chipsize); j++){
//clear the histograms
lHistX.clear();
lHistY.clear();
lHistXY.clear();
//chip out a section of the image
for (int k = -chipsize; k <= chipsize; k++){
for (int l = -chipsize; l <= chipsize; l++){
ip = ioutter + k;
jp = j + l;
//update X histogram
if (lHistX.count(int(this->mData[ip*this->mHeight + jp]))){
lHistX[int(this->mData[ip*this->mHeight + jp])] += 1.0;
}else{
lHistX[int(this->mData[ip*this->mHeight + jp])] = 1.0;
}
//update Y histogram
if (lHistY.count(int(this->mData[ip*this->mHeight + jp+this->mOffset]))){
lHistY[int(this->mData[ip*this->mHeight + jp+this->mOffset])] += 1.0;
}
else{
lHistY[int(this->mData[ip*this->mHeight + jp+this->mOffset])] = 1.0;
}
//update X and Y Histogram
if (lHistXY.count(int(this->mData[ip*this->mHeight + jp]))){
//X Key exists check if Y key exists
if (lHistXY[int(this->mData[ip*this->mHeight + jp])].count(int(this->mData[ip*this->mHeight + jp + this->mOffset]))){
//X & Y keys exist
lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] += 1;
}else{
//X exist but Y doesn't
lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] = 1;
}
}else{
//X Key Didn't exist
lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] = 1;
};
}
}
//calculate PMI, Hx, Hy
// iterator->first = key
// iterator->second = value
MI = 0.0;
Hx = 0.0;
Hy = 0.0;
for (HistXY_iter Hist2D_iter = lHistXY.begin(); Hist2D_iter != lHistXY.end(); Hist2D_iter++) {
Px = lHistX[Hist2D_iter->first] / ((double) this->mOffset);
Hx -= Px*log(Px);
for (HistY_iter HistY_iter = Hist2D_iter->second.begin(); HistY_iter != Hist2D_iter->second.end(); HistY_iter++) {
Py = lHistY[HistY_iter->first] / ((double) this->mOffset);
Hy -= Py*log(Py);
Pxy = HistY_iter->second / ((double) this->mOffset);
MI += Pxy*log(Pxy / Py / Px);
}
}
//normalize PMI to max(Hx,Hy) so that the PMI value runs from 0 to 1
if (Hx >= Hy && Hx > 0.0){
MI /= Hx;
}else if(Hy > Hx && Hy > 0.0){
MI /= Hy;
}
else{
MI = 0.0;
}
//write PMI to data output array
if (MI < 1.1){
this->mOutput[ioutter*this->mHeight + j] = MI;
}
else{
this->mOutput[ioutter*this->mHeight + j] = 0.0;
}
}
}
return rvalue;
}
with arrays that return something that makes sense I get output bounded between 0 and 1 like this:
(0.0, 0.0, 0.0, 0.7160627908692593, 0.6376472316395495, 0.5728801401524277,...
with the 2Kx2K or higher arrays I get nonesense like this (even though the code clamps the values between 0 and 1):
(-2.2491400820412374e+228, -2.2491400820412374e+228, -2.2491400820412374e+228, -2.2491400820412374e+228, -2.2491400820412374e+228,...
I would like to know why this code is corrupting the data set after it is assigned between 0.0 and 1, and whether or not it is a piping issue, a stdin/stdout issue, a buffer issue of some sort, or a coding issue I am simply not seeing.
Update I tried passing the data in smaller chunks using the code that Chris suggested with no luck. also of note is that I added a catch for ferror on stdout and it never got tripped so I am pretty sure that the bytes are at least making it to stdout. Is it possible that something else is writing to stdout somehow? maybe an extra byte making its way into stdout while my program is running? I find this doubtful as the errors are appearing consistently on the 4th fwrite read in the 10th entry.
Per Craig's request here is the full C++ code (the full Python Code is already posted): it is sitting in 3 files:
main.cpp
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <iostream>
#include "./MutualInformation.h"
double * data;
using namespace std;
void
xxwrite(unsigned char *buf, size_t wlen, FILE *fo)
{
size_t xlen;
for (; wlen > 0; wlen -= xlen, buf += xlen) {
xlen = wlen;
if (xlen > 1024)
xlen = 1024;
xlen = fwrite(buf, 1, xlen, fo);
fflush(fo);
}
}
int main(int argc, char **argv) {
int count = 0;
long totalbytes = stoi(argv[argc-4], nullptr,10); //bytes being transfered
long bytechunk = stoi(argv[argc - 3], nullptr, 10); //bytes being transfered at a time
long height = stoi(argv[argc-2], nullptr, 10); //bytes being transfered at a time
long width = stoi(argv[argc-1], nullptr, 10); //bytes being transfered at a time
long offset = totalbytes / sizeof(double) / 2;
data = new double[totalbytes/sizeof(double)];
int columnindex = 0;
//read in data from pipe
while (count<totalbytes) {
fread(&(data[columnindex]), 1, bytechunk, stdin);
columnindex += bytechunk / sizeof(double);
count += bytechunk;
}
//calculate the data transform
MutualInformation MI = MutualInformation();
MI.Initialize(data, height, width, offset);
MI.calcMI();
count = 0;
columnindex = 0;
while (count<totalbytes/2) {
xxwrite((unsigned char*)&(MI.getOutput()[columnindex]), bytechunk, stdout);
count += bytechunk;
columnindex += bytechunk/sizeof(double);
}
delete [] data;
return 0;
}
MutualInformation.h
#include <map>
using namespace std;
class MutualInformation
{
private:
double * mData;
double * mOutput;
long mHeight;
long mWidth;
long mOffset;
public:
MutualInformation();
~MutualInformation();
bool Initialize(double * data, long Height, long Width, long Offset);
const double * getOutput();
double calcMI();
};
MutualInformation.cpp
#include "MutualInformation.h"
MutualInformation::MutualInformation()
{
this->mData = nullptr;
this->mOutput = nullptr;
this->mHeight = 0;
this->mWidth = 0;
}
MutualInformation::~MutualInformation()
{
delete[] this->mOutput;
}
bool MutualInformation::Initialize(double * data, long Height, long Width, long Offset){
bool rvalue = false;
this->mData = data;
this->mHeight = Height;
this->mWidth = Width;
this->mOffset = Offset;
//allocate output data
this->mOutput = new double[this->mHeight*this->mWidth];
return rvalue;
}
const double * MutualInformation::getOutput(){
return this->mOutput;
}
double MutualInformation::calcMI(){
double rvalue = 0.0;
std::map<int, map<int, double>> lHistXY = map<int, map<int, double>>();
std::map<int, double> lHistX = map<int, double>();
std::map<int, double> lHistY = map<int, double>();
typedef std::map<int, std::map<int, double>>::iterator HistXY_iter;
typedef std::map<int, double>::iterator HistY_iter;
//calculate Entropys and MI
double MI = 0.0;
double Hx = 0.0;
double Hy = 0.0;
double Px = 0.0;
double Py = 0.0;
double Pxy = 0.0;
//scan through the image
int ip = 0;
int jp = 0;
int chipsize = 3;
//setup zero array
double * zeros = new double[this->mHeight];
for (int j = 0; j < this->mHeight; j++){
zeros[j] = 0.0;
}
//zero out Output array
for (int i = 0; i < this->mWidth; i++){
memcpy(&(this->mOutput[i*this->mHeight]), zeros, this->mHeight*8);
}
double index = 0.0;
for (int ioutter = chipsize; ioutter < (this->mWidth - chipsize); ioutter++){
for (int j = chipsize; j < (this->mHeight - chipsize); j++){
//clear the histograms
lHistX.clear();
lHistY.clear();
lHistXY.clear();
//chip out a section of the image
for (int k = -chipsize; k <= chipsize; k++){
for (int l = -chipsize; l <= chipsize; l++){
ip = ioutter + k;
jp = j + l;
//update X histogram
if (lHistX.count(int(this->mData[ip*this->mHeight + jp]))){
lHistX[int(this->mData[ip*this->mHeight + jp])] += 1.0;
}else{
lHistX[int(this->mData[ip*this->mHeight + jp])] = 1.0;
}
//update Y histogram
if (lHistY.count(int(this->mData[ip*this->mHeight + jp+this->mOffset]))){
lHistY[int(this->mData[ip*this->mHeight + jp+this->mOffset])] += 1.0;
}
else{
lHistY[int(this->mData[ip*this->mHeight + jp+this->mOffset])] = 1.0;
}
//update X and Y Histogram
if (lHistXY.count(int(this->mData[ip*this->mHeight + jp]))){
//X Key exists check if Y key exists
if (lHistXY[int(this->mData[ip*this->mHeight + jp])].count(int(this->mData[ip*this->mHeight + jp + this->mOffset]))){
//X & Y keys exist
lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] += 1;
}else{
//X exist but Y doesn't
lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] = 1;
}
}else{
//X Key Didn't exist
lHistXY[int(this->mData[ip*this->mHeight + jp])][int(this->mData[ip*this->mHeight + jp + this->mOffset])] = 1;
};
}
}
//calculate PMI, Hx, Hy
// iterator->first = key
// iterator->second = value
MI = 0.0;
Hx = 0.0;
Hy = 0.0;
for (HistXY_iter Hist2D_iter = lHistXY.begin(); Hist2D_iter != lHistXY.end(); Hist2D_iter++) {
Px = lHistX[Hist2D_iter->first] / ((double) this->mOffset);
Hx -= Px*log(Px);
for (HistY_iter HistY_iter = Hist2D_iter->second.begin(); HistY_iter != Hist2D_iter->second.end(); HistY_iter++) {
Py = lHistY[HistY_iter->first] / ((double) this->mOffset);
Hy -= Py*log(Py);
Pxy = HistY_iter->second / ((double) this->mOffset);
MI += Pxy*log(Pxy / Py / Px);
}
}
//normalize PMI to max(Hx,Hy) so that the PMI value runs from 0 to 1
if (Hx >= Hy && Hx > 0.0){
MI /= Hx;
}else if(Hy > Hx && Hy > 0.0){
MI /= Hy;
}
else{
MI = 0.0;
}
//write PMI to data output array
if (MI < 1.1){
this->mOutput[ioutter*this->mHeight + j] = MI;
}
else{
this->mOutput[ioutter*this->mHeight + j] = 0.0;
//cout << "problem with output";
}
}
}
//*/
return rvalue;
}
SOLVED By 6502
6502's answer below solved my problem. I needed to explicitly tell Windows to use a binary mode for stdin / stdout. to do that I had to include 2 new header files in my main cpp file.
#include <fcntl.h>
#include <io.h>
add the following lines of code (modified away from 6502's POSIX versions because Visual Studio complained) to the beginning of my main function
_setmode(_fileno(stdout), O_BINARY);
_setmode(_fileno(stdin), O_BINARY);
and then add these lines to my Python code:
import os, msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)

The problem is that stdin/stdout in windows are opened in text mode, not in binary mode and therefore will mess up when the character 13 (\r) is sent.
You can set for example binary mode in Python with
import os, msvcrt
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
and in C++ with
_setmode(fileno(stdout), O_BINARY);
_setmode(fileno(stdin), O_BINARY);
See https://msdn.microsoft.com/en-us/library/tw4k6df8.aspx

Your C++ fwrite code does not account for getting a "short" transfer.
Here's a slight tweak:
//write out data to pipe
columnindex = 0;
while (count < totalbytes / 2) {
wlen = fwrite(&(MI.getOutput()[columnindex]), 1, bytechunk, stdout);
fflush(stdout);
count += wlen;
columnindex += wlen / sizeof(double);
}
Note: You still need to be careful as this would still have issues if wlen returns and it's not a multiple of sizeof(double). For example, if bytechunk were 16 and wlen came back with 14, you'd need an additional fwrite with length 2 before continuing the loop. A generalization of this is just to treat the entire data matrix as a giant byte buffer and loop on that.
Actually, you'll get about the same efficiency with many much smaller transfers that are capped by a fixed (i.e. "known safe amount") of [say] 1024 bytes. This works because the output is a byte stream.
Here's a slightly more general solution that I've often used:
void
xxwrite(void *buf,size_t wlen,FILE *fo)
{
size_t xlen;
for (; wlen > 0; wlen -= xlen, buf += xlen) {
xlen = wlen;
if (xlen > 1024)
xlen = 1024;
xlen = fwrite(buf,1,xlen,fo);
fflush(fo);
}
}
//write out data to pipe
columnindex = 0;
while (count < totalbytes / 2) {
xxwrite(&(MI.getOutput()[columnindex]), bytechunk, stdout);
count += bytechunk;
columnindex += bytechunk / sizeof(double);
}
UPDATE:
I've downloaded all your code and run it. I've got good news and bad news: The code runs fine here, even for a matrix size above 3000. I ran it both using xxwrite and without and the results were the same.
Using my limited python skills, I added some pretty print to your python script (e.g. some line wrap) and had it check every value for range and annotate any bad values. There were none found by the script. Also, visual inspection of the values turned up nothing [this was true before the pretty print, so it hasn't introduced anything]. Just lots of zeros and then blocks in the 0.9 range.
The only difference I can see is that I'm using gcc [and, of course, python] on linux. But, from your script it seems your using Windows [based on the C:\... path for your C++ executable. This shouldn't matter for this application, but I mention it anyway.
So, pipes work here. One thing you might try is to direct the C++ output to a file. Then, have the script read back from the file (i.e. no pipe) and see if that makes a difference. I tend to think not, but ...
Also, I don't know what compiler and python implementation you're using under Windows. Whenever I have to do this, I usually have Cygwin installed as it gives one of the closest implementations of linux/Unix-like environment (i.e. pipes are more likely to work as advertised).
Anyway, here's the modified script. Also note that I added os.getenv to grab alternate matrix sizes and an alternate place for the C++ executable, so that it would work for both of us with minimal pain
#!/usr/bin/python
import subprocess
import struct
import sys
import os
import numpy as np
val = os.getenv("MTX","2000")
sizeX = int(val)
sizeY = sizeX
print "sizeX=%d sizeY=%d" % (sizeX,sizeY)
#set up the variables needed
bytesPerDouble = 8
offset = sizeX*sizeY
totalBytesPerArray = sizeX*sizeY*bytesPerDouble
totalBytes = totalBytesPerArray*2 #the 2 is because we pass 2 different versions of the 2D array
#setup the testing data array
a = np.zeros(sizeX*sizeY*2, dtype='d')
for i in range(sizeX):
for j in range(sizeY):
a[j+i*sizeY] = i
a[j+i*sizeY+offset] = i
if i % 10 == 0:
a[j+i*sizeY+offset] = j
data = a.tobytes('C')
strTotalBytes = str(totalBytes)
strLineBytes = str(sizeY*bytesPerDouble)
#communicate with c++ code
print("starting C++ code")
command = os.getenv("CPGM",None);
if command is None:
command = "C:\Python27\PythonPipes.exe"
proc = subprocess.Popen([command, strTotalBytes, strLineBytes, str(sizeY), str(sizeX)], stdin=subprocess.PIPE,stderr=subprocess.PIPE,stdout=subprocess.PIPE)
ByteBuffer = (data)
proc.stdin.write(ByteBuffer)
def prt(i,b):
hangflg = 0
per = 8
for j in range(0,len(b)):
if ((j % per) == 0):
print("[%d,%d]" % (i,j)),
q = b[j]
print(q),
hangflg = 1
if (q < 0.0) or (q > 1.0):
print("=WTF"),
if ((j % per) == (per - 1)):
print("")
hangflg = 0
if (hangflg):
print("")
print("Reading results back from C++")
for i in range(sizeX):
returnvalues = proc.stdout.read(sizeY*bytesPerDouble)
a = buffer(returnvalues)
b = struct.unpack_from(str(sizeY)+'d', a)
prt(i,b)
###print str(b) + " " + str(i)
###print str(i) + ": " + str(b)
print('done')

Related

Automatically Detecting and Placing Braces On C/C++ Loops in Python

Edited overview and Scope
This problem boils down to the following problem; given a source file, automatically place open and closing braces for optional control blocks in C/C++. These blocks are if, else, do, while, and for afaik.
Overview
I am attempting to trace and analyze various loops, statements, and the like in a massive code repository that I have not written myself. My end goal is to perform timing statistics on all loops (will be expanded to other things in the future, but out of scope for this problem) in a given source of code. These trace functions do various things, but they all follow a similar issue; being placed before and after a block of interest is executed.
In essence, I want to transform the code:
for (i = 0; i < some_condition; i++) {
some_code = goes(here);
}
for (i = 0; i < some_condition; i++)
{
some_code = goes(here);
}
for (i = 0; i < some_condition; i++) { some_code = goes(here); }
for (i = 0; i < some_condition; i++)
some_code = goes(here);
for (i = 0; i < some_condition; i++)
for (i = 0; i < some_condition; i++)
some_code = goes(here);
to the following:
S_TRACE(); for (i = 0; i < some_condition; i++) {
some_code = goes(here);
} E_TRACE();
S_TRACE(); for (i = 0; i < some_condition; i++)
{
some_code = goes(here);
} E_TRACE();
S_TRACE(); for (i = 0; i < some_condition; i++) { some_code = goes(here); } E_TRACE();
S_TRACE(); for (i = 0; i < some_condition; i++) {
some_code = goes(here); } E_TRACE();
S_TRACE(); for (i = 0; i < some_condition; i++) {
S_TRACE(); for (i = 0; i < some_condition; i++) {
some_code = goes(here); } E_TRACE(); } E_TRACE();
Basically, without new lines of code added, I want to insert a function before the statement begins (easy) and after the statement (which can be hard). For example, the following code is actually in the repository of code:
for( int i = 0; names[i]; i++ )
if( !STRCMP( arg, names[i] ) )
{
*dst = names[i];
return 0;
}
return -1;
Terrible readability aside, I'd like to place braces on this type of loop, and insert my tracing functions. Arguments to the function (to account for nesting) I have omitted.
Current Implementation
My current implementation uses regex in Python, as I'm fairly comfortable and quick in this language. Relevant segments of implementation are as follows:
import re
source = []
loops = [r"^\s*(for\s*\(.*\))\s*($|{\s*$|\s*)", r"^\s*(while\s*\(.*\))\s*($|{\s*$|\s*)", r"^\s*(do)\s*({?)$"]
def analyize_line(out_file):
lnum, lstr = source.pop(0)
for index, loop_type in enumerate(loops):
match = re.findall(loop_type, lstr)
if match:
print(lnum + 1, ":", match[0][0])
if '{' in match[0][1]:
out_file.write(lstr.replace(match[0][0], "S_TRACE(); {}".format(match[0][0])))
look_ahead_place()
return
else:
last_chance = lstr + source[0][1]
last_match = re.findall(loop_type, last_chance)
if last_match and '{' in last_match[0][1]:
# same as above
out_file.write(lstr.replace(match[0][0], "S_TRACE(); {}".format(match[0][0])))
lcnum, lcstr = source.pop(0)
out_file.write(lcstr)
look_ahead_place()
else:
# No matching bracket, make one
out_file.write(lstr.replace(match[0][0], "S_TRACE(); {} {{".format(match[0][0])))
look_ahead_and_place_bracket()
return
# if we did not match, just a normal line
out_file.write(lstr)
def look_ahead_place():
depth = 1
for idx, nl in enumerate(source):
substr = ""
for c in nl[1]:
substr += c
if depth > 0:
if c == '{':
depth += 1
elif c == '}':
depth -= 1
if depth == 0:
substr += " E_TRACE(); "
if depth == 0:
source[idx][1] = substr
return
print("Error finding closing bracket here!")
exit()
def look_ahead_and_place_bracket():
for idx, nl in enumerate(source):
# Is the next line a scopable? how to handle multiline? ???
# TODO
return
def trace_loops():
global source
src_filename = "./example.c"
src_file = open(src_filename)
out_file = open(src_filename + ".tr", 'w')
source = [[number, line] for number, line in enumerate(src_file.readlines())]
while len(source) > 0:
analyize_line(out_file)
trace_loops()
The example.c is the example provided above for demonstration purposes. I am struggling to come up with an algorithm that will handle both inline loops, loops with no matching braces, and loops that contain no braces but have multiline inners.
Any help in the development of my algorithm would be much appreciated. Let me know in the comments if there is something that needs to be addressed more.
EDIT :: Further Examples and Expected Results
Characters that are added are surrounded by < and > tokens for visibility.
Nested Brace-less:
for( int i = 0; i < h->fdec->i_plane; i++ )
for( int y = 0; y < h->param.i_height >> !!i; y++ )
fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]], 1, h->param.i_width >> !!i, f );
<S_TRACE(); >for( int i = 0; i < h->fdec->i_plane; i++ )< {>
<S_TRACE(); >for( int y = 0; y < h->param.i_height >> !!i; y++ )< {>
fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]], 1, h->param.i_width >> !!i, f );< } E_TRACE();>< } E_TRACE();>
Nested Mixed:
for( int i = 0; i < h->fdec->i_plane; i++ ) {
for( int y = 0; y < h->param.i_height >> !!i; y++ )
fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]], 1, h->param.i_width >> !!i, ff );
}
<S_TRACE(); >for( int i = 0; i < h->fdec->i_plane; i++ ) {
<S_TRACE(); >for( int y = 0; y < h->param.i_height >> !!i; y++ )< {>
fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]], 1, h->param.i_width >> !!i, ff );< } E_TRACE();>
}< E_TRACE();>
Large Multiline Nested Brace-less:
for( int i = 0; i < h->sh.i_mmco_command_count; i++ )
for( int j = 0; h->frames.reference[j]; j++ )
if( h->frames.reference[j]->i_poc == h->sh.mmco[i].i_poc )
x264_frame_push_unused(
h,
x264_frame_shift( &h->frames.reference[j] )
);
<S_TRACE(); >for( int i = 0; i < h->sh.i_mmco_command_count; i++ )< {>
<S_TRACE(); >for( int j = 0; h->frames.reference[j]; j++ )< {>
if( h->frames.reference[j]->i_poc == h->sh.mmco[i].i_poc )
x264_frame_push_unused(
h,
x264_frame_shift( &h->frames.reference[j] )
);< } E_TRACE();>< } E_TRACE();>
This Gross Multiliner:
for( int j = 0;
j < ((int) offsetof(x264_t,stat.frame.i_ssd) - (int) offsetof(x264_t,stat.frame.i_mv_bits)) / (int) sizeof(int);
j++ )
((int*)&h->stat.frame)[j] += ((int*)&t->stat.frame)[j];
for( int j = 0; j < 3; j++ )
h->stat.frame.i_ssd[j] += t->stat.frame.i_ssd[j];
h->stat.frame.f_ssim += t->stat.frame.f_ssim;
<S_TRACE(); >for( int j = 0;
j < ((int) offsetof(x264_t,stat.frame.i_ssd) - (int) offsetof(x264_t,stat.frame.i_mv_bits)) / (int) sizeof(int);
j++ )< {>
((int*)&h->stat.frame)[j] += ((int*)&t->stat.frame)[j];< } E_TRACE();>
<S_TRACE(); >for( int j = 0; j < 3; j++ )< {>
h->stat.frame.i_ssd[j] += t->stat.frame.i_ssd[j];< } E_TRACE();>
h->stat.frame.f_ssim += t->stat.frame.f_ssim;
If Statement Edgecase:
Perhaps my implementation requires an inclusion of if statements to account for this?
if( h->sh.i_type != SLICE_TYPE_I )
for( int i_list = 0; i_list < 2; i_list++ )
for( int i = 0; i < 32; i++ )
h->stat.i_mb_count_ref[h->sh.i_type][i_list][i] += h->stat.frame.i_mb_count_ref[i_list][i];

You are going down a rabbit hole. The more cases you run into the more cases you will run into until you have to write an actual parser for C++, which will require learning a whole technology toolchain.
Instead I would strongly recommend that you simplify your life by using a formatting tool like clang-format that already knows how to parse C++ to first rewrite with consistent formatting (so braces are now always there), and then you just need to worry about balanced braces.
(If this is part of a build process, you can copy code, reformat it, then analyze reformatted code.)
Note, if the code makes interesting use of templates, this might not be enough. But it will hopefully get you most of the way there.

After extensive research, numerous applications, and many implementations, I've gotten just what I needed.
There is an existing solution called Uncrustify. The documentation is a bit lacking, but with some probing today the following config will do as I requested above.
$ cat .uncrustify
# Uncrustify-0.70.1
nl_if_brace = remove
nl_brace_else = force
nl_elseif_brace = remove
nl_else_brace = remove
nl_else_if = remove
nl_before_if_closing_paren = remove
nl_for_brace = remove
nl_while_brace = remove
nl_do_brace = remove
nl_brace_while = remove
nl_multi_line_sparen_open = remove
nl_multi_line_sparen_close = remove
nl_after_vbrace_close = true
mod_full_brace_do = force
mod_full_brace_for = force
mod_full_brace_function = force
mod_full_brace_if = force
mod_full_brace_while = force
You can run this using the command:
$ uncrustify -c /path/to/.uncrustify --no-backup example.c
For the future dwellers out there looking at similar issues:
clang-format is essentially a white-space only formatter.
clang-tidy can do, to a lesser extent, of what uncrustify can do; however requires direct integration with your compiler database or a full list of compiler options, which can be combersome.
indent is similar to clang-format
C++ Resharper does not support bracket formatting as of 2019.3, though planned for 2020.1.
VS Code does not support auto/forced bracket insertion
All these claims are made as of today and hopefully will be out of date soon so there are a plethera of tools for us to use and abuse :P

python script slowing down and almost stops after a couple of seconds

I am trying to make a resource monitor with an arduino and it is working great for almost 20 seconds before it almost stops running.
When it slows down, it takes almost 5 seconds between updates.
I have tried to comment out everything with psutil and giving it a permanent value.
and have tried the same with GPUtil.
Here is the python code
import serial.tools.list_ports
import serial
import psutil
import GPUtil
import time
import serial
ports = list(serial.tools.list_ports.comports())
baud = 9600
for p in ports:
if "Arduino" in p[1]:
port = p[0]
ser=serial.Serial(port, baud, timeout=1)
try:
while True:
cpuUsage = psutil.cpu_percent()
ramUsage = psutil.virtual_memory()
cpuUsage = str(cpuUsage)
GPUs = GPUtil.getGPUs()
gpuUsage = GPUs[0].load
gpuUsage = str(gpuUsage)
gpuUsage = gpuUsage[2:]
ramUsage = str(ramUsage.percent)
toSend = cpuUsage + "," + gpuUsage + ","+ ramUsage
print (toSend)
ser.write(toSend.encode())
#print("20.5".encode())
#line = ser.readline()[:-2]
#line.decode()
#print ("Read : " , line);
time.sleep(0.1)
except:
print ("error")
Here is the Arduino code
#include <FastLED.h>
#define NUM_LEDS 15
#define DATA_PIN 6
float Cpu = 40;
float Gpu = 99;
float Ram = 60;
String Cpu_Read;
String Gpu_Read;
String Ram_Read;
int RXLED = 17;
String StrNumbers = "";
int numbers;
int Received = 0;
int Separrator = 0;
String Text = "";
CRGB leds[NUM_LEDS];
void setup() {
// put your setup code here, to run once:
FastLED.addLeds<WS2812B, DATA_PIN, GRB>(leds, NUM_LEDS);
FastLED.setBrightness(5);
Serial.begin(115200);
pinMode(LED_BUILTIN_TX,INPUT);
pinMode(LED_BUILTIN_RX,INPUT);
}
void loop() {
// put your main code here, to run repeatedly:
char inByte = ' ';
StrNumbers = "";
Text = "";
Separrator = 0;
Cpu_Read = "";
Gpu_Read = "";
Ram_Read = "";
Received = 0;
pinMode(LED_BUILTIN_TX,INPUT);
while(Serial.available() > 0){ // only send data back if data has been sent
pinMode(LED_BUILTIN_TX,OUTPUT);
inByte = Serial.read(); // read the incoming data
if (inByte == ','){
Separrator += 1;
}
else if (Separrator == 0){
Cpu_Read += (char) inByte;
}
else if (Separrator == 1){
Gpu_Read += (char) inByte;
}
else if (Separrator == 2){
Ram_Read += (char) inByte;
Serial.print("Ram : ");
Serial.println(Ram_Read);
}
else{
Serial.println("Error");
}
/*
inByte = Serial.read(); // read the incoming data
if (isDigit(inByte)) { // tests if inByte is a digit
StrNumbers += (char) inByte;
Serial.println(inByte); // send the data back in a new line so that it is not all one long line
//Serial.println(numbers); // send the data back in a new line so that it is not all one long line
}
else if (inByte == ".")
{
abortLoop = 1;
}
else{
Text +=(char) inByte;
}
*/
Received = 1;
}
if (Received == 1){
Cpu = Cpu_Read.toInt();
Gpu = Gpu_Read.toInt();
Ram = Ram_Read.toInt();
UpdateMonitor(Cpu ,Gpu ,Ram);
}
Text.trim();
if (StrNumbers != ""){
numbers = StrNumbers.toInt();
Serial.println(numbers);
if (numbers > 100) numbers = 100;
UpdateMonitor(Cpu,numbers,Ram);
}
if(Text!= ""){
//Serial.println(Text); // send the data back in a new line so that it is not all one long line
}
if (Text == "ResourceMonitor"){
Serial.println("Yes");
}
else if (Text != ""){
Serial.println(Text);
numbers = Text.toInt();
if (numbers > 100) numbers = 100;
UpdateMonitor(Cpu, numbers, Ram);
}
}
void UpdateMonitor(int cpu,int gpu, int ram){
int Cpu_Usage = map(cpu, 0, 100, 0, 5);
int Gpu_Usage = map(gpu, 0, 100, 0, 5);
int Ram_Usage = map(ram, 0, 100, 0, 5);
FastLED.clear();
for(int led = 0; led < Ram_Usage; led++) {
leds[led] = CRGB::Blue;
}
for(int led = 0; led < Gpu_Usage; led++) {
leds[9 - led] = CRGB::Green;
}
for(int led = 0; led < Cpu_Usage; led++) {
leds[led+10] = CRGB::Red;
}
FastLED.show();
}

it is fixed now not sure what fixet it but it works atleast :)
thansk to all that have tried to helt

Recoding from C to Python

I'm pretty much a layman in C and I'm learning Python. I need to write the routine described below (in C) for Python:
#include <stdio.h>
#include <math.h>
main()
{
float hold[26], hnew[26];
float dt, dx;
float t, s;
float ho;
float time;
float f1, d2h;
int i;
int nx, nlx;
int n, nend;
int kount, kprint;
dt = 5.0;
dx = 10.0;
t = 0.02;
s = 0.002;
nx = 11;
nlx = nx-1;
ho = 16.0;
for( i = 1; i <= nx; i++ )
{
hold[i] = ho;
hnew[i] = ho;
}
hold[nx] = 11.0;
printf("\t\t\t\thead\t\t\t\t time\n\n");
kount = 1;
kprint = 2;
time = dt;
nend = 100;
for( n = 1; n <= nend; n++ )
{
/* update solution */
for( i = 2; i <= nlx; i++ )
{
f1 = dt*t/s;
d2h = ( hold[i+1] - 2.0*hold[i] + hold[i-1])/(dx*dx);
hnew[i] = hold[i] + (f1*d2h);
}
for( i = 1; i <= nlx; i++ )
{
hold[i] = hnew[i];
}
if( kount == kprint )
{
for( i = 1; i <= nx; i++ )
{
printf(" %.2f",hold[i]);
}
printf(" %6.2f\n",time);
kount = 0;
}
time = time + dt;
kount = kount + 1;
}
}
This is my attempt at Python:
import numpy as np
dt = 5.0
dx = 10.0
t = 0.02
s = 0.002
nx = 11
nlx = nx - 1
ho = 16.0
hold = np.zeros(nx+1)
hnew = np.zeros(nx+1)
for i in range(nx):
hold[i] = ho
hnew[i] = ho
hold[nx] = 11.0
However, I can't get over this because I don't know the Python correspondent of the printf function. What would be the correct form of this function in Python? What does it reffer to?

Just print() in Python with .format.
For example:
x, y = 1, 2
print("x = {0}, y = {1}".format(x, y))
Here's the doc

To print similar to C's printf, the following is an example:
f = 3.25645
g = 3.14159265358979
for fl in (f,g):
print(f'{fl:.2f}')
3.26
3.14
The first f in the print is the format specifier. The f in the braces says to consider the number as a float.

it just print() (see a small program below)
squares = []
for x in range(14):
squares.append(x**2)
squares
squares2 = [x**2 for x in range(100)]
print (squares2)

create document from c++ code from command in python

I'm working on a project using opencv. I choosed to make this project in python to ease my task. But for some tasks, python is very slow and I must code in C++ in order to have a reasonable time of execution.
My first solution was to try to use a wrapper but after two days, I couldn't manage to make it work.
I made another solution: compiling my C++ code and call it from python with a os.system command.
My code is like this:
In C++
#include <iostream>
#include <opencv2/opencv.hpp>
#include "opencv2/core/core.hpp"
using namespace cv;
using namespace std;
int main(int argc, char** argv){
Mat src = imread(argv[1], 1);
Mat dst = imread(argv[2], 1);
unsigned char *sinput = (unsigned char*)(src.data);
int lo = atoi(argv[3]);
int up = atoi(argv[4]);
int connectivity = 4;
int newMaskVal = 255;
int ffillMode = 1;
int flags = connectivity + (newMaskVal << 8) + (ffillMode == 1 ? CV_FLOODFILL_FIXED_RANGE : 0);
Rect ccomp;
unsigned int counter = 0;
for(int j = 0;j < src.rows;j++){
for(int i = 0;i < src.cols;i++){
unsigned short b = sinput[(src.cols * j * 3) + i*3 ] ;
unsigned short g = sinput[(src.cols * j * 3) + i*3 + 1];
unsigned short r = sinput[(src.cols * j * 3) + i*3 + 2];
if (r == 0 and g == 255 and b == 0){
unsigned char *dinput = (unsigned char*)(dst.data);
unsigned short db = dinput[(dst.cols * j * 3) + i*3 ] ;
unsigned short dg = dinput[(dst.cols * j * 3) + i*3 + 1];
unsigned short dr = dinput[(dst.cols * j * 3) + i*3 + 2];
if (dr != 0 or dg != 255 or db != 0){
floodFill(dst, Point(i,j), Scalar(0,255,0), &ccomp, Scalar(lo, lo, lo), Scalar(up, up, up), flags);
counter++;
if (counter > 100 && counter % 100 == 0){
cout << counter << endl;
}
}
}
}
}
imwrite("/home/gaetan/Dropbox/Projects/PyCharm/FacadeClipper/tmp/flooded.png", dst);
cout << "Image wrote!" << endl;
return 0;
}
and in python
def floodFill(self):
lo = 20
up = 20
cv2.imwrite("../tmp/im.png", self.img)
cv2.imwrite("../tmp/imG.png", self.lineImg)
os.system("./../Methods/flood ../tmp/imG.png ../tmp/im.png " + str(lo) + " " + str(up))
time.sleep(1)
self.floodImg = cv2.imread("../tmp/flooded.png")
image = Image.fromarray(cv2.cvtColor(self.floodImg, cv2.COLOR_BGR2RGB))
self.displayImage(image)
os.system('rm ../tmp/im.png ../tmp/imG.png ../tmp/flooded.png')
but sadly, the image "flooded.png" doesn't seems to be created when I use the python code, but when I use the bash command
$ ./flood /home/gaetan/ang.JPG /home/gaetan/an.JPG 20 20
the file is created. What am I doing wrong?
I know this solution is really gross and I'm really hesitating in recode it entirely in C++, but as it is already a very big bunch of code, I would rather not be forced to do it.
Thanks :)

Segmentation fault in if clause using gcc/ubuntu

I am writing a c-function for use in python. When run a segmentation fault occurs, which, according to printf calls, is thrown at an if-clause. The output to the shell is:
row 1, col 0: 1.000000
row:0, -col:0, index:0
-2: 0.000000
else
row:0, -col:1, index:1
-2: 0.000000
else
row:0, -col:2, index:2
-2: 0.000000
else
row:0, -col:3, index:3
-2: 0.000000
else
row:0, -col:4, index:4
-2: 0.000000
else
row:1, -col:0, index:5
-2: 1.000000
Speicherzugriffsfehler (Speicherabzug geschrieben)
(the last line means segmentation fault)
and the c-code is:
#include <stdio.h>
#include <math.h>
#define pi 3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679
void hough(const void* img, int imgRowCount, int imgColCount, const void* thetas, int thetaCount, const void* rhos, int rhoCount, void* out)
{
const double* imgD = (double*) img;
double* outD = (double*)out;
const double* thetasD = (double*)thetas;
const double* rhosD = (double*)rhos;
printf("row 1, col 0: %f\n", imgD[getIndex(1, 0, imgColCount)]);
int row, col, thetaInd, rhoInd, index;
double rhoVal, minDiff, diff, tmp;
for(row = 0; row<imgRowCount; row++)
{
for(col = 0; col<imgColCount; col++)
{
printf("row:%d, -col:%d, index:%d\n", row, col, getIndex(row, col, imgColCount));
tmp = imgD[getIndex(row, col, imgColCount)];
printf("-2: %f\n", tmp);
if (tmp>0.0)
{
printf("-1");
for(thetaInd = 0; thetaInd<thetaCount; thetaInd++)
{
rhoVal = col*cos(thetasD[thetaInd]*(pi/180)) + row*sin(thetasD[thetaInd]*(pi/180));
minDiff = INFINITY;
index = -1;
for(rhoInd = 0; rhoInd<rhoCount; rhoInd++)
{
diff = abs(rhoVal-rhosD[rhoInd]);
if(diff<minDiff)
{
minDiff = diff;
index = rhoInd;
}
}
if(index>=0)
{
printf("1\n");
outD[getIndex(index, thetaInd, thetaCount)] += 1;
}
}
}
else
{
printf("else\n");
}
}
}
}
int getIndex(int row, int col, int maxCol)
{
return col + row*maxCol;
}
and at last the python code beeing used:
import numpy as np
import ctypes
from scipy.misc import imread
def makeReady(arr):
return np.require(arr, dtype=np.double, requirements=["C_CONTIGUOUS"])
def hough(imgBin, thetaRes=1, rhoRes=1):
if len(imgBin.shape) > 2:
imgBin = np.mean(imgBin, axis=2)
if imgBin.max() > 1:
imgBin /= imgBin.max()
if ((imgBin!=0) * (imgBin!=1)).sum()>0:
imgBin = imgBin > (imgBin.max()/2.0)
nR,nC = imgBin.shape
theta = np.linspace(-90.0, 90.0, np.ceil(180.0/thetaRes) + 1.0)
D = np.sqrt((nR - 1)**2 + (nC - 1)**2)
q = np.ceil(D/rhoRes)
nrho = 2*q + 1
rho = np.linspace(-q*rhoRes, q*rhoRes, nrho)
H = np.zeros((len(rho), len(theta)))
imgC = makeReady(imgBin)
thetasC = makeReady(theta)
rhosC = makeReady(rho)
outC = makeReady(H)
lib = ctypes.cdll.LoadLibrary("./hough.so")
lib.hough(imgC.ctypes.data_as(ctypes.c_void_p), imgC.shape[0], imgC.shape[1], thetasC.ctypes.data_as(ctypes.c_void_p), len(thetasC), rhosC.ctypes.data_as(ctypes.c_void_p),outC.ctypes.data_as(ctypes.c_void_p))
if __name__ == "__main__":
img = 1 - (imread("lines.jpeg"))>125
print img.shape
a = np.zeros((5,5))
a[1,0] = 5
hough(a)
what am i doing wrong?
Thank you

The only thing that looks like it could cause that error is going out-of-bounds on an array. Using the function getIndex(...) inside of [] could be causing your problem.
However, due to the difficulty to read the code (no comments, and no context), I recommend using a debugger (like valgrind) to give you information about the location of the error. In fact, valgrind will even print the line number the error occurs on, provided you compile with debug symbols (-g -O0 on gcc and clang).

From the output the error seems to happen in this part of the code:
for(thetaInd = 0; thetaInd<thetaCount; thetaInd++)
{
rhoVal = col*cos(thetasD[thetaInd]*(pi/180)) + row*sin(thetasD[thetaInd]*(pi/180));
minDiff = INFINITY;
index = -1;
for(rhoInd = 0; rhoInd<rhoCount; rhoInd++)
{
diff = abs(rhoVal-rhosD[rhoInd]);
if(diff<minDiff)
{
minDiff = diff;
index = rhoInd;
}
}
if(index>=0)
{
printf("1\n");
outD[getIndex(index, thetaInd, thetaCount)] += 1;
}
}
A segmentation violation could only be caused here by accessing one of the three arrays (thetasD and rhosD and outD) out of their bounds.
This could only happend if the indices run to far, which in turn could only happen if the for-loops' break condtions are wrong, which could only happen if the wrong values had been passed to hough.
The latter indeed seems to be the case, as the Python script is missing to pass rho's size and though is passing nothing for outD.
This line:
lib.hough(imgC.ctypes.data_as(ctypes.c_void_p), imgC.shape[0], imgC.shape[1],
thetasC.ctypes.data_as(ctypes.c_void_p), len(thetasC),
rhosC.ctypes.data_as(ctypes.c_void_p),
outC.ctypes.data_as(ctypes.c_void_p))
should look like:
lib.hough(imgC.ctypes.data_as(ctypes.c_void_p), imgC.shape[0], imgC.shape[1],
thetasC.ctypes.data_as(ctypes.c_void_p), len(thetasC),
rhosC.ctypes.data_as(ctypes.c_void_p), len(rhosC),
outC.ctypes.data_as(ctypes.c_void_p))

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Data corruption Piping between C++ and Python - python

Related

Automatically Detecting and Placing Braces On C/C++ Loops in Python

python script slowing down and almost stops after a couple of seconds

Recoding from C to Python

create document from c++ code from command in python

Segmentation fault in if clause using gcc/ubuntu

Categories

Resources