One of the tools I'm using saves the HTTP data into log files per connection. I was wondering if there is some kind of script to inflate gzip compressed messages in the file.
The data looks like this:
GET /something HTTP/1.1
Content-Type: text/plain
User-Agent: Mozilla/5.0
Connection: Keep-Alive
Accept-Encoding: gzip, deflate
Accept-Language: en-US,*
Host: something.somedomain
HTTP/1.1 200 OK
Content-Encoding: gzip
Content-Type: text/xml;charset=UTF-8
Date: Wed, 28 May 2014 20:33:14 GMT
Server: something
Content-Length: 160
Connection: keep-alive
<GZIP SECTION ...
FOLLOWING MORE REQUESTS/RESPONSES
I thought I could do it by hand, but that would take too much time. Then I thought I could write a script, but since I'm not quite an expert with bash/python/perl/whatever, I was hoping somebody had already created such a script.
Thanks for any tips.
Well I helped myself out and slapped together a C++ app to do what I want. Perhaps somebody might find it useful some day. Also handles chunked encoding. How I use it 'ls | grep ".log$" | ungzip'. The log files were from SSLSplit.
// ungzip.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
void inflate(std::istream& dataIn, std::ostream& dataOut)
{
boost::iostreams::filtering_streambuf<boost::iostreams::input> in;
in.push(boost::iostreams::gzip_decompressor());
in.push(dataIn);
boost::iostreams::copy(in, dataOut);
}
// Stream buffer that exposes an existing character range [begin, end) as its
// get area, so a std::istream can read from that memory directly.
struct membuf : std::streambuf
{
    membuf(char* begin, char* end)
    {
        // Start of buffer, current read position, one-past-the-end.
        std::streambuf::setg(begin, begin, end);
    }
};
int _tmain(int argc, _TCHAR* argv[])
{
boost::iostreams::mapped_file fileIn;
std::ofstream fileOut;
// For each filename on stdin
for (std::string fileName; std::getline(std::cin, fileName);)
{
// Try opening memory mapping of that file.
try
{
fileIn.open(fileName);
if (fileIn.is_open() == false)
{
std::cout << "Error 1" << std::endl;
continue;
}
}
catch (std::exception e)
{
std::cout << e.what();
continue;
}
// Open file to write inflated output to
std::string strOut = fileName;
strOut += ".ugz";
fileOut.open(strOut, std::ios::binary);
if (fileOut.is_open() == false)
{
std::cout << "Error 2" << std::endl;
fileIn.close();
continue;
}
// Load whole file into string to verify if it atleast has HTTP/1.1 somewhere in it.
//Doesnt mean its not binary, but better than nothing.
char * pchData = fileIn.data();
std::string strWhole(pchData, pchData + fileIn.size());
std::regex reg("HTTP/1.1 ");
std::smatch match;
std::stringstream ss(strWhole);
// Interesting header information
enum {REGXCNT = 3};
std::regex regs[REGXCNT] = { std::regex("Content-Length: (\\d+)"), std::regex("Content-Encoding: gzip"), std::regex("Transfer-Encoding: chunked") };
// Verify
if (std::regex_search(strWhole, match, reg))
{
int len = 0;
bool bGzipped = false;
bool bChunked = false;
// While there is something to read
while (!ss.eof())
{
std::string strLine;
std::getline(ss, strLine);
// Empty line between Header and Body
if (strLine == "\r")
{
// Print out the empty line \r\n
fileOut << strLine << std::endl;
// If its gzipped or chunked treat it differently
if (bGzipped || bChunked)
{
// GZipped but not chunked
if (bGzipped && !bChunked)
{
// Construct helper structures inflate and write out
char * pbyBinaryData = new char[len];
ss.read(pbyBinaryData, len);
std::stringbuf stringBuf;
membuf gzipdata(pbyBinaryData, pbyBinaryData + len);
std::istream _in(&gzipdata);
std::ostream _out(&stringBuf);
inflate(_in, _out);
std::stringstream ssOut;
ssOut << _out.rdbuf();
std::string strDataOut = ssOut.str();
fileOut.write(strDataOut.c_str(), strDataOut.length());
delete [] pbyBinaryData;
}
// Chunked data goes here
else if (bChunked)
{
// This vector is used for gzipped data
std::vector<char> unchunkedData;
// Load all chunks
while (true)
{
std::getline(ss, strLine);
// Strip \r from it. It should be always at the end, but whatever - performance is not the issue
strLine.erase(std::remove(strLine.begin(), strLine.end(), '\r'), strLine.end());
// Load chunksize
int nChunkSize = std::stoi(strLine, 0, 16);
if (nChunkSize != 0)
{
// Each chunk is ended \r\n -> +2
char * tmpBuf = new char[nChunkSize + 2];
// Read actual data
ss.read(tmpBuf, nChunkSize + 2);
if (!bGzipped)
{
//Data not gzipped. Write them out directly
fileOut.write(tmpBuf, nChunkSize);
}
else
{
//Data gzipped. Add them to vector to decompress later
unchunkedData.insert(unchunkedData.end(), tmpBuf, tmpBuf + nChunkSize);
}
delete[] tmpBuf;
}
else
{
// All chunks loaded. Break the while loop.
break;
}
}
// Data was gzipped. Time to decompress
if (bGzipped)
{
std::stringbuf stringBuf;
membuf gzipdata(unchunkedData.data(), unchunkedData.data()+unchunkedData.size());
std::istream _in(&gzipdata);
std::ostream _out(&stringBuf);
inflate(_in, _out);
std::stringstream ssOut;
ssOut << _out.rdbuf();
std::string strDataOut = ssOut.str();
fileOut.write(strDataOut.c_str(), strDataOut.length());
}
}
}
// Reset flags
bChunked = false;
len = 0;
bGzipped = false;
}
// Otherwise just save it and try to find a key header info in it
else
{
fileOut << strLine << std::endl;
for (int i = 0; i < REGXCNT; ++i)
{
if (std::regex_search(strLine, match, regs[i]))
{
switch (i)
{
case 0:
len = std::stoi(match[1]);
break;
case 1:
bGzipped = true;
break;
case 2:
bChunked = true;
break;
}
break;
}
}
}
}
}
fileOut.flush();
fileIn.close();
fileOut.close();
}
return 0;
}
Header stdafx.h:
#pragma once
#pragma warning (disable: 4244)
#include <tchar.h>
#include <iostream>
#include <boost/tokenizer.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/stream.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
#include <fstream>
#include <regex>
#include <vector>
#include <streambuf>
#include <sstream>
Related
I was trying to figure out how the Mach VM Api works as there is almost 0 documentation around it and to do that I was messing around with reading/writing to other processes' memory.
To start I basically created a c program that constantly printed a string and its address. Then I used this program to try modifying the string mid execution and it worked fine:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>
#include <libproc.h>
#include <mach/mach.h>
#include <mach/mach_vm.h>
// Reports the Mach error and exits with `retval` when the caller's local
// variable `kr` is not KERN_SUCCESS. Wrapped in do/while(0) so the macro
// behaves as a single statement (safe before an `else`).
#define EXIT_ON_MACH_ERROR(msg, retval) \
    do { \
        if (kr != KERN_SUCCESS) { mach_error(msg ":" , kr); exit((retval)); } \
    } while (0)
// Returns a heap-allocated array holding the non-zero PIDs reported by
// proc_listpids and stores the number of entries in *size.
// Returns NULL with *size == 0 when the list cannot be obtained.
// The caller releases the array with free().
uint32_t* get_pids(uint16_t* size) {
    *size = 0;
    // proc_listpids works in bytes: with a NULL buffer it returns the buffer
    // size needed, otherwise the number of bytes it filled in.
    // The type argument 1 is kept from the original call
    // (presumably PROC_ALL_PIDS — confirm against <sys/proc_info.h>).
    int bytes_needed = proc_listpids(1, 0, NULL, 0);
    if (bytes_needed <= 0) {
        return NULL;
    }
    uint32_t* buffer = (uint32_t*) calloc(1, (size_t) bytes_needed);
    if (buffer == NULL) {
        return NULL;
    }
    int bytes_used = proc_listpids(1, 0, buffer, bytes_needed);
    if (bytes_used <= 0) {
        free(buffer);
        return NULL;
    }
    size_t reported = (size_t) bytes_used / sizeof(uint32_t);
    // Compact the non-zero PIDs to the front of the same buffer; the count is
    // capped so that it still fits the uint16_t out-parameter.
    uint16_t kept = 0;
    for (size_t i = 0; i < reported && kept < UINT16_MAX; i++) {
        if (buffer[i] != 0) {
            buffer[kept++] = buffer[i];
        }
    }
    *size = kept;
    return buffer;
}
// Experiment: finds the last listed process whose executable path contains
// "Python", obtains its task port and then deallocates, re-allocates, writes
// and reads memory in that task at hard-coded addresses.
int main() {
    // Locate correct PID according to process name
    uint16_t size = 0;
    uint32_t* pids = get_pids(&size);
    uint16_t maxpathlength = 1024;
    uint16_t path_size = maxpathlength * 4;
    char path_buffer[path_size];
    uint32_t process_pid = 0;
    for(int i = 0; i < size; i++) {
        memset(path_buffer, '\0', sizeof(path_buffer));
        // Result deliberately unchecked: the buffer was just zeroed, so a
        // failed lookup simply does not match below.
        proc_pidpath(pids[i], path_buffer, path_size);
        if(strstr(path_buffer, "Python")) {
            printf("%d\n", i);
            process_pid = pids[i];
        }
    }
    free(pids);
    if (process_pid == 0) {
        // Without a match the calls below would target PID 0.
        fprintf(stderr, "No process with \"Python\" in its path was found\n");
        return 1;
    }
    printf("%d\n", process_pid);
    printf("Python PID: %d\n", process_pid);
    // mach_host_self() yields a Mach port name, not a process id.
    printf("Host port: %d\n", mach_host_self());
    mach_port_t port = 0;
    kern_return_t kr = task_for_pid(mach_task_self(), process_pid, &port);
    EXIT_ON_MACH_ERROR("task_for_pid", kr);
    printf("Port: %d\n\n\n", port);
    // STUFF
    // Addresses were entered by hand for one particular run of the target.
    mach_vm_address_t address = 0x102d4b770;
    mach_vm_address_t address_a = 0x102d4b770;
    char data[50] = "wow";
    vm_offset_t buf;
    mach_msg_type_number_t sz;
    // MEMORY DEALLOCATION
    // NOTE(review): Mach VM calls presumably operate on whole pages, so this
    // likely unmaps everything else living on the same page(s) of the target
    // — a plausible cause of the target crashing; confirm.
    kern_return_t suc = mach_vm_deallocate(port, (mach_vm_address_t) address, (mach_vm_size_t) 1000);
    if (suc!=KERN_SUCCESS)
    {
        printf("mach_vm_deallocate() failed with message %s!\n", mach_error_string(suc));
    }
    // MEMORY ALLOCATION
    kern_return_t all_suc = mach_vm_allocate(port, (mach_vm_address_t *) &address_a, (vm_size_t) 26, false);
    if (all_suc!=KERN_SUCCESS)
    {
        printf("mach_vm_allocate() failed with message %s!\n", mach_error_string(all_suc));
    }
    // WRITE TO MEMORY
    kern_return_t success = mach_vm_write(port, (vm_address_t) address, (vm_address_t)data, 26);
    if (success!=KERN_SUCCESS)
    {
        printf("mach_vm_write() failed with message %s!\n", mach_error_string(success));
    }
    // READ FROM MEMORY
    kern_return_t read_success = mach_vm_read(port, (vm_address_t) 0x6000018c4030, 26, &buf, &sz);
    if (read_success!=KERN_SUCCESS)
    {
        printf("mach_vm_read() failed with message %s!\n", mach_error_string(read_success));
        // buf and sz were not filled in; there is nothing valid to print.
        return 1;
    }
    // The bytes read need not contain a terminating NUL, so bound the print.
    printf("%.*s\n", (int) sz, (const char *) buf);
    // Release the copy that mach_vm_read placed in this task.
    mach_vm_deallocate(mach_task_self(), (mach_vm_address_t) buf, (mach_vm_size_t) sz);
    return 0;
}
address and address_a were entered manually after figuring out the variable's address. However, when I tried this with a python process where I again just constantly printed out the string and address, I got the following error message the instant I ran the code above:
zsh: segmentation fault python3 strtest.py
I have no knowledge of CPython, so even after playing around a bit and trying to make it work, I got nowhere. How can I make this work on other programs such as Python as well? I know it's possible, since Bit-Slicer made it work, but I wasn't able to find out how.
Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 11 months ago.
Improve this question
I want to get out the characters and numbers immediately after the very specific characters "data-permalink=" in a huge text file (50MB). The output should ideally be written in a simple (separate) text file looking something like this:
34k89
456ij
233a4
...
the "data-permalink="" stays always the exact same (as usual in source codes), but the id within can be any combination of characters and numbers. It seemed simple at first, but since it is not at the start of a line, or the needed output is not a separate word I was not able to come up with a working solution at all in the required time. I am running out of time and need a solution or hints to this immediately, so any help is greatly appreciated
example of data in the source data file:
random stuff above
....
I would understand c++ or python the most, so such a solution using these languages would be nice.
I tried something like this:
#include <iostream>
#include <string>
#include <fstream>
using namespace std;
// Reads data.txt character by character and prints "lol this is awesome" each
// time the text data-permalink= has just been completed, and a single space
// for every other character read.
int main()
{
    ifstream in ("data.txt");
    if(in.fail())
    {
        cout<<"error";
    }
    else
    {
        // A single char can never equal a whole string, so the marker is
        // matched incrementally. Restarting from the beginning on a mismatch
        // is sufficient because 'd' occurs only as the marker's first character.
        const std::string marker = "data-permalink=";
        std::size_t matched = 0;
        char c;
        while(in.get(c))
        {
            if(c==marker[matched])
                ++matched;
            else
                matched = (c==marker[0]) ? 1 : 0;
            if(matched==marker.size())
            {
                cout<<"lol this is awesome";
                matched = 0;
            }
            else
                cout<<" ";
        }
    }
    return 0;
}
It is just a random attempt to see if the structure works, nowhere near a solution. This prob. also gives u guys a good guess on how bad i am currently lmao.
Hm, basically 50MB is considered "small" nowadays. With that little data, you can read the whole file into one std::string and then do a linear search.
So, the algorithm is:
Open files and check, if they could be opened
Read complete file into a std::string
Do a linear search for the string "data-permalink=""
Remember the start position of the permalink
Search for the closing "
Use the std::string's substr function to create the output permalink string
Write this to a file
Goto 1.
I created a 70MB random test file with random data.
The whole procedure takes less than 1s. Even with slow linear search.
But caveat. You want to parse a HTML file. This will most probably not work, because of potential nested structures. For this you should use existing HTML parsers.
Anyway. Here is one of many possible solutions.
#include <iostream>
#include <fstream>
#include <string>
#include <random>
#include <iterator>
#include <algorithm>
std::string randomSourceCharacters{ " abcdefghijklmnopqrstuvwxyz" };
const std::string sourceFileName{ "r:\\test.txt" };
const std::string linkFileName{ "r:\\links.txt" };

// Fills the source file with 1,000,000 test entries written back to back.
// Each entry is: random prefix, data-permalink="<random link>", random suffix.
// All three parts have a random length between 10 and 30 characters drawn
// from randomSourceCharacters.
void createRandomData() {
    std::random_device seedSource;
    std::mt19937 engine(seedSource());
    std::uniform_int_distribution<> pickCharacterIndex(0, randomSourceCharacters.size() - 1);
    std::uniform_int_distribution<> pickLength(10, 30);

    std::ofstream ofs{ sourceFileName };
    if (!ofs) {
        std::cerr << "\nError: Could not open source file '" << sourceFileName << "' for writing\n";
        return;
    }

    // Appends `count` randomly chosen characters to the file.
    const auto writeRandomCharacters = [&](int count) {
        for (int written = 0; written < count; ++written)
            ofs << randomSourceCharacters[pickCharacterIndex(engine)];
    };

    for (size_t entry = 0; entry < 1000000; ++entry) {
        // The three lengths are drawn up front, before any character.
        const int prefixLength = pickLength(engine);
        const int linkLength = pickLength(engine);
        const int suffixLength = pickLength(engine);

        writeRandomCharacters(prefixLength);
        ofs << "data-permalink=\"";
        writeRandomCharacters(linkLength);
        ofs << "\"";
        writeRandomCharacters(suffixLength);
    }
}
// Extracts every value that follows data-permalink=" (up to the next ") from
// the source file and writes the values, one per line, to the link file.
int main() {
    // Please uncomment if you want to create a file with test data
    // createRandomData();
    // Open source file for reading and check, if file could be opened
    if (std::ifstream ifs{ sourceFileName }; ifs) {
        // Open link file for writing and check, if file could be opened
        if (std::ofstream ofs{ linkFileName }; ofs) {
            // Read the complete file into a string
            const std::string data(std::istreambuf_iterator<char>(ifs), {});
            const std::string searchString{ "data-permalink=\"" };
            const std::string permalinkEndString{ "\"" };
            // Linear search; each round resumes right after the previous closing quote
            std::size_t searchFrom{};
            while (true) {
                // Search for the begin of the permalink
                const std::size_t posBegin = data.find(searchString, searchFrom);
                if (posBegin == std::string::npos) break;
                const std::size_t posStartForEndSearch = posBegin + searchString.length();
                // Search for the end of the permalink
                const std::size_t posEnd = data.find(permalinkEndString, posStartForEndSearch);
                if (posEnd == std::string::npos) break;
                // Output result
                ofs << data.substr(posStartForEndSearch, posEnd - posStartForEndSearch) << '\n';
                searchFrom = posEnd + 1;
            }
        }
        // This branch is about the output file, so the message names it.
        else std::cerr << "\nError: Could not open link file '" << linkFileName << "' for writing\n";
    }
    else std::cerr << "\nError: Could not open source file '" << sourceFileName << "' for reading\n";
}
Edit
If you need unique links you may store the result in an std::unordered_set and then output later.
#include <iostream>
#include <fstream>
#include <string>
#include <iterator>
#include <algorithm>
#include <unordered_set>
const std::string sourceFileName{ "r:\\test.txt" };
const std::string linkFileName{ "r:\\links.txt" };
// Extracts every value that follows data-permalink=" (up to the next ") from
// the source file and writes each distinct value once, one per line, to the
// link file. The output order is that of the unordered_set, not of the input.
int main() {
    // Open source file for reading and check, if file could be opened
    if (std::ifstream ifs{ sourceFileName }; ifs) {
        // Open link file for writing and check, if file could be opened
        if (std::ofstream ofs{ linkFileName }; ofs) {
            // Read the complete file into a string
            const std::string data(std::istreambuf_iterator<char>(ifs), {});
            const std::string searchString{ "data-permalink=\"" };
            const std::string permalinkEndString{ "\"" };
            // Here we will store unique results
            std::unordered_set<std::string> result{};
            // Linear search; each round resumes right after the previous closing quote
            std::size_t searchFrom{};
            while (true) {
                // Search for the begin of the permalink
                const std::size_t posBegin = data.find(searchString, searchFrom);
                if (posBegin == std::string::npos) break;
                const std::size_t posStartForEndSearch = posBegin + searchString.length();
                // Search for the end of the permalink
                const std::size_t posEnd = data.find(permalinkEndString, posStartForEndSearch);
                if (posEnd == std::string::npos) break;
                // Remember result; duplicates are dropped by the set
                result.insert(data.substr(posStartForEndSearch, posEnd - posStartForEndSearch));
                searchFrom = posEnd + 1;
            }
            for (const std::string& link : result)
                ofs << link << '\n';
        }
        // This branch is about the output file, so the message names it.
        else std::cerr << "\nError: Could not open link file '" << linkFileName << "' for writing\n";
    }
    else std::cerr << "\nError: Could not open source file '" << sourceFileName << "' for reading\n";
}
This is the Python Code to Execute external python files
exec(open("file.py").read())
How to do it in c
main.cpp
#include <iostream>
#include <string>
#include <list>
// Holds the name of the Python interpreter and a helper that runs a shell
// command and captures what it writes to stdout.
class PrivateDriverData {
public:
    std::string PythonExecutable = "python3";

    // Runs `command` through popen and returns everything it wrote to stdout.
    // Returns the text "popen failed!" when the pipe cannot be opened.
    std::string exec(std::string command) {
        std::string result;
        // Open pipe to the command's stdout
        FILE* pipe = popen(command.c_str(), "r");
        if (!pipe) {
            return "popen failed!";
        }
        char buffer[128];
        // fgets returns NULL on end-of-file and on a read error alike, so the
        // loop always terminates (testing feof alone would spin on an error).
        while (fgets(buffer, sizeof buffer, pipe) != NULL) {
            result += buffer;
        }
        pclose(pipe);
        return result;
    }
};
std::string ExecDriver() {
PrivateDriverData LocalPDD;
std::string ScraperExecuteData = LocalPDD.PythonExecutable + " file.py";
return LocalPDD.exec(ScraperExecuteData);
}
// Prints the result of ExecDriver() followed by a newline.
int main() {
    const std::string scriptOutput = ExecDriver();
    std::cout << scriptOutput;
    std::cout << "\n";
}
The closest thing C has is dlopen(), which opens a compiled and linked dynamic library and provides a way to run the code it contains.
It's not standard C and requires a hosted environment so it's not going to work on Arduino etc.
I'm trying to extract data from a .dat file (the data in the file is 16-bit) in C++, but the output is garbage. I'm able to extract it in Python (code provided below as well), but my work requires it to be in C++. Here is the C++ code that I'm using.
Also I would like to know what is the fastest way to extract data since my file are a bit large in size.
#include<iostream>
#define N 4000
using namespace std;
// One fixed-size slice of the input file: 1500 raw bytes.
struct record
{
    char details[1500];
};
int main(int argc, char** argv) {
FILE *fp = fopen("mirror.dat","rb");
record *records;
if (fp==NULL){
cout<<"Problem \n";
system("pause");
return -1;
}
records = new record[N];
fread((record *)records, sizeof(record),N,fp );
fclose(fp);
for(int i=0; i<N;i++){
cout<<"[" << i+1 << "]" << records[i].details << "\n";
}
system("PAUSE");
return 0;
}
Below is the python code.
# Reads mirror.dat and decodes it as a sequence of little-endian 16-bit values.
fpath="mirror.dat"
with open(fpath, 'rb') as r_file:
    data=r_file.read()
# Stop before a trailing unpaired byte so data[i+1] is always in range.
bits=[data[i+1]<<8 | data[i] for i in range(0, len(data) - 1, 2)]
print(type(bits))
# Plain copy of the decoded values, as in the original element-by-element loop.
bits_decod = list(bits)
print((bits_decod))
In C++, when you print a char array using <<, it expects it to be a C-style character string.
You need to write a loop that decodes it similarly to the way the Python script does.
#include<iostream>
#define N 4000
using namespace std;
uint8_t data[N * 1500];
uint16_t bits[N * 750];
int main(int argc, char** argv) {
FILE *fp = fopen("mirror.dat","rb");
record *records;
if (fp==NULL){
cout<<"Problem \n";
system("pause");
return 1;
}
size_t data_len = fread((void *)data, sizeof(data),1,fp );
if (data_len < 0) {
cout << "Read error\n";
system("pause");
return 1;
}
fclose(fp);
for (int i = 0; i < data_len; i+=2) {
bits[i/2] = data[i+1] << 8 | data[i];
}
int bits_len = data_len / 2;
for(int i=0; i<bits_len;i++){
cout<<"[" << i+1 << "]" << bits[i] << "\n";
}
system("PAUSE");
return 0;
}
In C++ you can read the contents of a file into a std::vector of uint8_t with the use of std::istream_iterator. Then loop through the vector, decoding the bytes and putting into a vector of uint16_t.
std::istream_iterator<uint8_t>(testFile) is an iterator to beginning of file and std::istream_iterator<uint8_t>() is default-constructed with the special state "end-of-stream". So using this iterator can be used to read from the beginning of the file to the end. We don't have to calculate the size ourselves, and therefore can be used to read varying number of entries in the file.
The equivalent C++ program will look something like this:
#include <iostream>
#include <cstddef>
#include <vector>
#include <iterator>
#include <algorithm>
#include <fstream>
#include <cstdint>
// Reads mirror.dat byte by byte, combines each pair of bytes into one
// little-endian 16-bit value and prints the values separated by spaces.
int main()
{
    //Open file
    std::ifstream testFile("mirror.dat", std::ios::in | std::ios::binary);
    if (!testFile)
    {
        std::cout << "Problem \n";
        system("pause");
        return 1;
    }
    // istream_iterator extracts with operator>>, which skips whitespace by
    // default; in binary data that silently drops bytes such as 0x20 or 0x0A.
    testFile.unsetf(std::ios_base::skipws);
    //Read in file contents
    std::vector<uint8_t> data((std::istream_iterator<uint8_t>(testFile)), std::istream_iterator<uint8_t>());
    std::vector<uint16_t> bytes_decoded;
    bytes_decoded.reserve(data.size() / 2);
    //Decode bytes; `i + 1 < size` leaves out a trailing unpaired byte
    for (std::size_t i = 0; i + 1 < data.size(); i += 2)
    {
        bytes_decoded.push_back(static_cast<uint16_t>(data[i + 1] << 8 | data[i]));
    }
    //Copy decoded values to screen with one space after each number
    //(the delimiter is an argument of ostream_iterator, not of std::copy)
    std::copy(bytes_decoded.cbegin(), bytes_decoded.cend(), std::ostream_iterator<uint16_t>(std::cout, " "));
    system("PAUSE");
    return 0;
}
Note: This requires C++11 or above for the types uint8_t and uint16_t in the header cstdint. You could use unsigned char and unsigned short instead if you don't have a modern C++ compiler.
Testing Linux syscall epoll using a simple parent-child program.
Expected behaviour
As the child writes a number every second, the parent should read it from the pipe and write a number to stdout every second.
Actual behaviour
The parent waits until the child has written all the numbers, and then reads all the data from the pipe and writes it to stdout. Verified by running strace on the parent: it blocks in epoll_wait.
please check README in github more information
Parent
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <error.h>
#include <errno.h>
#include <string.h>
#include <sys/wait.h>
#include <sys/epoll.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#define NAMED_FIFO "aFifo"
// Adds O_NONBLOCK to the file status flags of fd.
// A failing fcntl call is reported with perror; nothing is returned.
static void set_nonblocking(int fd) {
    const int current_flags = fcntl(fd, F_GETFL, 0);
    if (current_flags != -1) {
        const int wanted_flags = current_flags | O_NONBLOCK;
        if (fcntl(fd, F_SETFL, wanted_flags) == -1) {
            perror("fcntl()");
        }
        return;
    }
    // Reading the current flags failed; leave the descriptor untouched.
    perror("fcntl()");
}
// Prints msg together with the description of the current errno via perror
// and terminates the process with exit status -1. Does not return.
void errExit(char *msg) {
    const int failure_status = -1;

    perror(msg);
    exit(failure_status);
}
// Prints every entry of the NULL-terminated arrays argv and env, one per
// line, as "argv[i]=..." followed by "env[i]=...".
void printArgs(char **argv,char **env) {
    int idx = 0;
    while (argv[idx]) {
        printf("argv[%d]=%s\n", idx, argv[idx]);
        idx++;
    }

    idx = 0;
    while (env[idx]) {
        printf("env[%d]=%s\n", idx, env[idx]);
        idx++;
    }
}
// Opens the FIFO NAMED_FIFO for reading and, for each of `max` rounds, reads
// bSize (2) bytes from it and prints the round counter as a 3-digit number.
// Any failed or short read ends the process through errExit.
// `name` is currently unused.
void PrintNos(short int max,char *name) {
    int fifo_fd,rVal;
    int bSize=2;
    char buffer[bSize];
    (void)name;
    fifo_fd = open(NAMED_FIFO,O_RDONLY);
    if(fifo_fd<0)
        errExit("open");
    for(short int i=0;i<max;i++) {
        rVal = read(fifo_fd,buffer,bSize);
        if(rVal != bSize)
            errExit("read");
        printf("%03d\n",i);
    }
    // Release the descriptor; it was previously left open on return.
    close(fifo_fd);
}
// Runs the program named by argv[1] (with the remaining arguments) as a child
// whose stdout is a pipe, and copies everything the child writes to this
// process's stdout, using epoll to wait for data on the pipe.
int main(int argc, char *argv[],char *env[]) {
    int pipe_fds_child_stdout[2] ;
    pid_t child_id ;
    if( argc < 2 ) {
        fprintf(stderr,"usage: %s <program> [args...]\n",argv[0]);
        exit(-1);
    }
    if( pipe(pipe_fds_child_stdout) < 0 )
        errExit("pipe");
    child_id = fork();
    if( child_id > 0 ) {
        const int MAX_POLL_FDS = 2;
        const int BUF_SIZE = 4;
        // Signed, so that a failed read (-1) can be told apart from data.
        ssize_t readSize;
        char buf[BUF_SIZE];
        int status;
        int epoll_fd;
        int nfds ;
        struct epoll_event e_e, e_events[MAX_POLL_FDS];
        memset(e_events,'\0',sizeof(e_events));
        memset(&e_e,'\0',sizeof(e_e));
        // The parent only reads; closing its copy of the write end lets the
        // pipe report end-of-file once the child is done.
        close(pipe_fds_child_stdout[1]);
        epoll_fd = epoll_create1(0);
        if(epoll_fd < 0)
            errExit("epoll_create1");
        e_e.data.fd = pipe_fds_child_stdout[0];
        e_e.events = EPOLLIN;
        if(epoll_ctl(epoll_fd, EPOLL_CTL_ADD, pipe_fds_child_stdout[0], &e_e) < 0)
            errExit("epoll_ctl");
        while(1) {
            nfds = epoll_wait(epoll_fd, e_events,MAX_POLL_FDS,-1);
            if( nfds < 0) {
                if( errno == EINTR )
                    continue; // interrupted by a signal: just wait again
                errExit("epoll_wait");
            }
            for(int i=0;i<nfds;i++) {
                if( e_events[i].data.fd != pipe_fds_child_stdout[0])
                    errExit("epoll_wait returned non-awaited fd");
                if( e_events[i].events & EPOLLIN) {
                    readSize = read(pipe_fds_child_stdout[0],buf,BUF_SIZE);
                    if( readSize < 0 )
                        errExit("read");
                    if( readSize == 0 ) { // eof: the child closed its end
                        wait(&status);
                        exit(0);
                    }
                    // A read may return fewer than BUF_SIZE bytes; forward
                    // exactly what arrived.
                    if( write(STDOUT_FILENO,buf,readSize) != readSize )
                        errExit("write");
                } else if( e_events[i].events & EPOLLHUP) {
                    printf("got EPOLLHUP on pipefd\n");
                    wait(&status);
                    exit(0);
                } else {
                    errExit("Unexpected event flag returned by epoll_wait on waited fd");
                }
            }
        }
    } else if( child_id == 0 ) {
        close(0);
        close(pipe_fds_child_stdout[0]);
        // dup2 replaces fd 1 itself, so no separate close(1) is needed.
        if( dup2(pipe_fds_child_stdout[1],1) < 0 )
            errExit("dup2");
        execvpe(argv[1],&(argv[1]),env);
        // Only reached when the program could not be started.
        errExit("execvpe");
    } else {
        errExit("fork");
    }
    return 0;
}
Child
import sys
import time
import os
#f=open("aFifo",'r')
# Prints the numbers 0..9 as 3-digit strings, one per second.
for x in range(10):
    #try:
    #    val = f.read(2)
    #except Exception as e:
    #    raise
    time.sleep(1)
    # Flush on every line: when stdout is a pipe, Python buffers the output,
    # so the reader would otherwise see nothing until the buffer fills or the
    # script exits.
    print(f'{x:03d}', flush=True)
This is due to python buffering, which can be disabled by passing -u option to python.
After much searching and research, understood that this is due to pipe buffer. Though the client writes, it is in pipe buffer. Only after the pipe buffer is full, the kernel sends ready event on that descriptor. The minimum is pagesize, kernel doesn't allow to set below that. But it can be increased. Got to this, by changing from epoll to poll/select. After changing to poll/select the behaviour was same. Blocking though data was available in pipe.
import fcntl
import os
# fcntl command number used to request a pipe capacity (Linux).
F_SETPIPE_SZ=1031
fds = os.pipe()
# Ask five times for a 64-byte pipe and print what fcntl reports each time.
for i in range(5):
    print(fcntl.fcntl(fds[0],F_SETPIPE_SZ,64))
$ python3.7 pipePageSize.py
4096
4096
This is the modified client. Appropriate changes in server too.
import time
# Width every printed number is padded to.
pageSize=1024*8
# Prints 0..99, one every half second, each padded to pageSize characters.
for x in range(100):
    time.sleep(0.5)
    print(f'{x:-{pageSize}d}')