How to parse serialized C structs from binary file in python? - python

I have a handful of different type of C-structs that are all compressed into a binary file.
struct-id serialized-struct struct-id serialized-struct ...
If it were the same struct over and over, it would make sense to use the struct package, but I want to switch between previously defined structs all the time.
STRUCT1_ID = '\xAA'
STRUCT2_ID = '\xBB'
STRUCT_IDS = frozenset([STRUCT1_ID, STRUCT2_ID])
struct1s = []
struct2s = []
def create_test_file(filepath):
with open(filepath, 'wb') as f:
# Write an example struct1 id followed by struct
f.write(STRUCT1_ID)
f.write(b'\x01\x02\x03\x04\x05\x06')
# Write an example struct2 id followed by struct
f.write(STRUCT2_ID)
f.write(b'\x07\x08\x09\x0A')
def parse_test_file(filepath):
with open(filepath, 'rb') as f:
msg_type = f.read(1)
while msg_type:
print(byte)
if byte in STRUCT_IDS:
# Parse the next however many bytes needed by struct
# logic breaks down here
struct1s.append(turnIntoStruct(f.read(?)))
msg_type = f.read(1)
else:
print('Corrupted file. Unrecognized id')
In C, the structs would be:
typedef struct struct1_s {
uint16_t a;
uint16_t b;
uint16_t c;
} struct1_t;
typedef struct struct2_s {
uint16_t d;
uint16_t e;
} struct2_t;
// Declare and initialize the structs
struct1_t s1 = {
.a = 0x0201,
.b = 0x0403,
.c = 0x0605
};
struct2_t s2 = {
.d = 0x0807,
.e = 0x0A09
};
I'm less python than I am C right now. I seem unable to bring construct to python 3.4.3?

Map the ID to the struct pattern, and use the appropriate one.
structmap = {
b'\xaa': ('3H', struct1s),
b'\xbb': ('2H', struct2s)
}
...
structmap[msg_type][1].append(struct.unpack(structmap[msg_type][0],
f.read(struct.calcsize(structmap[msg_type][0]))))

Related

How to read structured binary data from a file?

The following C++ code writes a header to a file:
#include <iostream>
struct Header
{
uint16_t name;
uint8_t type;
uint8_t padding;
uint32_t width, height;
uint32_t depth1, depth2;
float dMin, dMax;
};
int main()
{
Header header;
header.name = *reinterpret_cast<const uint16_t*>("XO");
header.type = true;
header.width = (uint32_t)512;
header.height = (uint32_t)600;
header.depth1 = (uint32_t)16;
header.depth2 = (uint32_t)25;
header.dMin = 5.0;
header.dMax = 8.6;
FILE* f = fopen("header.bin", "wb");
fwrite(&header, sizeof(Header), 1, f);
}
I am looking to read these header.bin files using Python. In C++ I would be doing something like:
fread(&header, sizeof(Header), 1, f)
But I'm unsure how to read the bytes and convert them into the corresponding fields that the Header struct has in Python?
Use the struct module to define the binary layout of a C-like struct and de-/serialise it:
import struct
# Format String describing the data layout
layout = "H B x 2L 2L 2f"
# Object representing the layout, including size
header = struct.Struct(layout)
with open("header.bin", "rb") as in_stream:
print(header.unpack(in_stream.read(header.size))
The layout is a format string describing the fields in-order, e.g. H for uint16_t, B for uint8_t, x for a pad byte, and so on.
I would do this with Python's ctypes, somewhat so you can share the Header header
Create a class from ctypes.Structure to map the types
import ctypes
class StructHeader(ctypes.Structure):
_fields_ = [
("name", ctypes.c_uint16),
("type", ctypes.c_uint8),
...
]
And create a function which does what you want with a signature like
int header(struct Header &buffer)
{
// open the file and write to buffer
// opportunity for other features
}
Then you can compile a shared object to read it which returns that type
gcc -shared -Wl,-soname,your_soname \
-o library_name file_list library_list
And call out with ctypes.CDLL to read the headers
header = ctypes.CDLL("mylib.so.1").header # function named header
header.argtypes = [ctypes.POINTER(StructHeader)]
header.restype = ctypes.c_int
# allocate struct for write
buffer = StructHeader()
# call out to function to write buffer
header(buffer)

how to create a data structure binary file in python?

I am working a image build project, I am trying to use python to create a structure,
I know c is very easy to create a data structure, here is a c example.
struct Books {
char title[50];
char author[50];
char subject[100];
uint32_t book_id;
uint8_t book_ver;
uint16_t book_location;
};
void main( ) {
FILE *fptr;
struct Books Book1; /* Declare Book1 of type Book */
strcpy( Book1.title, "C Programming");
strcpy( Book1.author, "Anna Ali");
strcpy( Book1.subject, "C Programming Tutorial");
Book1.book_id = 649507;
Book1.book_ver = 2;
Book1.book_location= 308;
fptr = fopen("book_struct.bin","wb");
fwrite(&Book1, sizeof(struct Books), 1, fptr);
fclose(fptr);
}
How can I create a data struct binary file in python?
Can someone help to provide a python reference code?

C char array from python string

I have a list of strings in python which I'm trying to pass down to a C extension for character analysis. I've gotten so far as to have the list broken up into their individual string PyObjects. Next, I'm hoping to split these strings into their individual characters so that every string PyObject is now a corresponding C-type character array. I can't seem to figure out how to do this though.
Here's what I have so far: Currently after building the .pyd file it will return a list of 1's as a filler to Python (so everything else works), I just don't know how to split a string PyObject into the C-type character array.
--- cExt.c ---
#include <Python.h>
#include <stdio.h>
static int *CitemCheck(PyObject *commandString, int commandStringLength) {
// HAALP
//char* commandChars = (char*) malloc(commandStringLength*sizeof(char*));
// char c[] = PyString_AsString("c", commandString);
// printf("%c" , c);
// printf("%s", PyString_AsString(commandString));
// for (int i=0; i<sizeof(commandChars)/sizeof(*commandChars); i++) {
// printf("%s", PyString_AsString(commandString));
// printf("%c", commandChars[i]);
// }
return 1; // TODO: RETURN PROPER RESULTANT
}
static PyObject *ClistCheck(PyObject *commandList, int commandListLength) {
PyObject *results = PyList_New(commandListLength);
for (int index = 0; index < commandListLength; index++) {
PyObject *commandString;
commandString = PyList_GetItem(commandList, index);
int commandStringLength = PyObject_Length(commandString);
// CitemCheck should take string PyObject and its length as int
int x = CitemCheck(commandString, commandStringLength);
PyObject* pyItem = Py_BuildValue("i", x);
PyList_SetItem(results, index, pyItem);
}
return results;
}
static PyObject *parseListCheck(PyObject *self, PyObject *args) {
PyObject *commandList;
int commandListLength;
if (!PyArg_ParseTuple(args, "O", &commandList)){
return NULL;
}
commandListLength = PyObject_Length(commandList);
return Py_BuildValue("O", ClistCheck(commandList, commandListLength));
}
static char listCheckDocs[] =
""; // TODO: ADD DOCSTRING
static PyMethodDef listCheck[] = {
{"listCheck", (PyCFunction) parseListCheck, METH_VARARGS, listCheckDocs},
{NULL,NULL,0,NULL}
};
static struct PyModuleDef DCE = {
PyModuleDef_HEAD_INIT,
"listCheck",
NULL,
-1,
listCheck
};
PyMODINIT_FUNC PyInit_cExt(void){
return PyModule_Create(&DCE);
}
for reference, my temporary extension build file:
--- _c_setup.py ---
(located in same folder as cExt.c)
"""
to build C files, pass:
python _c_setup.py build_ext --inplace clean --all
in command prompt which is cd'd to the file's dierctory
"""
import glob
from setuptools import setup, Extension, find_packages
from os import path
here = path.abspath(path.dirname(__file__))
files = [path.split(x)[1] for x in glob.glob(path.join(here, '**.c'))]
extensions = [Extension(
path.splitext(x)[0], [x]
) for x in files]
setup(
ext_modules = extensions,
)
You can use PyUnicode_AsEncodedString, which
Encode a Unicode object and return the result as Python bytes object. encoding and errors have the same meaning as the parameters of the same name in the Unicode encode() method. The codec to be used is looked up using the Python codec registry. Return NULL if an exception was raised by the codec.
see https://docs.python.org/3/c-api/unicode.html#c.PyUnicode_AsEncodedString
Then with PyBytes_AsString you get a pointer to internal buffer with a terminating NUL byte. This buffer must neither be deallocated nor modified. If you need a copy you could use e.g. strdup.
see https://docs.python.org/3/c-api/bytes.html#c.PyBytes_AsString
Slightly modifying your code it could look like this:
PyObject *encodedString = PyUnicode_AsEncodedString(commandString, "UTF-8", "strict");
if (encodedString) { //returns NULL if an exception was raised
char *commandChars = PyBytes_AsString(encodedString); //pointer refers to the internal buffer of encodedString
if(commandChars) {
printf("the string '%s' consists of the following chars:\n", commandChars);
for (int i = 0; commandChars[i] != '\0'; i++) {
printf("%c ", commandChars[i]);
}
printf("\n");
}
Py_DECREF(encodedString);
}
If one would test with:
import cExt
fruits = ["apple", "pears", "cherry", "pear", "blueberry", "strawberry"]
res = cExt.listCheck(fruits)
print(res)
The output would be:
the string 'apple' consists of the following chars:
a p p l e
the string 'pears' consists of the following chars:
p e a r s
the string 'cherry' consists of the following chars:
c h e r r y
the string 'pear' consists of the following chars:
p e a r
the string 'blueberry' consists of the following chars:
b l u e b e r r y
the string 'strawberry' consists of the following chars:
s t r a w b e r r y
[1, 1, 1, 1, 1, 1]
Side note not directly related to the question:
Your CitemCheck function returns a pointer to int, but if looking at how it is called, it seems that you want to return an int value. The function signature should look more like this:
static int CitemCheck(PyObject *commandString, int commandStringLength)
(note the removed * after int).

C equivalent to python pickle (object serialization)?

What would be the C equivalent to this python code?
Thanks.
data = gather_me_some_data()
# where data = [ (metic, datapoints), ... ]
# and datapoints = [ (timestamp, value), ... ]
serialized_data = cPickle.dumps(data, protocol=-1)
length_prefix = struct.pack("!L", len(serialized_data))
message = length_prefix + serialized_data
C doesn't supports direct serialization mechanism because in C you can't get type information at run-time. You must yourself inject some type info at run-time and then construct required object by that type info. So define all your possible structs:
typedef struct {
int myInt;
float myFloat;
unsigned char myData[MY_DATA_SIZE];
} MyStruct_1;
typedef struct {
unsigned char myUnsignedChar;
double myDouble;
} MyStruct_2;
Then define enum which collects info about what structs in total you have:
typedef enum {
ST_MYSTRUCT_1,
ST_MYSTRUCT_2
} MyStructType;
Define helper function which lets to determine any struct size:
int GetStructSize(MyStructType structType) {
switch (structType) {
case ST_MYSTRUCT_1:
return sizeof(MyStruct_1);
case ST_MYSTRUCT_2:
return sizeof(MyStruct_2);
default:
// OOPS no such struct in our pocket
return 0;
}
}
Then define serialize function:
void BinarySerialize(
MyStructType structType,
void * structPointer,
unsigned char * serializedData) {
int structSize = GetStructSize(structType);
if (structSize != 0) {
// copy struct metadata to serialized bytes
memcpy(serializedData, &structType, sizeof(structType));
// copy struct itself
memcpy(serializedData+sizeof(structType), structPointer, structSize);
}
}
And de-serialization function:
void BinaryDeserialize(
MyStructType structTypeDestination,
void ** structPointer,
unsigned char * serializedData)
{
// get source struct type
MyStructType structTypeSource;
memcpy(&structTypeSource, serializedData, sizeof(structTypeSource));
// get source struct size
int structSize = GetStructSize(structTypeSource);
if (structTypeSource == structTypeDestination && structSize != 0) {
*structPointer = malloc(structSize);
memcpy(*structPointer, serializedData+sizeof(structTypeSource), structSize);
}
}
Serialization usage example:
MyStruct_2 structInput = {0x69, 0.1};
MyStruct_1 * structOutput_1 = NULL;
MyStruct_2 * structOutput_2 = NULL;
unsigned char testSerializedData[SERIALIZED_DATA_MAX_SIZE] = {0};
// serialize structInput
BinarySerialize(ST_MYSTRUCT_2, &structInput, testSerializedData);
// try to de-serialize to something
BinaryDeserialize(ST_MYSTRUCT_1, &structOutput_1, testSerializedData);
BinaryDeserialize(ST_MYSTRUCT_2, &structOutput_2, testSerializedData);
// determine which object was de-serialized
// (plus you will get code-completion support about object members from IDE)
if (structOutput_1 != NULL) {
// do something with structOutput_1
free(structOutput_1);
}
else if (structOutput_2 != NULL) {
// do something with structOutput_2
free(structOutput_2);
}
I think this is most simple serialization approach in C. But it has some problems:
struct must not have pointers, because you will never know how much memory one needs to allocate when serializing pointers and from where/how to serialize data into pointers.
this example has issues with system endianess - you need to be careful about how data is stored in memory - in big-endian or little-endian fashion and reverse bytes if needed [when casting char * to integal type such as enum] (...or refactor code to be more portable).
If you can use C++, there is the PicklingTools library

python using ctypes to work with dll - structure OUT argument

In the header file of the dll I have the following structure
typedef struct USMC_Devices_st{
DWORD NOD; // Number of the devices ready to work
char **Serial; // Array of 16 byte ASCII strings
char **Version; // Array of 4 byte ASCII strings
} USMC_Devices; // Structure representing connected devices
I would like to call a dll function:
DWORD USMC_Init( USMC_Devices &Str );
I tried with this:
class USMCDevices(Structure):
_fields_ = [("NOD", c_long),
("Serial", c_char_p),
("Version", c_char_p)]
usmc = cdll.USMCDLL #this is the dll file
init = usmc.USMC_Init
init.restype = c_int32; # return type
init.argtypes = [USMCDevices]; # argument
dev = USMCDevices()
init(dev)
I get an error here. I guess the problem is with "Serial" and "Version" which both are array corresponding to the NOD (number of devices).
Any ideas how to solve this problem?
I really appreciate your help!!!
Use POINTER(c_char_p) for the char ** pointers. Indexing Serial or Version creates a Python string for the given null-terminated string. Note that indexing in the array beyond NOD - 1 either produces garbage values or will crash the interpreter.
C:
#include <windows.h>
typedef struct USMC_Devices_st {
DWORD NOD; // Number of the devices ready to work
char **Serial; // Array of 16 byte ASCII strings
char **Version; // Array of 4 byte ASCII strings
} USMC_Devices;
char *Serial[] = {"000000000000001", "000000000000002"};
char *Version[] = {"001", "002"};
__declspec(dllexport) DWORD USMC_Init(USMC_Devices *devices) {
devices->NOD = 2;
devices->Serial = Serial;
devices->Version = Version;
return 0;
}
// build: cl usmcdll.c /LD
Python:
import ctypes
from ctypes import wintypes
class USMCDevices(ctypes.Structure):
_fields_ = [("NOD", wintypes.DWORD),
("Serial", ctypes.POINTER(ctypes.c_char_p)),
("Version", ctypes.POINTER(ctypes.c_char_p))]
usmc = ctypes.cdll.USMCDLL
init = usmc.USMC_Init
init.restype = wintypes.DWORD
init.argtypes = [ctypes.POINTER(USMCDevices)]
dev = USMCDevices()
init(ctypes.byref(dev))
devices = [dev.Serial[i] + b':' + dev.Version[i]
for i in range(dev.NOD)]
print('\n'.join(d.decode('ascii') for d in devices))
Output:
000000000000001:001
000000000000002:002

Categories