Related
I am implementing a protocol in an STM32F412 board. It's almost done, I just need to do a CRC check for the received data.
I tried using the internal CRC module for calculating the CRC but I could not match the result to any online CRC algorithm online, so I decided to do a simple implementation of the Ethernet CRC.
static const uint32_t crc32_tab[] =
{
0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
0x2d02ef8dL
};
uint32_t calc_crc_calculate(uint8_t *pData, uint32_t uLen)
{
uint32_t val = 0xFFFFFFFFU;
int i;
for(i = 0; i < uLen; i++) {
val = crc32_tab[(val ^ pData[i]) & 0xFF] ^ ((val >> 8) & 0x00FFFFFF);
}
return val^0xFFFFFFFF;
}
I calculated the crc of 0x6F and compared the result to the online calculators and it apparently matches.
When I try to test the protocol with my python code I'm just unable to match the CRCs. On python I'm using the following code:
d = 0x6f
crc = zlib.crc32(bytes(d))&0xFFFFFFFF
I'm now unable to tell which is right. Apparently my algorithm is OK because it matches the online calculator. BUT those online calculators do not seem to be reliable sometimes and I doubt that python's zlib implementation is wrong .. I may be using it wrong at worst.
Actually you can compute the Ethernet CRC32 with the builtin module of the STM32. It took me quite a while to make it match up as well.
This code should match up for sizes divisible by 4 (I also used python zlib on the other end):
#include "stm32l4xx_hal.h"
uint32_t CRC32_Compute(const uint32_t *data, size_t sizeIn32BitWords)
{
CRC_HandleTypeDef hcrc = {
.Instance = CRC,
.Init.DefaultPolynomialUse = DEFAULT_POLYNOMIAL_ENABLE,
.Init.DefaultInitValueUse = DEFAULT_INIT_VALUE_ENABLE,
.Init.InputDataInversionMode = CRC_INPUTDATA_INVERSION_WORD,
.Init.OutputDataInversionMode = CRC_OUTPUTDATA_INVERSION_ENABLE,
.InputDataFormat = CRC_INPUTDATA_FORMAT_WORDS,
};
HAL_StatusTypeDef status = HAL_CRC_Init(&hcrc);
assert (status == HAL_OK)
uint32_t checksum = HAL_CRC_Calculate(&hcrc, data, sizeIn32BitWords);
uint32_t checksumInverted = ~checksum;
return checksumInverted;
}
The challenge with sizes not divisible by 4 is to get the "inversion/reversal" (changing the bit order) right. There is an example how the hardware handles this in the "RM0394 Reference manual STM32L43xxx STM32L44xxx STM32L45xxx STM32L46xxx advanced ARM®-based 32-bit MCUs Rev 3" on page 333.
The essence is that reversal reverses the bit order. For CRC32 this reversal must happen on the word level, i.e. over 32 bits.
Ok. It certainly was a bug on my part. But it was happening in my python code.
I suddenly realized that I was practically doing bytes(0x6F) which just creates an array with 111 positions.
What I actually needed to do was
import struct
d = pack('B', 0x6F)
crc = zlib.crc32(bytes(d))&0xFFFFFFFF
This question could have been avoided had I just done a little bit of rubber duck debugging. Hopefuly this will help someone else.
Put quickly:
I want to send a full char * from a C module (build with SWIG) to a python callback function.
I can already do most of this BUT: my char array is just binary data and therefore contains 0s which is being seen as a NULL terminator in the C->Python conversion. Python only receives the bytes up until the first 0 rather than the full array.
In the swig documentation it specifically says that treating char * as binary data is possible with a typemap but doesn't say how. I've been playing with cstring.i (%cstring_output_withsize etc) but I am relatively new to SWIG and am out of my depth.
How do I tell SWIG to ignore the NULL terminator and use a size value instead when passing to python?
=========================================================
If you need more detail:
For a little background, I'm writing a very simple network packet format/transfer protocol for a 802.15.4 RF network. There are multiple architectures, languages and transceivers on the network so I'm writing my library to have a unified settable callback function allowing a small wrapper to be written on each platform that passes a buffer of bytes to the transciever using that host language.
Below are the key parts of the code.
//defined in Packets.h
void (*send_callback_ptr)(char * payload, uint8_t payload_length, uint16_t destination, uint16_t sequence_no);
//a function to set the callback
void send_callback_set(void (*f)(char * payload_buffer, uint8_t payload_length, uint16_t destination, uint16_t sequence_no))
{
send_callback_ptr = f;
}
uint8_t send(uint16_t target, enum PACKET_TYPE type, void * packet_object) {
char * payload_buffer = malloc(128); //max buffer size is 128
uint8_t payload_length = 12;
pack(target, type, packet_object, payload_buffer);
uint16_t destination = target;
uint16_t sequence_no = GLOBAL.hash++;
send_callback_ptr(payload, payload_length, destination, sequence_no);
};
At the top is the callback and the callback-setting function. 'send' takes a target, a packet type and a packet object (void * so python can pass it a pointer to a PyObject). It then packs all the data into char * payload_buffer using 'pack()' (pack just uses a set of rules to pack a packet object into a byte array ready to send).
def py_callback(buffer, buffer_length, destination, seq_no):
print buffer_length
print "type:", type(buffer)
print "length:", len(buffer)
for i in range(len(buffer)):
print i, '{:02X}'.format(ord(buffer[i]))
MyModule.send_callback_set(py_callback)
The python then looks like this which recieves the payload, payload_length etc from the call to send_callback_ptr at the end of the C function send().
In one example, the python callback function only receives 3 bytes when it should get 13 because the 4th byte in the array is a 0.
I'm trying to send messages through the serial USB interface of my Arduino (C++) to a Raspberry Pi (Python).
On the Arduino side I define a struct which I then copy into a char[]. The last part of the struct contains a checksum that I want to calculate using CRC32. I copy the struct into a temporary char array -4 bytes to strip the checksum field. The checksum is then calculated using the temporary array and the result is added to the struct. The struct is then copied into byteMsg which gets send over the serial connection.
On the raspberry end I do the reverse, I receive the bytestring and calculate the checksum over the message - 4 bytes. Then unpack the bytestring and compare the received and calculated checksum but this fails unfortunately.
For debugging I compared the crc32 check on both the python and arduino for the string "Hello World" and they generated the same checksum so doesn't seem to be a problem with the library. The raspberry is also able to decode the rest of the message just fine so the unpacking of the data into variables seem to be ok as well.
Any help would be much appreciated.
The Python Code:
def unpackMessage(self, message):
""" Processes a received byte string from the arduino """
# Unpack the received message into struct
(messageID, acknowledgeID, module, commandType,
data, recvChecksum) = struct.unpack('<LLBBLL', message)
# Calculate the checksum of the recv message minus the last 4
# bytes that contain the sent checksum
calcChecksum = crc32(message[:-4])
if recvChecksum == calcChecksum:
print "Checksum checks out"
The Aruino crc32 library taken from http://excamera.com/sphinx/article-crc.html
crc32.h
#include <avr/pgmspace.h>
static PROGMEM prog_uint32_t crc_table[16] = {
0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
};
unsigned long crc_update(unsigned long crc, byte data)
{
byte tbl_idx;
tbl_idx = crc ^ (data >> (0 * 4));
crc = pgm_read_dword_near(crc_table + (tbl_idx & 0x0f)) ^ (crc >> 4);
tbl_idx = crc ^ (data >> (1 * 4));
crc = pgm_read_dword_near(crc_table + (tbl_idx & 0x0f)) ^ (crc >> 4);
return crc;
}
unsigned long crc_string(char *s)
{
unsigned long crc = ~0L;
while (*s)
crc = crc_update(crc, *s++);
crc = ~crc;
return crc;
}
Main Arduino Sketch
struct message_t {
unsigned long messageID;
unsigned long acknowledgeID;
byte module;
byte commandType;
unsigned long data;
unsigned long checksum;
};
void sendMessage(message_t &msg)
{
// Set the messageID
msg.messageID = 10;
msg.checksum = 0;
// Copy the message minus the checksum into a char*
// Then perform the checksum on the message and copy
// the full msg into byteMsg
char byteMsgForCrc32[sizeof(msg)-4];
memcpy(byteMsgForCrc32, &msg, sizeof(msg)-4);
msg.checksum = crc_string(byteMsgForCrc32);
char byteMsg[sizeof(msg)];
memcpy(byteMsg, &msg, sizeof(msg));
Serial.write(byteMsg, sizeof(byteMsg));
void loop() {
message_t msg;
msg.module = 0x31;
msg.commandType = 0x64;
msg.acknowledgeID = 0;
msg.data = 10;
sendMessage(msg);
Kind Regards,
Thiezn
You are making the classic struct-to-network/serial/insert communication layer mistake. Structs have hidden padding in order to align the members onto suitable memory boundaries. This is not guaranteed to be the same across different computers, let alone different CPUs/microcontrollers.
Take this struct as an example:
struct Byte_Int
{
int x;
char y;
int z;
}
Now on a basic 32-bit x86 CPU you have a 4-byte memory boundary. Meaning that variables are aligned to either 4 bytes, 2 bytes or not at all according to the type of variable. The example would look like this in memory: int x on bytes 0,1,2,3, char y on byte 4, int z on bytes 8,9,10,11. Why not use the three bytes on the second line? Because then the memory controller would have to do two fetches to get a single number! A controller can only read one line at a time. So, structs (and all other kinds of data) have hidden padding in order to get variables aligned in memory. The example struct would have a sizeof() of 12, and not 9!
Now, how does that relate to your problem? You are memcpy()ing a struct directly into a buffer, including the padding. The computer on the other end doesn't know about this padding and misinterprets the data. What you need a serialization function that takes the members of your structs and pasts them into a buffer one at a time, that way you lose the padding and end up with something like this:
[0,1,2,3: int x][4: char y][5,6,7,8: int z]. All as one lengthy bytearray/string which can be safely sent using Serial(). Of course on the other end you would have to parse this string into intelligible data. Python's unpack() does this for you as long as you give the right format string.
Lastly, an int on an Arduino is 16 bits long. On a pc generally 4! So assemble your unpack format string with care.
The char array I was passing to the crc_string function contained '\0' characters. The crc_string was iterating through the array until it found a '\0' which shouldn't happen in this case since I was using the char array as a stream of bytes to be sent over a serial connection.
I've changed the crc_string function to take the array size as argument and iterate through the array using that value. This solved the issue.
Here's the new function
unsigned long crc_string(char *s, size_t arraySize)
{
unsigned long crc = ~0L;
for (int i=0; i < arraySize; i++) {
crc = crc_update(crc, s[i]);
}
crc = ~crc;
return crc;
}
This is the code I am currently using to send and receive int values from a RaspberryPi to an Arduino using i2C. It works fine for values 0-255, but because of the 1 byte limit, anything larger fails.
To circumvent this, I'd like to send and receive string values instead, and then convert back to int if necessary.
What changes would I need to make in the following?
Here is my RPi Python code
import smbus
import time
# for RPI version 1, use "bus = smbus.SMBus(0)"
bus = smbus.SMBus(1)
# This is the address we setup in the Arduino Program
address = 0x04
def writeNumber(value):
bus.write_byte(address, value)
# bus.write_byte_data(address, 0, value)
return -1
def readNumber():
number = bus.read_byte(address)
# number = bus.read_byte_data(address, 1)
return number
while True:
try:
var = int(raw_input("Enter 1 - 9: "))
except ValueError:
print "Could you at least give me an actual number?"
continue
writeNumber(var)
print "RPI: Hi Arduino, I sent you ", var
# sleep one second
#time.sleep(1)
number = readNumber()
print "Arduino: Hey RPI, I received a digit ", number
print
And here is my Arduino code
#include <Wire.h>
#define SLAVE_ADDRESS 0x04
int number = 0;
int state = 0;
void setup() {
pinMode(13, OUTPUT);
Serial.begin(9600); // start serial for output
// initialize i2c as slave
Wire.begin(SLAVE_ADDRESS);
// define callbacks for i2c communication
Wire.onReceive(receiveData);
Wire.onRequest(sendData);
Serial.println("Ready!");
}
void loop() {
delay(100);
}
// callback for received data
void receiveData(int byteCount){
while(Wire.available()) {
number = Wire.read();
if (Wire.available() > 1) // at least 2 bytes
{
number = Wire.read() * 256 + Wire.read();
}
Serial.print("data received: ");
Serial.println(number);
//sendData();
if (number == 1){
if (state == 0){
digitalWrite(13, HIGH); // set the LED on
state = 1;
}
else{
digitalWrite(13, LOW); // set the LED off
state = 0;
}
}
}
}
// callback for sending data
void sendData(){
Wire.write(number);
}
This problem essentially has two parts: splitting an integer into its bytes and reassembling an integer from bytes. These parts must be replicated on both the Pi and Arduino. I'll address the Pi side first, in Python:
Splitting an integer:
def writeNumber(value):
# assuming we have an arbitrary size integer passed in value
for character in str(val): # convert into a string and iterate over it
bus.write_byte(address, ord(character)) # send each char's ASCII encoding
return -1
Reassembling an integer from bytes:
def readNumber():
# I'm not familiar with the SMbus library, so you'll have to figure out how to
# tell if any more bytes are available and when a transmission of integer bytes
# is complete. For now, I'll use the boolean variable "bytes_available" to mean
# "we are still transmitting a single value, one byte at a time"
byte_list = []
while bytes_available:
# build list of bytes in an integer - assuming bytes are sent in the same
# order they would be written. Ex: integer '123' is sent as '1', '2', '3'
byte_list.append(bus.read_byte(address))
# now recombine the list of bytes into a single string, then convert back into int
number = int("".join([chr(byte) for byte in byte_list]))
return number
Arduino Side, in C
Split an Integer:
void sendData(){
int i = 0;
String outString = String(number); /* convert integer to string */
int len = outString.length()+1 /* obtain length of string w/ terminator */
char ascii_num[len]; /* create character array */
outString.toCharArray(ascii_num, len); /* copy string to character array */
for (i=0; i<len); ++i){
Wire.write(ascii_num[i]);
}
}
Reassembling a received Integer:
Note: I'm having some trouble understanding what your other code in this routine is doing, so I'm going to reduce it to just assembling the integer.
void receiveData(int byteCount){
int inChar;
String inString = "";
/* As with the Python receive routine, it will be up to you to identify the
terminating condition for this loop - "bytes_available" means the same thing
as before */
while(bytes_available){
inChar = Wire.read();
inString += char(inChar);
}
number = inString.toInt();
}
I don't have the materials on hand to test this, so it's possible I've gotten the byte order flipped in one routine or another. If you find stuff coming in or out backwards, the easiest place to fix it is in the Python script by using the built-in function reversed() on the strings or lists.
References (I used some code from the Arduino Examples):
Arduino String objects
Arduino String Constructors
Python Built-ins chr() and ord()
Check the following Link:
[http://www.i2c-bus.org/][1]
When I was sending data back and forward using I2C I was converting the string characters to bytearrays and viceversa. So since you are always sending bytes. It will always work since you are sending numbers between 0-255.
Not sure this helps but at least may give you an idea.
You could convert the number to a string of digits like you said. But you could also send the raw bytes.
String of digits
Advantages
Number can have infinite digits. Note that when Arduino reads the number as a string, it is infinite, but you can't convert it all to integer if it overflows the 16-bit range (or 32-bit for Due).
Disadvantages
Variable size, thus requiring more effort in reading.
Waste of bytes, because each decimal digit would be a byte, plus the null-terminator totalizing (digits + 1) size.
Having to use decimal arithmetic (which really is only useful for human counting), note that a "number to string" operation also uses decimal arithmetic.
You can't send/receive negative numbers (unless you send the minus signal, wasting more time and bytes).
Raw bytes
Advantages
Number of bytes sent for each integer is always 4.
You can send/receive negative numbers.
The bitwise arithmetic in C++ for extracting each byte from the number is really fast.
Python already has the struct library which packs/unpacks each byte in a number to a string to send/receive, so you don't need to do the arithmetic like in C++.
Disadvantages
Number has a limited range (signed 32-bit integer in our case, which ranges from -2147483648 to 2147483647). But it doesn't matter because no Arduino can handle more than 32-bit anyways.
So I would use the raw bytes method, which I can provide some untested functions here:
import struct
# '<i' stands for litle-endian signed integer
def writeNumber(value):
strout = struct.pack('<i', value)
for i in range(4):
bus.write_byte(address, strout[i])
return -1
def readNumber():
strin = ""
for _ in range(4):
strin += bus.read_byte(address)
return struct.unpack('<i', strin)[0]
And the Arduino part:
void receiveData(int byteCount)
{
// Check if we have a 32-bit number (4 bytes) in queue
while(Wire.available() >= 4)
{
number = 0;
for(int i = 0; i < 32; i += 8)
{
// This is merging the bytes into a single integer
number |= ((int)Wire.read() << i);
}
Serial.print("data received: ");
Serial.println(number);
// ...
}
}
void sendData()
{
for(int i = 0; i < 32; i += 8)
{
// This is extracting each byte from the number
Wire.write((number >> i) & 0xFF);
}
}
I don't have any experience with I2C, but if its queue is a FIFO, then the code should work.
I am very new at using Python and very rusty with C, so I apologize in advance for how dumb and/or lost I sound.
I have function in C that creates a .dat file containing data. I am opening the file using Python to read the file. One of the things I need to read are a struct that was created in the C function and printed in binary. In my Python code I am at the appropriate line of the file to read in the struct. I have tried both unpacking the stuct item by item and as a whole without success. Most of the items in the struct were declared 'real' in the C code. I am working on this code with someone else and the main source code is his and has declared the variables as 'real'. I need to put this in a loop because I want to read all of the files in the directory that end in '.dat'. To start the loop I have:
for files in os.listdir(path):
if files.endswith(".dat"):
part = open(path + files, "rb")
for line in part:
Which then I read all of the lines previous to the one containing the struct. Then I get to that line and have:
part_struct = part.readline()
r = struct.unpack('<d8', part_struct[0])
I'm trying to just read the first thing stored in the struct. I saw an example of this somewhere on here. And when I try this I'm getting an error that reads:
struct.error: repeat count given without format specifier
I will take any and all tips someone can give me. I have been stuck on this for a few days and have tried many different things. To be honest, I think I don't understand the struct module but I've read as much as I could on it.
Thanks!
You could use ctypes.Structure or struct.Struct to specify format of the file. To read structures from the file produced by C code in #perreal's answer:
"""
struct { double v; int t; char c;};
"""
from ctypes import *
class YourStruct(Structure):
_fields_ = [('v', c_double),
('t', c_int),
('c', c_char)]
with open('c_structs.bin', 'rb') as file:
result = []
x = YourStruct()
while file.readinto(x) == sizeof(x):
result.append((x.v, x.t, x.c))
print(result)
# -> [(12.100000381469727, 17, 's'), (12.100000381469727, 17, 's'), ...]
See io.BufferedIOBase.readinto(). It is supported in Python 3 but it is undocumented in Python 2.7 for a default file object.
struct.Struct requires to specify padding bytes (x) explicitly:
"""
struct { double v; int t; char c;};
"""
from struct import Struct
x = Struct('dicxxx')
with open('c_structs.bin', 'rb') as file:
result = []
while True:
buf = file.read(x.size)
if len(buf) != x.size:
break
result.append(x.unpack_from(buf))
print(result)
It produces the same output.
To avoid unnecessary copying Array.from_buffer(mmap_file) could be used to get an array of structs from a file:
import mmap # Unix, Windows
from contextlib import closing
with open('c_structs.bin', 'rb') as file:
with closing(mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_COPY)) as mm:
result = (YourStruct * 3).from_buffer(mm) # without copying
print("\n".join(map("{0.v} {0.t} {0.c}".format, result)))
Some C code:
#include <stdio.h>
typedef struct { double v; int t; char c;} save_type;
int main() {
save_type s = { 12.1f, 17, 's'};
FILE *f = fopen("output", "w");
fwrite(&s, sizeof(save_type), 1, f);
fwrite(&s, sizeof(save_type), 1, f);
fwrite(&s, sizeof(save_type), 1, f);
fclose(f);
return 0;
}
Some Python code:
import struct
with open('output', 'rb') as f:
chunk = f.read(16)
while chunk != "":
print len(chunk)
print struct.unpack('dicccc', chunk)
chunk = f.read(16)
Output:
(12.100000381469727, 17, 's', '\x00', '\x00', '\x00')
(12.100000381469727, 17, 's', '\x00', '\x00', '\x00')
(12.100000381469727, 17, 's', '\x00', '\x00', '\x00')
but there is also the padding issue. The padded size of save_type is 16, so we read 3 more characters and ignore them.
A number in the format specifier means a repeat count, but it has to go before the letter, like '<8d'. However you said you just want to read one element of the struct. I guess you just want '<d'. I guess you are trying to specify the number of bytes to read as 8, but you don't need to do that. d assumes that.
I also noticed you are using readline. That seems wrong for reading binary data. It will read until the next carriage return / line feed, which will occur randomly in binary data. What you want to do is use read(size), like this:
part_struct = part.read(8)
r = struct.unpack('<d', part_struct)
Actually, you should be careful, as read can return less data than you request. You need to repeat it if it does.
part_struct = b''
while len(part_struct) < 8:
data = part.read(8 - len(part_struct))
if not data: raise IOException("unexpected end of file")
part_struct += data
r = struct.unpack('<d', part_struct)
I had same problem recently, so I had made module for the task, stored here: http://pastebin.com/XJyZMyHX
example code:
MY_STRUCT="""typedef struct __attribute__ ((__packed__)){
uint8_t u8;
uint16_t u16;
uint32_t u32;
uint64_t u64;
int8_t i8;
int16_t i16;
int32_t i32;
int64_t i64;
long long int lli;
float flt;
double dbl;
char string[12];
uint64_t array[5];
} debugInfo;"""
PACKED_STRUCT='\x01\x00\x01\x00\x00\x01\x00\x00\x00\x00\x00\x01\x00\x00\x00\xff\x00\xff\x00\x00\xff\xff\x00\x00\x00\x00\xff\xff\xff\xff*\x00\x00\x00\x00\x00\x00\x00ff\x06#\x14\xaeG\xe1z\x14\x08#testString\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00'
if __name__ == '__main__':
print "String:"
print depack_bytearray_to_str(PACKED_STRUCT,MY_STRUCT,"<" )
print "Bytes in Stuct:"+str(structSize(MY_STRUCT))
nt=depack_bytearray_to_namedtuple(PACKED_STRUCT,MY_STRUCT,"<" )
print "Named tuple nt:"
print nt
print "nt.string="+nt.string
The result should be:
String:
u8:1
u16:256
u32:65536
u64:4294967296
i8:-1
i16:-256
i32:-65536
i64:-4294967296
lli:42
flt:2.09999990463
dbl:3.01
string:u'testString\x00\x00'
array:(1, 2, 3, 4, 5)
Bytes in Stuct:102
Named tuple nt:
CStruct(u8=1, u16=256, u32=65536, u64=4294967296L, i8=-1, i16=-256, i32=-65536, i64=-4294967296L, lli=42, flt=2.0999999046325684, dbl=3.01, string="u'testString\\x00\\x00'", array=(1, 2, 3, 4, 5))
nt.string=u'testString\x00\x00'
Numpy can be used to read/write binary data. You just need to define a custom np.dtype instance that defines the memory layout of your c-struct.
For example, here is some C++ code defining a struct (should work just as well for C structs, though I'm not a C expert):
struct MyStruct {
uint16_t FieldA;
uint16_t pad16[3];
uint32_t FieldB;
uint32_t pad32[2];
char FieldC[4];
uint64_t FieldD;
uint64_t FieldE;
};
void write_struct(const std::string& fname, MyStruct h) {
// This function serializes a MyStruct instance and
// writes the binary data to disk.
std::ofstream ofp(fname, std::ios::out | std::ios::binary);
ofp.write(reinterpret_cast<const char*>(&h), sizeof(h));
}
Based on the advice I found at stackoverflow.com/a/5397638, I've included some padding in the struct (the pad16 and pad32 fields) so that serialization will happen in a more predictable way. I think that this is a C++ thing; it might not be necessary when using plain ol' C structs.
Now, in python, we create a numpy.dtype object describing the memory-layout of MyStruct:
import numpy as np
my_struct_dtype = np.dtype([
("FieldA" , np.uint16 , ),
("pad16" , np.uint16 , (3,) ),
("FieldB" , np.uint32 ),
("pad32" , np.uint32 , (2,) ),
("FieldC" , np.byte , (4,) ),
("FieldD" , np.uint64 ),
("FieldE" , np.uint64 ),
])
Then use numpy's fromfile to read the binary file where you've saved your c-struct:
# read data
struct_data = np.fromfile(fpath, dtype=my_struct_dtype, count=1)[0]
FieldA = struct_data["FieldA"]
FieldB = struct_data["FieldB"]
FieldC = struct_data["FieldC"]
FieldD = struct_data["FieldD"]
FieldE = struct_data["FieldE"]
if FieldA != expected_value_A:
raise ValueError("Bad FieldA, got %d" % FieldA)
if FieldB != expected_value_B:
raise ValueError("Bad FieldB, got %d" % FieldB)
if FieldC.tobytes() != b"expc":
raise ValueError("Bad FieldC, got %s" % FieldC.tobytes().decode())
...
The count=1 argument in the above call np.fromfile(..., count=1) is so that the returned array will have only one element; this means "read the first struct instance from the file". Note that I am indexing [0] to get that element out of the array.
If you have appended the data from many c-structs to the same file, you can use fromfile(..., count=n) to read n struct instances into a numpy array of shape (n,). Setting count=-1, which is the default for the np.fromfile and np.frombuffer functions, means "read all of the data", resulting in a 1-dimensional array of shape (number_of_struct_instances,).
You can also use the offset keyword argument to np.fromfile to control where in the file the data read will begin.
To conclude, here are some numpy functions that will be useful once your custom dtype has been defined:
Reading binary data as a numpy array:
np.frombuffer(bytes_data, dtype=...):
Interpret the given binary data (e.g. a python bytes instance)
as a numpy array of the given dtype. You can define a custom
dtype that describes the memory layout of your c struct.
np.fromfile(filename, dtype=...):
Read binary data from filename. Should be the same result as
np.frombuffer(open(filename, "rb").read(), dtype=...).
Writing a numpy array as binary data:
ndarray.tobytes():
Construct a python bytes instance containing
raw data from the given numpy array. If the array's data has dtype
corresponding to a c-struct, then the bytes coming from
ndarray.tobytes can be deserialized
by c/c++ and interpreted as an (array of) instances of that c-struct.
ndarray.tofile(filename):
Binary data from the array is written to filename.
This data could then be deserialized by c/c++.
Equivalent to open("filename", "wb").write(a.tobytes()).