CRC32 hash of python string - python

Using an existing C example algorithm, I want to generate the correct CRC32 hash for a string in python. However, I am receiving incorrect results. I mask the result of every operation and attempt to copy the original algorithm's logic. The C code was provided by the same website which has a webpage string hash checking tool, so it is likely to be correct.
Below is a complete Python file including C code in its comments which it attempts to mimic. All pertinent information is in the file.
P_32 = 0xEDB88320
init = 0xffffffff
_ran = True
tab32 = []
def mask32(n):
return n & 0xffffffff
def mask8(n):
return n & 0x000000ff
def mask1(n):
return n & 0x00000001
def init32():
for i in range(256):
crc = mask32(i)
for j in range(8):
if (mask1(crc) == 1):
crc = mask32(mask32(crc >> 1) ^ P_32)
else:
crc = mask32(crc >> 1)
tab32.append(crc)
global _ran
_ran = False
def update32(crc, char):
char = mask8(char)
t = crc ^ char
crc = mask32(mask32(crc >> 8) ^ tab32[mask8(t)])
return crc
def run(string):
if _ran:
init32()
crc = init
for c in string:
crc = update32(crc, ord(c))
print(hex(crc)[2:].upper())
check0 = "The CRC32 of this string is 4A1C449B"
check1 = "123456789" # CBF43926
run(check0) # Produces B5E3BB64
run(check1) # Produces 340BC6D9
# Check CRC-32 on http://www.lammertbies.nl/comm/info/crc-calculation.html#intr
"""
/* http://www.lammertbies.nl/download/lib_crc.zip */
#define P_32 0xEDB88320L
static int crc_tab32_init = FALSE;
static unsigned long crc_tab32[256];
/*******************************************************************\
* *
* unsigned long update_crc_32( unsigned long crc, char c ); *
* *
* The function update_crc_32 calculates a new CRC-32 value *
* based on the previous value of the CRC and the next byte *
* of the data to be checked. *
* *
\*******************************************************************/
unsigned long update_crc_32( unsigned long crc, char c ) {
unsigned long tmp, long_c;
long_c = 0x000000ffL & (unsigned long) c;
if ( ! crc_tab32_init ) init_crc32_tab();
tmp = crc ^ long_c;
crc = (crc >> 8) ^ crc_tab32[ tmp & 0xff ];
return crc;
} /* update_crc_32 */
/*******************************************************************\
* *
* static void init_crc32_tab( void ); *
* *
* The function init_crc32_tab() is used to fill the array *
* for calculation of the CRC-32 with values. *
* *
\*******************************************************************/
static void init_crc32_tab( void ) {
int i, j;
unsigned long crc;
for (i=0; i<256; i++) {
crc = (unsigned long) i;
for (j=0; j<8; j++) {
if ( crc & 0x00000001L ) crc = ( crc >> 1 ) ^ P_32;
else crc = crc >> 1;
}
crc_tab32[i] = crc;
}
crc_tab32_init = TRUE;
} /* init_crc32_tab */
"""

There's just one thing that's wrong with the current implementation and the fix is actually just one line of code to the end of your run function which is:
crc = crc ^ init
Which if added to your run function look like this:
def run(string):
if _ran:
init32()
crc = init
for c in string:
crc = update32(crc, ord(c))
crc = crc ^ init
print(hex(crc)[2:].upper())
This will give you the correct results you are expecting.The reason that this is necessary is after you are done updating the CRC32, the finalization of it is XORing it with the 0xFFFFFFFF. Since you only had the init table and update functions and not the finalize, you were one step off from the actual crc.
Another C implimentation that is a little more straightforward is this one it's a little bit easier to see the whole process. The only thing slightly obsure is the init poly ~0x0 is the same (0xFFFFFFFF).

Related

Unusual behaviour of Ant Colony Optimization for Closest String Problem in Python and C++

This is probably going to be a long question, I apologize in advance.
I'm working on a project with the goal of researching different solutions for the closest string problem.
Let s_1, ... s_n be strings of length m. Find a string s of length m such that it minimizes max{d(s, s_i) | i = 1, ..., n}, where d is the hamming distance.
One solution that has been tried is one using ant colony optimization, as decribed here.
The paper itself does not go into implementation details, so I've done my best on efficiency. However, efficiency is not the only unusual behaviour.
I'm not sure whether it's common pratice to do so, but I will present my code through pastebin since I believe it would overwhelm the thread if I should put it directly here. If that turns out to be a problem, I won't mind editing the thread to put it here directly. As all the previous algorithms I've experimented with, I've written this one in python initially. Here's the code:
def solve_(self, problem: CSProblem) -> CSSolution:
m, n, alphabet, strings = problem.m, problem.n, problem.alphabet, problem.strings
A = len(alphabet)
rho = self.config['RHO']
colony_size = self.config['COLONY_SIZE']
global_best_ant = None
global_best_metric = m
ants = np.full((colony_size, m), '')
world_trails = np.full((m, A), 1 / A)
for iteration in range(self.config['MAX_ITERS']):
local_best_ant = None
local_best_metric = m
for ant_idx in range(colony_size):
for next_character_index in range(m):
ants[ant_idx][next_character_index] = random.choices(alphabet, weights=world_trails[next_character_index], k=1)[0]
ant_metric = utils.problem_metric(ants[ant_idx], strings)
if ant_metric < local_best_metric:
local_best_metric = ant_metric
local_best_ant = ants[ant_idx]
# First we perform pheromone evaporation
for i in range(m):
for j in range(A):
world_trails[i][j] = world_trails[i][j] * (1 - rho)
# Now, using the elitist strategy, only the best ant is allowed to update his pheromone trails
best_ant_ys = (alphabet.index(a) for a in local_best_ant)
best_ant_xs = range(m)
for x, y in zip(best_ant_xs, best_ant_ys):
world_trails[x][y] = world_trails[x][y] + (1 - local_best_metric / m)
if local_best_metric < global_best_metric:
global_best_metric = local_best_metric
global_best_ant = local_best_ant
return CSSolution(''.join(global_best_ant), global_best_metric)
The utils.problem_metric function looks like this:
def hamming_distance(s1, s2):
return sum(c1 != c2 for c1, c2 in zip(s1, s2))
def problem_metric(string, references):
return max(hamming_distance(string, r) for r in references)
I've seen that there are a lot more tweaks and other parameters you can add to ACO, but I've kept it simple for now. The configuration I'm using is is 250 iterations, colony size od 10 ants and rho=0.1. The problem that I'm testing it on is from here: http://tcs.informatik.uos.de/research/csp_cssp , the one called 2-10-250-1-0.csp (the first one). The alphabet consists only of '0' and '1', the strings are of length 250, and there are 10 strings in total.
For the ACO configuration that I've mentioned, this problem, using the python solver, gets solved on average in 5 seconds, and the average target function value is 108.55 (simulated 20 times). The correct target function value is 96. Ironically, the 5-second average is good compared to what it used to be in my first attempt of implementing this solution. However, it's still surprisingly slow.
After doing all kinds of optimizations, I've decided to try and implement the exact same solution in C++ so see whether there will be a significant difference between the running times. Here's the C++ solution:
#include <iostream>
#include <vector>
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <random>
#include <chrono>
#include <map>
class CSPProblem{
public:
int m;
int n;
std::vector<char> alphabet;
std::vector<std::string> strings;
CSPProblem(int m, int n, std::vector<char> alphabet, std::vector<std::string> strings)
: m(m), n(n), alphabet(alphabet), strings(strings)
{
}
static CSPProblem from_csp(std::string filepath){
std::ifstream file(filepath);
std::string line;
std::vector<std::string> input_lines;
while (std::getline(file, line)){
input_lines.push_back(line);
}
int alphabet_size = std::stoi(input_lines[0]);
int n = std::stoi(input_lines[1]);
int m = std::stoi(input_lines[2]);
std::vector<char> alphabet;
for (int i = 3; i < 3 + alphabet_size; i++){
alphabet.push_back(input_lines[i][0]);
}
std::vector<std::string> strings;
for (int i = 3 + alphabet_size; i < input_lines.size(); i++){
strings.push_back(input_lines[i]);
}
return CSPProblem(m, n, alphabet, strings);
}
int hamm(const std::string& s1, const std::string& s2) const{
int h = 0;
for (int i = 0; i < s1.size(); i++){
if (s1[i] != s2[i])
h++;
}
return h;
}
int measure(const std::string& sol) const{
int mm = 0;
for (const auto& s: strings){
int h = hamm(sol, s);
if (h > mm){
mm = h;
}
}
return mm;
}
friend std::ostream& operator<<(std::ostream& out, CSPProblem problem){
out << "m: " << problem.m << std::endl;
out << "n: " << problem.n << std::endl;
out << "alphabet_size: " << problem.alphabet.size() << std::endl;
out << "alphabet: ";
for (const auto& a: problem.alphabet){
out << a << " ";
}
out << std::endl;
out << "strings:" << std::endl;
for (const auto& s: problem.strings){
out << "\t" << s << std::endl;
}
return out;
}
};
std::random_device rd;
std::mt19937 gen(rd());
int get_from_distrib(const std::vector<float>& weights){
std::discrete_distribution<> d(std::begin(weights), std::end(weights));
return d(gen);
}
int max_iter = 250;
float rho = 0.1f;
int colony_size = 10;
int ant_colony_solver(const CSPProblem& problem){
srand(time(NULL));
int m = problem.m;
int n = problem.n;
auto alphabet = problem.alphabet;
auto strings = problem.strings;
int A = alphabet.size();
float init_pher = 1.0 / A;
std::string global_best_ant;
int global_best_matric = m;
std::vector<std::vector<float>> world_trails(m, std::vector<float>(A, 0.0f));
for (int i = 0; i < m; i++){
for (int j = 0; j < A; j++){
world_trails[i][j] = init_pher;
}
}
std::vector<std::string> ants(colony_size, std::string(m, ' '));
for (int iteration = 0; iteration < max_iter; iteration++){
std::string local_best_ant;
int local_best_metric = m;
for (int ant_idx = 0; ant_idx < colony_size; ant_idx++){
for (int next_character_idx = 0; next_character_idx < m; next_character_idx++){
char next_char = alphabet[get_from_distrib(world_trails[next_character_idx])];
ants[ant_idx][next_character_idx] = next_char;
}
int ant_metric = problem.measure(ants[ant_idx]);
if (ant_metric < local_best_metric){
local_best_metric = ant_metric;
local_best_ant = ants[ant_idx];
}
}
// Evaporation
for (int i = 0; i < m; i++){
for (int j = 0; j < A; j++){
world_trails[i][j] = world_trails[i][j] + (1.0 - rho);
}
}
std::vector<int> best_ant_xs;
for (int i = 0; i < m; i++){
best_ant_xs.push_back(i);
}
std::vector<int> best_ant_ys;
for (const auto& c: local_best_ant){
auto loc = std::find(std::begin(alphabet), std::end(alphabet), c);
int idx = loc- std::begin(alphabet);
best_ant_ys.push_back(idx);
}
for (int i = 0; i < m; i++){
int x = best_ant_xs[i];
int y = best_ant_ys[i];
world_trails[x][y] = world_trails[x][y] + (1.0 - static_cast<float>(local_best_metric) / m);
}
if (local_best_metric < global_best_matric){
global_best_matric = local_best_metric;
global_best_ant = local_best_ant;
}
}
return global_best_matric;
}
int main(){
auto problem = CSPProblem::from_csp("in.csp");
int TRIES = 20;
std::vector<int> times;
std::vector<int> measures;
for (int i = 0; i < TRIES; i++){
auto start = std::chrono::high_resolution_clock::now();
int m = ant_colony_solver(problem);
auto stop = std::chrono::high_resolution_clock::now();
int duration = std::chrono::duration_cast<std::chrono::milliseconds>(stop - start).count();
times.push_back(duration);
measures.push_back(m);
}
float average_time = static_cast<float>(std::accumulate(std::begin(times), std::end(times), 0)) / TRIES;
float average_measure = static_cast<float>(std::accumulate(std::begin(measures), std::end(measures), 0)) / TRIES;
std::cout << "Average running time: " << average_time << std::endl;
std::cout << "Average solution: " << average_measure << std::endl;
std::cout << "all solutions: ";
for (const auto& m: measures) std::cout << m << " ";
std::cout << std::endl;
return 0;
}
The average running time now is only 530.4 miliseconds. However, the average target function value is 122.75, which is significantly higher than that of the python solution.
If the average function values were the same, and the times were as they are, I would simply write this off as 'C++ is faster than python' (even though the difference in speed is also very suspiscious). But, since C++ yields worse solutions, it leads me to believe that I've done something wrong in C++. What I'm suspiscious of is the way I'm generating an alphabet index using weights. In python I've done it using random.choices as follows:
ants[ant_idx][next_character_index] = random.choices(alphabet, weights=world_trails[next_character_index], k=1)[0]
As for C++, I haven't done it in a while so I'm a bit rusty on reading cppreference (which is a skill of its own), and the std::discrete_distribution solution is something I've plain copied from the reference:
std::random_device rd;
std::mt19937 gen(rd());
int get_from_distrib(const std::vector<float>& weights){
std::discrete_distribution<> d(std::begin(weights), std::end(weights));
return d(gen);
}
The suspiscious thing here is the fact that I'm declaring the std::random_device and std::mt19937 objects globally and using the same ones every time. I have not been able to find an answer to whether this is the way they're meant to be used. However, if I put them in the function:
int get_from_distrib(const std::vector<float>& weights){
std::random_device rd;
std::mt19937 gen(rd());
std::discrete_distribution<> d(std::begin(weights), std::end(weights));
return d(gen);
}
the average running time gets significantly worse, clocking in at 8.84 seconds. However, even more surprisingly, the average function value gets worse as well, at 130.
Again, if only one of the two things changed (say if only the time went up) I would have been able to draw some conclusions. This way it only gets more confusing.
So, does anybody have an idea of why this is happening?
Thanks in advance.
MAJOR EDIT: I feel embarrased having asked such a huge question when in fact the problem lies in a simple typo. Namely in the evaporation step in the C++ version I put a + instead of a *.
Now the algorithms behave identically in terms of average solution quality.
However, I could still use some tips on how to optimize the python version.
Apart form the dumb mistake I've mentioned in the question edit, it seems I've finally found a way to optimize the python solution decently. First of all, keeping world_trails and ants as numpy arrays instead of lists of lists actually slowed things down. Furthermore, I actually stopped keeping a list of ants altogether since I only ever need the best one per iteration.
Lastly, running cProfile indicated that a lot of the time was spent on random.choices, therefore I've decided to implement my own version of it suited specifically for this case. I've done this by pre-computing total weight sum per character for each next iteration (in the trail_row_wise_sums array), and using the following function:
def fast_pick(arr, weights, ws):
r = random.random()*ws
for i in range(len(arr)):
if r < weights[i]:
return arr[i]
r -= weights[i]
return 0
The new version now looks like this:
def solve_(self, problem: CSProblem) -> CSSolution:
m, n, alphabet, strings = problem.m, problem.n, problem.alphabet, problem.strings
A = len(alphabet)
rho = self.config['RHO']
colony_size = self.config['COLONY_SIZE']
miters = self.config['MAX_ITERS']
global_best_ant = None
global_best_metric = m
init_pher = 1.0 / A
world_trails = [[init_pher for _ in range(A)] for _ in range(m)]
trail_row_wise_sums = [1.0 for _ in range(m)]
for iteration in tqdm(range(miters)):
local_best_ant = None
local_best_metric = m
for _ in range(colony_size):
ant = ''.join(fast_pick(alphabet, world_trails[next_character_index], trail_row_wise_sums[next_character_index]) for next_character_index in range(m))
ant_metric = utils.problem_metric(ant, strings)
if ant_metric <= local_best_metric:
local_best_metric = ant_metric
local_best_ant = ant
# First we perform pheromone evaporation
for i in range(m):
for j in range(A):
world_trails[i][j] = world_trails[i][j] * (1 - rho)
# Now, using the elitist strategy, only the best ant is allowed to update his pheromone trails
best_ant_ys = (alphabet.index(a) for a in local_best_ant)
best_ant_xs = range(m)
for x, y in zip(best_ant_xs, best_ant_ys):
world_trails[x][y] = world_trails[x][y] + (1 - 1.0*local_best_metric / m)
if local_best_metric < global_best_metric:
global_best_metric = local_best_metric
global_best_ant = local_best_ant
trail_row_wise_sums = [sum(world_trails[i]) for i in range(m)]
return CSSolution(global_best_ant, global_best_metric)
The average running time is now down to 800 miliseconds (compared to 5 seconds that it was before). Granted, applying the same fast_pick optimization to the C++ solution did also speed up the C++ version (around 150 ms) but I guess now I can write it off as C++ being faster than python.
Profiler also showed that a lot of the time was spent on calculating Hamming distances, but that's to be expected, and apart from that I see no other way of computing the Hamming distance between arbitrary strings more efficiently.

Convert this CRC32 algorithm to Python 3.3

I need to convert this CRC32 algorithm to python (using 3.3), but I am a python noob. I tried the built in binascii.crc32(), but the CRC was incorrect. Apparently, STMicro does the CRC32 a bit different. I found an algorithm that works, now I just need it to be in python.
//****************************************************************************
DWORD Crc32Fast(DWORD Crc, DWORD Data)
{
static const DWORD CrcTable[16] = { // Nibble lookup table for 0x04C11DB7 polynomial
0x00000000,0x04C11DB7,0x09823B6E,0x0D4326D9,0x130476DC,0x17C56B6B,0x1A864DB2,0x1E475005,
0x2608EDB8,0x22C9F00F,0x2F8AD6D6,0x2B4BCB61,0x350C9B64,0x31CD86D3,0x3C8EA00A,0x384FBDBD };
Crc = Crc ^ Data; // Apply all 32-bits
// Process 32-bits, 4 at a time, or 8 rounds
Crc = (Crc << 4) ^ CrcTable[Crc >> 28]; // Assumes 32-bit reg, masking index to 4-bits
Crc = (Crc << 4) ^ CrcTable[Crc >> 28]; // 0x04C11DB7 Polynomial used in STM32
Crc = (Crc << 4) ^ CrcTable[Crc >> 28];
Crc = (Crc << 4) ^ CrcTable[Crc >> 28];
Crc = (Crc << 4) ^ CrcTable[Crc >> 28];
Crc = (Crc << 4) ^ CrcTable[Crc >> 28];
Crc = (Crc << 4) ^ CrcTable[Crc >> 28];
Crc = (Crc << 4) ^ CrcTable[Crc >> 28];
return(Crc);
}
//****************************************************************************
DWORD Crc32FastBlock(DWORD Crc, DWORD Size, DWORD *Buffer) // 32-bit units
{
while(Size--)
Crc = Crc32Fast(Crc, *Buffer++);
return(Crc);
}
If my understanding is not mistaken, this should be the code that you want:
CRC_TABLE = (0x00000000, 0x04C11DB7, 0x09823B6E, 0x0D4326D9,
0x130476DC, 0x17C56B6B, 0x1A864DB2, 0x1E475005,
0x2608EDB8, 0x22C9F00F, 0x2F8AD6D6, 0x2B4BCB61,
0x350C9B64, 0x31CD86D3, 0x3C8EA00A, 0x384FBDBD)
def dword(value):
return value & 0xFFFFFFFF
def crc32_fast(crc, data):
crc, data = dword(crc), dword(data)
crc ^= data
for _ in range(8):
crc = dword(crc << 4) ^ CRC_TABLE[crc >> 28]
return crc
def crc32_fast_block(crc, buffer):
for data in buffer:
crc = crc32_fast(crc, data)
return crc
def crc32_fast_bytes(crc, bytes_data, byteorder='big'):
if len(bytes_data) & 3:
raise ValueError('bytes_data length must be multiple of four')
for index in range(0, len(bytes_data), 4):
data = int.from_bytes(bytes_data[index:index+4], byteorder)
crc = crc32_fast(crc, data)
return crc
The function crc32_fast_block expects an initial crc value and an iterable of numbers to run the algorithm on. crc32_fast_bytes is almost the same but expects a bytes value with a length being a multiple of four.

Same crc32 for Python and C

I need script that will calculate crc32 with the same output for both Python and C.
I'm using right now zlib.crc32, but for C there is no such library and we are writing it on our own basing on Wikipedia. But it doesn't return the same value.
This is our code of C script (copied from wikipedia, based on RFC):
unsigned int crc32( unsigned char *message, unsigned int n )
{
//int i, crc;
unsigned int crc;
unsigned int i;
unsigned int byte, c;
const unsigned int g0 = 0xEDB88320, g1 = g0>>1,
g2 = g0>>2, g3 = g0>>3, g4 = g0>>4, g5 = g0>>5,
g6 = (g0>>6)^g0, g7 = ((g0>>6)^g0)>>1;
i = 0;
crc = 0xFFFFFFFF;
//while ((byte = message[i]) != 0)
while( i != n)
{
byte = message[i]; // Get next byte.
// byte = FrmReadByte( i ); // Get next byte.
crc = crc ^ byte;
c = ((crc<<31>>31) & g7) ^ ((crc<<30>>31) & g6) ^
((crc<<29>>31) & g5) ^ ((crc<<28>>31) & g4) ^
((crc<<27>>31) & g3) ^ ((crc<<26>>31) & g2) ^
((crc<<25>>31) & g1) ^ ((crc<<24>>31) & g0);
crc = ((unsigned)crc >> 8) ^ c;
i = i + 1;
}
return ~crc;
}
EDIT:
We've got only 4KB of ram memory, program itself doesn't live there. Taking 1KB of memory by crc32 script is probably too much and wouldn't fit there.
Thanks for pointing out that ZLIB library exists for C as well.
I'm using right now zlib.crc32, but for C there is no such library
Um, yes, there is. It's called zlib. zlib is written in C, and it's what Python is using! Hence the name of the class.
You can use the crc32() function in zlib. That implementation is a fair bit faster than others you might find. Read zlib.h for the interface information.
You can compile zlib yourself, or it may already be installed on your system.
Update:
I now see your comment (which should be edited into the question since it is critical to getting the right answer) that you have extremely limited memory. Then you can use this:
static uint32_t crc32(uint32_t crc, unsigned char *buf, size_t len)
{
int k;
crc = ~crc;
while (len--) {
crc ^= *buf++;
for (k = 0; k < 8; k++)
crc = crc & 1 ? (crc >> 1) ^ 0xedb88320 : crc >> 1;
}
return ~crc;
}
The crc is initially set to zero.
The use of ~ will give the correct result, since the uint32_t type in stdint.h is assured to be 32 bits.
If you can afford a little more code space, then unrolling the loop will likely speed it up (if the compiler doesn't already do this):
static uint32_t crc32(uint32_t crc, unsigned char *buf, size_t len)
{
crc = ~crc;
while (len--) {
crc ^= *buf++;
crc = crc & 1 ? (crc >> 1) ^ 0xedb88320 : crc >> 1;
crc = crc & 1 ? (crc >> 1) ^ 0xedb88320 : crc >> 1;
crc = crc & 1 ? (crc >> 1) ^ 0xedb88320 : crc >> 1;
crc = crc & 1 ? (crc >> 1) ^ 0xedb88320 : crc >> 1;
crc = crc & 1 ? (crc >> 1) ^ 0xedb88320 : crc >> 1;
crc = crc & 1 ? (crc >> 1) ^ 0xedb88320 : crc >> 1;
crc = crc & 1 ? (crc >> 1) ^ 0xedb88320 : crc >> 1;
crc = crc & 1 ? (crc >> 1) ^ 0xedb88320 : crc >> 1;
}
return ~crc;
}
You said that you only have 4 KBytes of "memory". Is that just working memory for the program, or does the program have to live there as well? If you have more space in flash for example for the code, then the table can be precomputed and stored with the code. A table-driven CRC will be much faster. The zlib code provides table-driven CRCs that do one byte at a time and four-bytes at a time, requiring respectively a 1Kbyte or 4Kbyte table.
Update 2:
Since you answered in a comment that the 4KBytes are just working memory, then you should use a table-driven CRC. You can simply use the crc32() function in zlib's crc32.c and the table in crc32.h with BYFOUR undefined.
C:
UInt32
crc32(UInt32 crc, UInt8 *p, SInt len)
{
crc = ~crc;
while (--len >= 0) {
crc = crc ^ *p++;
for (SInt i = 8; --i >= 0;) {
crc = (crc >> 1) ^ (0xedb88320 & -(crc & 1));
}
}
return ~crc;
}
void
crc_unitTest(void)
{
UInt8 b1[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
UInt8 b2[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
UInt8 b3[] = { 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0 };
assert(crc32(0, b1, 8) == 0x6522df69);
assert(crc32(0, b2, 10) == 0x456cd746);
assert(crc32(0, b3, 8) == 0xea8c89c0);
}
Python:
def crc32(crc, p, len):
crc = 0xffffffff & ~crc
for i in range(len):
crc = crc ^ p[i]
for j in range(8):
crc = (crc >> 1) ^ (0xedb88320 & -(crc & 1))
return 0xffffffff & ~crc
def unitTest():
b1 = [ 0, 0, 0, 0, 0, 0, 0, 0 ]
b2 = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]
b3 = [ 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0 ]
assert(crc32(0, b1, 8) == 0x6522df69)
assert(crc32(0, b2, 10) == 0x456cd746)
assert(crc32(0, b3, 8) == 0xea8c89c0)
As you need a C implementation that doesn't use a lookup table (which most implementations do) with a matching Python equivalent you could use ZIP's CRC32 as suggested by Mark Ransom ( binascii.crc32 ) and the matching, tableless implementation I borrowed here
/* Calculating ZIP CRC-32 in 'C'
=============================
Reference model for the translated code */
#define poly 0xEDB88320
/* Some compilers need
#define poly 0xEDB88320uL
*/
/* On entry, addr=>start of data
num = length of data
crc = incoming CRC */
int crc32(char *addr, int num, int crc)
{
int i;
for (; num>0; num--) /* Step through bytes in memory */
{
crc = crc ^ *addr++; /* Fetch byte from memory, XOR into CRC */
for (i=0; i<8; i++) /* Prepare to rotate 8 bits */
{
if (crc & 1) /* b0 is set... */
crc = (crc >> 1) ^ poly; /* rotate and XOR with ZIP polynomic */
else /* b0 is clear... */
crc >>= 1; /* just rotate */
/* Some compilers need:
crc &= 0xFFFFFFFF;
*/
} /* Loop for 8 bits */
} /* Loop until num=0 */
return(crc); /* Return updated CRC */
}
EDIT: as several people pointed out there are issues with the above code, the one below matches Wikipedia's (see http://ideone.com/pWLVSo ) and Python's ( http://ideone.com/SvYuyE - 1277644989==0x4c2750bd). This code comes from this page where other implementations and possible improvements over the basic version I copied
const uint32_t Polynomial = 0xEDB88320;
uint32_t crc32_bitwise(const void* data, size_t length, uint32_t previousCrc32 = 0) {
uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF
unsigned char* current = (unsigned char*) data;
while (length--) {
crc ^= *current++;
for (unsigned int j = 0; j < 8; j++) {
if (crc & 1)
crc = (crc >> 1) ^ Polynomial;
else
crc = crc >> 1;
}
}
return ~crc; // same as crc ^ 0xFFFFFFFF
}

Trying to import a python snippet to C/C++ (PI spigot algorithm)

Some time ago (I can't remember where) I find this python snippet which implents a spigot algorithm for calculating digits of Pi:
def pi_digits():
"""generator for digits of pi"""
q,r,t,k,n,l = 1,0,1,1,3,3
while True:
if 4*q+r-t < n*t:
yield n
q,r,t,k,n,l = (10*q,10*(r-n*t),t,k,(10*(3*q+r))/t-10*n,l)
else:
q,r,t,k,n,l = (q*k,(2*q+r)*l,t*l,k+1,(q*(7*k+2)+r*l)/(t*l),l+2)
digits = pi_digits()
for i in range(30): print digits.next()
Now I wanna implent this in C++. My try was:
#include <cmath>
#include <cstdlib>
#include <iostream>
typedef long long ll;
void help() {
std::cout << "Usage: pi2 <digits>" << std::endl;
exit(1);
}
void pi(const long long digits) {
ll q, r, t, k, n, l;
q=1;
r=0;
t=1;
k=1;
n=3;
l=3;
for(ll i=0; i<digits; ++i) {
if(4*q+r-t < n*t) {
std::cout << n;
q=10*q;
r=10*(r-n*t);
n = ( 10 * ( 3 * q + r) / t ) - 10 * n; //Thanks to maverik
} else {
q=q*k;
r=(2*q+r)*l;
t=t*l;
k=k+1;
n=(q*(7*k+2)+r*l)/(t*l);
l=l+2;
}
}
}
int main(int argc, char** argv) {
if(argc<2) help();
ll digits = 0;
if(digits=atoll(argv[1])<1) help();
pi(digits);
return 0;
}
But it never calls std::cout::operator<<, while the python version works.
Can you help me?
Thanks.
The reason is your code not performing the equivalent calculations in the two languages.
There are(as far as I see) two reasons for this:
In this python code, all the calculations are done at the same time:
q,r,t,k,n,l = (q*k,(2*q+r)*l,t*l,k+1,(q*(7*k+2)+r*l)/(t*l),l+2)
In the C code, the calculations are performed one at a time, so every one uses the result of the previous ones instead of using the old values(like the python code does).
You're using ints in python, and long longs in C.
The division in C code will produce long longs, while those in python(assuming python 2), will produce rounded-down ints.
This could also create miscalculations which can cause your condition to never be true.
P.S.
Implementing this in C from scratch is probably a better idea than porting a python algorithm.
Looks like there should be (according to the python code):
n = ( 10 * ( 3 * q + r) / t ) - 10 * n;
And there:
if ( 4 * q + r - t < n * t) ...
Or I've missed something?

Need help porting C function to Python

I'm trying to port a C function which calculates a GPS checksum over to Python. According to the receiving end I am sometimes miscalculating the checksum, so must still have a bug in there.
C code is
void ComputeAsciiChecksum(unsigned char *data, unsigned int len,
unsigned char *p1, unsigned char *p2)
{
unsigned char c,h,l;
assert(Stack_Low());
c = 0;
while (len--) {
c ^= *data++;
}
h = (c>>4);
l = c & 0xf;
h += '0';
if (h > '9') {
h += 'A'-'9'-1;
}
l += '0';
if (l > '9') {
l += 'A'-'9'-1;
}
*p1 = h;
*p2 = l;
}
My attempt at a Python function is
def calcChecksum(line):
c = 0
i = 0
while i < len(line):
c ^= ord(line[i]) % 256
i += 1
return '%02X' % c;
Here is how you can set up a testing environment to diagnose your problem.
Copy the above C function to a file, remove the assert() line, and compile it to a shared library with
gcc -shared -o checksum.so checksum.c
(If you are on Windows or whatever, do the equivalent of the above.)
Copy this code to a Python file:
import ctypes
import random
c = ctypes.CDLL("./checksum.so")
c.ComputeAsciiChecksum.rettype = None
c.ComputeAsciiChecksum.argtypes = [ctypes.c_char_p, ctypes.c_uint,
ctypes.c_char_p, ctypes.c_char_p]
def compute_ascii_checksum_c(line):
p1 = ctypes.create_string_buffer(1)
p2 = ctypes.create_string_buffer(1)
c.ComputeAsciiChecksum(line, len(line), p1, p2)
return p1.value + p2.value
def compute_ascii_checksum_py(line):
c = 0
i = 0
while i < len(line):
c ^= ord(line[i]) % 256
i += 1
return '%02X' % c;
Now you have access to both versions of the checksum function and can compare the results. I wasn't able to find any differences.
(BTW, how are you computing the length of the string in C? If you are using strlen(), this would stop at NUL bytes.)
As a side note, your Python version isn't really idiomatic Python. Here are two more idiomatic versions:
def compute_ascii_checksum_py(line):
checksum = 0
for c in line:
checksum ^= ord(c)
return "%02X" % checksum
or
def compute_ascii_checksum_py(line):
return "%02X" % reduce(operator.xor, map(ord, line))
Note that these implementations should do exactly the same as yours.
Have you checked out this cookbook recipe? It hints at what input you should include in "line", returns a asterisk in front of the checksum, and gives one (input, output) data pair that you can use as test data.
Are you sure that "the receiver" is working correctly? Is the problem due to upper vs lower case hex letters?

Categories