Primality test Python C extension is slower than pure Python - python

I've implemented a 6k±1 primality test both as a C extension and in pure Python, but the pure Python code seems to be much faster! Is there something wrong with my C code, or is it something else?
I also compiled a similar test in pure C with the is_prime function, and its execution time was the same as the C extension (almost 2sec)
primemodule.c
#define PY_SSIZE_T_CLEAN
#include "Python.h"
/*
 * Trial-division primality test using candidate divisors of the form
 * 6k +/- 1.  Returns 1 when n is prime and 0 otherwise.
 *
 * NOTE(review): `i * i` is computed in `int`.  Assuming a 32-bit int, the
 * product no longer fits once i exceeds 46340, which is signed overflow
 * (undefined behaviour); for n close to INT_MAX the loop condition can then
 * stay true long after i has passed sqrt(n).
 */
int is_prime(int n)
{
int i;
/* Everything up to 3 is decided directly: only 2 and 3 are prime. */
if (n <= 3)
return (n > 1);
/* Multiples of 2 and 3 are composite, so only 6k +/- 1 remain to be tried. */
if (n % 2 == 0 || n % 3 == 0)
return (0);
i = 5;
while ((i * i) <= n)
{
/* i is 6k - 1 and i + 2 is 6k + 1. */
if (n % i == 0 || n % (i + 2) == 0)
return (0);
i += 6;
}
return (1);
}
/*
 * Python-callable wrapper behind prime.is_prime(n).
 * Parses a single C int from the argument tuple and returns True or False
 * according to is_prime(); returns NULL when the argument cannot be parsed.
 */
static PyObject *prime_isprime(PyObject *self, PyObject *args)
{
    int candidate;

    if (PyArg_ParseTuple(args, "i", &candidate) == 0)
    {
        return NULL;
    }
    /* PyBool_FromLong hands back a new reference to Py_True or Py_False. */
    return PyBool_FromLong(is_prime(candidate));
}
/* Method table: exposes prime_isprime to Python under the name "is_prime". */
static PyMethodDef prime_methods[] = {
{"is_prime", prime_isprime, METH_VARARGS, "Check if a number is prime"},
{NULL, NULL, 0, NULL}}; /* all-NULL sentinel entry ends the table */
/* Module definition for the extension module named "prime". */
static struct PyModuleDef prime_module = {
PyModuleDef_HEAD_INIT,
"prime", /* module name */
NULL, /* no module docstring */
-1, /* m_size of -1: no per-module state structure */
prime_methods};
/* Module initialisation entry point: builds the module object from prime_module. */
PyMODINIT_FUNC PyInit_prime(void)
{
return (PyModule_Create(&prime_module));
}
py_test.py
import time
MAX_INT = 2147483647
def is_prime(n: int) -> bool:
    """Return True if *n* is prime, using 6k +/- 1 trial division.

    Integers below 2 (including negatives) are reported as not prime.
    """
    if n <= 3:
        return n > 1
    if n % 2 == 0 or n % 3 == 0:
        return False
    # Every prime above 3 has the form 6k - 1 or 6k + 1, so only those
    # candidates need to be tried as divisors, up to sqrt(n).
    i = 5
    while i * i <= n:
        if n % i == 0 or n % (i + 2) == 0:
            return False
        i += 6
    return True
# Time the pure-Python is_prime over the 100 integers just below MAX_INT.
t1 = time.process_time()
for i in range(MAX_INT - 100, MAX_INT):
    is_prime(i)
print(time.process_time() - t1, "seconds")
c_test.py
import time
import prime
MAX_INT = 2147483647
# Time the C extension's is_prime over the 100 integers just below MAX_INT.
t1 = time.process_time()
for i in range(MAX_INT - 100, MAX_INT):
    prime.is_prime(i)
print(time.process_time() - t1, "seconds")
python c_test.py
2.078125 seconds
python py_test.py
0.03125 seconds
timecmd.bat a.exe
2.13 seconds

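I think your C implementation is buggy regarding integer overflow and signedness, and ends up running a much longer loop than the Python version. For n close to INT_MAX, `i * i` exceeds the range of a signed int before it exceeds n; that is undefined behaviour and in practice wraps to a negative value, so the loop condition stays true long after i has passed sqrt(n). Python integers never overflow, so the Python loop stops where it should.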
Changing the parameter type to unsigned int (and i too, since otherwise that's a compiler warning):
/*
 * Trial-division primality test using candidate divisors of the form
 * 6k +/- 1.  Returns 1 when n is prime and 0 otherwise.
 *
 * The loop bound is written as `i <= n / i` rather than `i * i <= n`:
 * the product wraps around for n close to UINT_MAX, which would keep the
 * loop running and misreport large primes as composite.
 */
static int is_prime(unsigned int n)
{
    unsigned int i;

    /* Everything up to 3 is decided directly: only 2 and 3 are prime. */
    if (n <= 3)
        return (n > 1);
    if (n % 2 == 0 || n % 3 == 0)
        return (0);
    /* i is 6k - 1 and i + 2 is 6k + 1; i never exceeds sqrt(n) here. */
    for (i = 5; i <= n / i; i += 6)
    {
        if (n % i == 0 || n % (i + 2) == 0)
            return (0);
    }
    return (1);
}
makes it (anecdotally, on my machine, approximately) 37 times faster than the Python implementation.

Related

How to print a string pattern?

I have a string with characters and a number for the rows and columns that will be in the pattern:
char_str1 = 'abc'
char_str1 = '31452'
num = 5
I would like the output to be:
abcab 31452
bcabc 14523
cabca 45231
abcab 52314
bcabc 23145
I have tried doing:
# Asker's attempt: prints the whole string num times on each of num rows,
# which is why the output below is the unshifted string repeated.
for i in range(num):
    for j in range(num):
        print(char_str1, end='')
    print()
output:
abcabcabcabcabc
abcabcabcabcabc
abcabcabcabcabc
abcabcabcabcabc
abcabcabcabcabc
If you replicate the strings at least num times, simple slicing works. The original strings need to be at least length 1 of course:
char_str1 = 'abc'
char_str2 = '31452'  # The question had a typo here: str1 instead of str2
num = 5
# Repeating each string num times guarantees every slice below is num long.
a = char_str1 * num
b = char_str2 * num
for i in range(num):
    # Row i is the window of width num starting at offset i.
    print(a[i:i+num], b[i:i+num])
Output:
abcab 31452
bcabc 14523
cabca 45231
abcab 52314
bcabc 23145
Please use the below code in java for your pattern!!
JAVA
public class Main {
    /**
     * Prints n lines of n characters each; line number r (from 0) is the
     * string s read cyclically starting at offset r.
     */
    public static void printPattern(String s, int n) {
        for (int row = 0; row < n; row++) {
            StringBuilder line = new StringBuilder();
            for (int offset = 0; offset < n; offset++) {
                // Wrap around the end of s with the modulo.
                line.append(s.charAt((row + offset) % s.length()));
            }
            System.out.println(line);
        }
    }

    public static void main(String[] args) {
        printPattern("abc", 5);
        printPattern("31452", 5);
    }
}
EDIT:
Python
def printPattern(s, n):
    """Print n rows of n characters; row i is s read cyclically from offset i."""
    for i in range(n):
        # j % len(s) wraps the index so the string repeats as often as needed.
        print(''.join(s[j % len(s)] for j in range(i, n + i)))


printPattern("abc", 5)
printPattern("31452", 5)

Creating a snowflake in Python

I am trying to create a program in Python that creates a snowflake based on the input of a number. Below is my code:
# Asker's attempt at drawing an n x n snowflake of "*" on a "." background.
n = int(input())
# NOTE(review): `[["."] * n] * n` stores the SAME inner list n times, so an
# assignment to a[i][j] below is visible through every row and all printed
# rows come out identical.
a = [["."] * n] * n
temp = n/2
start_point = 0
mid_point = int(temp)
end_point = n - 1
for i in range(n):
if i > mid_point + 1:
start_point -= 1
end_point += 1
for j in range(n):
if (j == start_point) or (j == mid_point) or (j == end_point) or (i == mid_point):
a[i][j] = "*"
else:
a[i][j] = "."
if i < mid_point - 1:
start_point += 1
end_point -= 1
# Print each row with the cells separated by single spaces.
for row in a:
print(' '.join([str(elem) for elem in row]))
For example, if the input is '5' the output should look like:
* . * . *
. * * * .
* * * * *
. * * * .
* . * . *
However, my output looks like:
. * * * .
. * * * .
. * * * .
. * * * .
. * * * .
I was sure that my code was correct so I rewrote it in Java as:
public class Snowflake {
    /**
     * Fills an n-by-n grid with "*" on the middle row, the middle column and
     * the two moving diagonal columns, "." elsewhere, then prints it row by
     * row. Two diagnostic lines (n/2 as a float, then as an int) are printed
     * first.
     */
    public static void createSnowflake(int n) {
        String[][] grid = new String[n][n];
        float half = (float) (n / 2);
        System.out.println(half);
        int left = 0;
        int centre = (int) half;
        System.out.println(centre);
        int right = n - 1;

        for (int row = 0; row < n; row++) {
            // Below the centre the two diagonal columns move apart again.
            if (row > centre + 1) {
                left -= 1;
                right += 1;
            }
            for (int col = 0; col < n; col++) {
                boolean starred = col == left || col == centre || col == right || row == centre;
                grid[row][col] = starred ? "*" : ".";
            }
            // Above the centre the two diagonal columns move towards each other.
            if (row < centre - 1) {
                left += 1;
                right -= 1;
            }
        }

        for (String[] line : grid) {
            StringBuilder text = new StringBuilder();
            for (String cell : line) {
                text.append(cell);
            }
            System.out.print(text.append("\n"));
        }
    }

    public static void main(String[] args) {
        createSnowflake(5);
    }
}
And it worked as expected. To my eyes the underlying logic is exactly the same, and yet the Java code works and the Python code doesn't. Could someone help me find where I've made a mistake in the Python syntax or how my Java code somehow differs from it?
If you change the creation of a to:
a= [["." for j in range(n)] for i in range(n)]
it should fix it.
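This has to do with the way Python copies lists: `[["."] * n] * n` does not create n separate rows, it creates one inner list and stores n references to it, so writing to a[i][j] changes every row at once. The list comprehension above builds a new inner list for each row.
Check the question linked in the comments on your question.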
Enjoyed this question, I feel like it could only be here during this time of the year.

How to sort points along a Hilbert curve without using Hilbert indices?

I'm trying to implement the algorithm described in the paper Fast Hilbert Sort Algorithm Without Using Hilbert Indices (https://www.researchgate.net/profile/Takeshi_Shinohara/publication/313074453_Fast_Hilbert_Sort_Algorithm_Without_Using_Hilbert_Indices/links/5b8468bd299bf1d5a72b9a0c/Fast-Hilbert-Sort-Algorithm-Without-Using-Hilbert-Indices.pdf?origin=publication_detail), but I can't get the right results.
Below is my Python code (for bitset and its member functions flip and test in C++, please refer to https://en.cppreference.com/w/cpp/utility/bitset):
N=9 # 9 points
n=2 # 2 dimension
m=3 # order of Hilbert curve
b=m-1
def BitTest(x,od,maxlen=3):
    """Return bit *od* of *x* (0 = least significant) as the int 0 or 1.

    *maxlen* is accepted for backward compatibility only; shifting works for
    any width, whereas indexing a zero-padded binary string picked the wrong
    character as soon as *x* needed more than *maxlen* bits.
    """
    return (x >> od) & 1
def BitFlip(b,pos):
    """Return *b* with bit *pos* (0 = least significant) toggled."""
    return b ^ (1 << pos)
def partition(A,st,en,od,ax,di):
    """Partition A[st..en] (inclusive) in place by one coordinate bit.

    Elements whose bit *od* of coordinate *ax* equals *di* are moved in front
    of those whose bit differs. Returns the index of the first element of the
    second group, which is en + 1 when every element matches *di*.
    """
    i = st
    j = en
    while True:
        # `<=` rather than `<`: with `<` the element where the cursors meet
        # was never tested, so a range in which every element matched still
        # had its last element split off into the second group.
        while i <= j and BitTest(A[i][ax],od)==di:
            i = i + 1
        while i <= j and BitTest(A[j][ax],od)!=di:
            j = j - 1
        if i >= j:
            return i
        A[i], A[j] = A[j], A[i]
# Recursive Hilbert sort of A[st..en] as posted in the question (this version
# does not yet produce the right order).
# NOTE(review): the bit order is taken from the module-level constant `b`
# (`if b==0`, `b-1`, `b`) instead of the `od` parameter, so the order passed
# down never decreases with recursion depth.
# NOTE(review): the second recursive call in each branch starts at `p+1`,
# which leaves A[p] out of both sub-ranges.
def HSort(A,st,en,od,c,e,d,di,cnt):
if en<=st:
return
# Split on axis (d+c)%n, using the matching bit of e as the reference value.
p =partition(A,st,en,od,(d+c)%n,BitTest(e,(d+c)%n))
if c==n-1:
if b==0:
return
d2= (d+n+n-(di if(di==2) else cnt+2))%n
e=BitFlip(e,d2)
e=BitFlip(e,(d+c)%n)
HSort(A,st,p-1,b-1,0,e,d2,False,0)
# Undo the two flips before handling the other half.
e=BitFlip(e,(d+c)%n)
e=BitFlip(e,d2)
d2= (d+n+n-(di if(di==cnt+2) else 2))%n
HSort(A,p+1,en,b-1,0,e,d2,False,0)
else:
HSort(A,st,p-1,b,c+1,e,d,False,(di if(di==1) else cnt+1))
e=BitFlip(e,(d+c)%n)
e=BitFlip(e,(d+c+1)%n)
HSort(A,p+1,en,b,c+1,e,d,True,(di if(di==cnt+1) else 1))
e=BitFlip(e,(d+c+1)%n)
e=BitFlip(e,(d+c)%n)
# Sort the nine sample points in place, starting from the highest bit (m-1), and show the result.
array = [[2,2],[2,4],[3,4],[2,5],[3,5],[1,6],[3,6],[5,6],[3,7]]
HSort(array,st=0,en=N-1,od=m-1,c=0,e=0,d=0,di=False,cnt=0)
print(array)
That document has a typo, the constant "b" should be replaced with "od".
Here is a working code in c++:
#include <iostream>
#include <vector>
#include <array>
constexpr std::int32_t m = 3;
constexpr std::int32_t n = 2;
// Reports whether bit `pos` (0 = least significant) of `value` is set.
bool test_bit(std::int32_t value, std::int32_t pos)
{
    return (value & (1 << pos)) != 0;
}
// Toggles bit `pos` (0 = least significant) of `value` in place.
void flip_bit(std::int32_t &value, std::int32_t pos)
{
    const std::int32_t mask = 1 << pos;
    value = value ^ mask;
}
// Partitions A[st..en] (inclusive) in place by bit `od` of coordinate `ax`:
// elements whose bit equals `di` are moved in front of those whose bit differs.
// Returns the index of the first element of the second group, which is
// en + 1 when every element matches `di`.
// NOTE(review): st and en are std::size_t but are narrowed into std::int32_t
// cursors; `st - 1` with st == 0 relies on that conversion yielding -1.
std::int32_t partition(std::vector<std::array<std::int32_t, 2>> &A, std::size_t st, std::size_t en, std::int32_t od, std::int32_t ax, bool di)
{
// Cursors start one step outside the range because each do/while moves first.
std::int32_t i = st - 1;
std::int32_t j = en + 1;
while(true)
{
// Advance i to the first element from the left that does not match di.
do
i = i + 1;
while(i < j && test_bit(A[i][ax], od) == di);
// Retreat j to the first element from the right that does match di.
do
j = j - 1;
while(i < j && test_bit(A[j][ax], od) != di);
if(j <= i)
return i; //partition is complete
std::swap(A[i], A[j]);
}
}
// Recursively sorts A[st..en] (inclusive) in place. Each call partitions the
// range on axis (d + c) % n using bit `od` of that coordinate, with the
// matching bit of `e` as the reference value, then recurses on both halves.
// `c` counts the axes already handled at the current bit order; once it
// reaches n - 1 the recursion moves on to bit order od - 1.
// `e` is taken by reference, but every pair of flips made here is undone
// before the function returns.
// NOTE(review): `p - 1` is converted to std::size_t when passed as `en`; for
// p == 0 this wraps to a very large value instead of -1 — confirm the
// `en <= st` guard and partition() behave as intended in that case.
void hilbert_sort(std::vector<std::array<std::int32_t, 2>> &A, std::size_t st, std::size_t en, std::int32_t od, std::int32_t c, std::int32_t &e, std::int32_t
d, bool di, std::int32_t cnt)
{
std::int32_t p;
std::int32_t d2;
// Ranges of at most one element need no sorting.
if(en <= st)
return;
p = partition(A, st, en, od, (d + c) % n, test_bit(e, (d + c) % n));
if(c == n - 1)
{
// Last axis at this bit order: descend one order, or stop at bit 0.
if(od == 0)
return;
d2 = (d + n + n - (di ? 2 : cnt + 2)) % n;
flip_bit(e, d2);
flip_bit(e, (d + c) % n);
hilbert_sort(A, st, p - 1, od - 1, 0, e, d2, false, 0);
// Undo the two flips before handling the second half.
flip_bit(e, (d + c) % n);
flip_bit(e, d2);
d2 = (d + n + n - (di ? cnt + 2 : 2)) % n;
hilbert_sort(A, p, en, od - 1, 0, e, d2, false, 0);
}
else
{
// More axes remain at this bit order: recurse with c + 1 on both halves.
hilbert_sort(A, st, p - 1, od, c + 1, e, d, false, di ? 1 : cnt + 1);
flip_bit(e, (d + c) % n);
flip_bit(e, (d + c + 1) % n);
hilbert_sort(A, p, en, od, c + 1, e, d, true, di ? cnt + 1 : 1);
// Restore e to the value the caller passed in.
flip_bit(e, (d + c + 1) % n);
flip_bit(e, (d + c) % n);
}
}
int main()
{
std::vector<std::array<std::int32_t, 2>> points = {{2,2},{2,4},{3,4},{2,5},{3,5},{1,6},{3,6},{5,6},{3,7}};
std::int32_t e = 0;
hilbert_sort(points, 0, points.size() - 1, m - 1, 0, e, 0, false , 0);
for(const auto &point : points)
std::clog << "(" << point[0] << ", " << point[1] << ")\n";
return 0;
}
You also seem to have a typo: the second recursive call should start at "p", not "p+1".
Here is a working python code:
N=9 # 9 points
n=2 # 2 dimension
m=3 # order of Hilbert curve
def BitTest(x,od):
    """Return bit *od* of *x* (0 = least significant) as the int 0 or 1."""
    return (x >> od) & 1
def BitFlip(b,pos):
    """Return *b* with bit *pos* (0 = least significant) toggled."""
    return b ^ (1 << pos)
def partition(A,st,en,od,ax,di):
    """Partition A[st..en] (inclusive) in place by one coordinate bit.

    Elements whose bit *od* of coordinate *ax* equals *di* are moved in front
    of those whose bit differs. Returns the index of the first element of the
    second group, which is en + 1 when every element matches *di*.
    """
    i = st
    j = en
    while True:
        # `<=` rather than `<`: with `<` the element where the cursors meet
        # was never tested, so a range in which every element matched still
        # had its last element split off into the second group and it was
        # never ordered against the others.
        while i <= j and BitTest(A[i][ax],od) == di:
            i = i + 1
        while i <= j and BitTest(A[j][ax],od) != di:
            j = j - 1
        if j <= i:
            return i
        A[i], A[j] = A[j], A[i]
def HSort(A,st,en,od,c,e,d,di,cnt):
    """Recursively sort A[st..en] (inclusive) in place along the Hilbert curve.

    Each call partitions the range on axis (d+c)%n using bit *od* of that
    coordinate, with the matching bit of *e* as the reference value, then
    recurses on both halves. *c* counts the axes already handled at the
    current bit order; once it reaches n-1 the recursion continues with bit
    order od-1 and stops after bit 0. *n* is the module-level dimension count.
    """
    if en<=st:
        return
    p = partition(A,st,en,od,(d+c)%n,BitTest(e,(d+c)%n))
    if c==n-1:
        # Last axis at this bit order: descend one order, or stop at bit 0.
        if od==0:
            return
        d2= (d+n+n-(2 if di else cnt + 2)) % n
        e=BitFlip(e,d2)
        e=BitFlip(e,(d+c)%n)
        HSort(A,st,p-1,od-1,0,e,d2,False,0)
        # Undo the two flips before handling the second half.
        e=BitFlip(e,(d+c)%n)
        e=BitFlip(e,d2)
        d2= (d+n+n-(cnt + 2 if di else 2))%n
        HSort(A,p,en,od-1,0,e,d2,False,0)
    else:
        # More axes remain at this bit order: recurse with c+1 on both halves.
        HSort(A,st,p-1,od,c+1,e,d,False,(1 if di else cnt+1))
        e=BitFlip(e,(d+c)%n)
        e=BitFlip(e,(d+c+1)%n)
        HSort(A,p,en,od,c+1,e,d,True,(cnt+1 if di else 1))
        # Kept for parity with the C++ version, where e is passed by
        # reference; here e is a local int, so these two flips have no
        # effect outside this call.
        e=BitFlip(e,(d+c+1)%n)
        e=BitFlip(e,(d+c)%n)
# Sort the nine sample points in place, starting from the highest bit (m-1), and show the result.
array = [[2,2],[2,4],[3,4],[2,5],[3,5],[1,6],[3,6],[5,6],[3,7]]
HSort(array,st=0,en=N-1,od=m-1,c=0,e=0,d=0,di=False,cnt=0)
print(array)

Cython: for i from 1 <= i < N

I'm learning Cython and came across this snippet of code:
import numpy as np
cimport numpy as np
def mean(np.ndarray[np.double_t] input):
    """Return the arithmetic mean of a buffer of doubles."""
    # Start the accumulator at zero explicitly: a C-typed local has no
    # defined starting value, so `cur += ...` on it could sum onto garbage.
    cdef np.double_t cur = 0
    # Py_ssize_t is the signed index type of Python's C API.
    cdef Py_ssize_t i
    cdef Py_ssize_t N = len(input)
    for i from 0 <= i < N:
        cur += input[i]
    return cur / N
a=np.array([1,2,3,4], dtype=np.double)
Obviously, this returns the mean of a which is 2.5. My question is this:
Is the for loop a Python loop, Cython, or C?
Compile it and see: the C code that Cython produces is nicely annotated.
/* "cyexample.pyx":11
* cdef Py_ssize_t N = len(input)
*
* for i from 0 <= i < N: # <<<<<<<<<<<<<<
* cur += input[i]
*
*/
__pyx_t_1 = __pyx_v_N;
for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i++) {
/* "cyexample.pyx":12
*
* for i from 0 <= i < N:
* cur += input[i] # <<<<<<<<<<<<<<
*
* return cur / N
*/
__pyx_t_2 = __pyx_v_i;
__pyx_t_3 = -1;
if (__pyx_t_2 < 0) {
__pyx_t_2 += __pyx_bshape_0_input;
if (unlikely(__pyx_t_2 < 0)) __pyx_t_3 = 0;
} else if (unlikely(__pyx_t_2 >= __pyx_bshape_0_input)) __pyx_t_3 = 0;
if (unlikely(__pyx_t_3 != -1)) {
__Pyx_RaiseBufferIndexError(__pyx_t_3);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
}
__pyx_v_cur = (__pyx_v_cur + (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_double_t *, __pyx_bstruct_input.buf, __pyx_t_2, __pyx_bstride_0_input)));
}
And so the loop itself is successfully turned into C. Note that these days Cython can handle range naturally, so the older "from 0 <= i < N" style isn't necessary. The point of introducing the (non-Python) "for/from" syntax was to signify which loops should be C-ified.
for..from seems to be a Pyrex / Cython loop: http://docs.cython.org/src/userguide/language_basics.html#integer-for-loops

What algorithm does Python employ in fractions.gcd()?

I'm using the fractions module in Python v3.1 to compute the greatest common divisor. I would like to know what algorithm is used. I'm guessing the Euclidean method, but would like to be sure. The docs (http://docs.python.org/py3k/library/fractions.html?highlight=fractions.gcd#fractions.gcd) don't help. Can anybody clue me in?
According to the 3.1.2 source code online, here's gcd as defined in Python-3.1.2/Lib/fractions.py:
def gcd(a, b):
    """Calculate the Greatest Common Divisor of a and b.

    Unless b==0, the result will have the same sign as b (so that when
    b is divided by it, the result comes out positive).
    """
    # Euclidean algorithm: replace (a, b) by (b, a mod b) until b is zero.
    while b:
        a, b = b, a%b
    return a
So yes, it's the Euclidean algorithm, written in pure Python.
From fractions python
"Deprecated since version 3.5: Use math.gcd() instead."
I was looking for the algorithm as well. I hope it helped.
Since Python 3.5, the GCD code has been moved to math.gcd. Since Python 3.9, math.gcd takes an arbitrary number of arguments.
The actual GCD code is now implemented in C (for CPython), making it significantly faster than the original pure Python implementation.
Boilerplate:
/*
 * math.gcd(*integers): folds _PyLong_GCD over all arguments.
 * Returns the result object, or NULL if converting an argument or computing
 * a GCD fails.
 */
static PyObject *
math_gcd(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
{
PyObject *res, *x;
Py_ssize_t i;
/* No arguments: the result is 0. */
if (nargs == 0) {
return PyLong_FromLong(0);
}
res = PyNumber_Index(args[0]);
if (res == NULL) {
return NULL;
}
/* A single argument: the result is its absolute value. */
if (nargs == 1) {
Py_SETREF(res, PyNumber_Absolute(res));
return res;
}
PyObject *one = _PyLong_GetOne(); // borrowed ref
for (i = 1; i < nargs; i++) {
x = _PyNumber_Index(args[i]);
if (x == NULL) {
Py_DECREF(res);
return NULL;
}
/* Once the running result is 1 it cannot change any more; the remaining
arguments are still converted above so invalid ones are reported. */
if (res == one) {
/* Fast path: just check arguments.
It is okay to use identity comparison here. */
Py_DECREF(x);
continue;
}
Py_SETREF(res, _PyLong_GCD(res, x));
Py_DECREF(x);
if (res == NULL) {
return NULL;
}
}
return res;
}
The actual computation uses Lehmer's GCD algorithm:
/*
 * Compute the greatest common divisor of two Python ints.
 *
 * Both arguments are cast to PyLongObject * without a type check, so callers
 * must pass int objects.  Operands of at most two digits each go straight to
 * a machine-integer Euclidean loop; larger ones are first reduced with
 * Lehmer's algorithm until `a` fits into two digits.
 * Returns the result object, or NULL on failure.
 */
PyObject *
_PyLong_GCD(PyObject *aarg, PyObject *barg)
{
PyLongObject *a, *b, *c = NULL, *d = NULL, *r;
stwodigits x, y, q, s, t, c_carry, d_carry;
stwodigits A, B, C, D, T;
int nbits, k;
Py_ssize_t size_a, size_b, alloc_a, alloc_b;
digit *a_digit, *b_digit, *c_digit, *d_digit, *a_end, *b_end;
a = (PyLongObject *)aarg;
b = (PyLongObject *)barg;
size_a = Py_SIZE(a);
size_b = Py_SIZE(b);
/* Both operands are small: take references (the `simple` path releases
them) and skip the multi-digit reduction entirely. */
if (-2 <= size_a && size_a <= 2 && -2 <= size_b && size_b <= 2) {
Py_INCREF(a);
Py_INCREF(b);
goto simple;
}
/* Initial reduction: make sure that 0 <= b <= a. */
a = (PyLongObject *)long_abs(a);
if (a == NULL)
return NULL;
b = (PyLongObject *)long_abs(b);
if (b == NULL) {
Py_DECREF(a);
return NULL;
}
if (long_compare(a, b) < 0) {
r = a;
a = b;
b = r;
}
/* We now own references to a and b */
alloc_a = Py_SIZE(a);
alloc_b = Py_SIZE(b);
/* reduce until a fits into 2 digits */
while ((size_a = Py_SIZE(a)) > 2) {
nbits = bit_length_digit(a->ob_digit[size_a-1]);
/* extract top 2*PyLong_SHIFT bits of a into x, along with
corresponding bits of b into y */
size_b = Py_SIZE(b);
assert(size_b <= size_a);
/* b has reached zero: a is the result. */
if (size_b == 0) {
if (size_a < alloc_a) {
r = (PyLongObject *)_PyLong_Copy(a);
Py_DECREF(a);
}
else
r = a;
Py_DECREF(b);
Py_XDECREF(c);
Py_XDECREF(d);
return (PyObject *)r;
}
x = (((twodigits)a->ob_digit[size_a-1] << (2*PyLong_SHIFT-nbits)) |
((twodigits)a->ob_digit[size_a-2] << (PyLong_SHIFT-nbits)) |
(a->ob_digit[size_a-3] >> nbits));
y = ((size_b >= size_a - 2 ? b->ob_digit[size_a-3] >> nbits : 0) |
(size_b >= size_a - 1 ? (twodigits)b->ob_digit[size_a-2] << (PyLong_SHIFT-nbits) : 0) |
(size_b >= size_a ? (twodigits)b->ob_digit[size_a-1] << (2*PyLong_SHIFT-nbits) : 0));
/* inner loop of Lehmer's algorithm; A, B, C, D never grow
larger than PyLong_MASK during the algorithm. */
A = 1; B = 0; C = 0; D = 1;
for (k=0;; k++) {
if (y-C == 0)
break;
q = (x+(A-1))/(y-C);
s = B+q*D;
t = x-q*y;
if (s > t)
break;
x = y; y = t;
t = A+q*C; A = D; B = C; C = s; D = t;
}
if (k == 0) {
/* no progress; do a Euclidean step */
if (l_mod(a, b, &r) < 0)
goto error;
Py_SETREF(a, b);
b = r;
alloc_a = alloc_b;
alloc_b = Py_SIZE(b);
continue;
}
/*
a, b = A*b-B*a, D*a-C*b if k is odd
a, b = A*a-B*b, D*b-C*a if k is even
*/
if (k&1) {
T = -A; A = -B; B = T;
T = -C; C = -D; D = T;
}
/* Pick the output buffer c for the new a: reuse the previous c, reuse a
itself when this function holds its only reference, or allocate. */
if (c != NULL) {
Py_SET_SIZE(c, size_a);
}
else if (Py_REFCNT(a) == 1) {
c = (PyLongObject*)Py_NewRef(a);
}
else {
alloc_a = size_a;
c = _PyLong_New(size_a);
if (c == NULL)
goto error;
}
/* Same choice for d, the output buffer for the new b; b is only reused
when its allocation can hold size_a digits. */
if (d != NULL) {
Py_SET_SIZE(d, size_a);
}
else if (Py_REFCNT(b) == 1 && size_a <= alloc_b) {
d = (PyLongObject*)Py_NewRef(b);
Py_SET_SIZE(d, size_a);
}
else {
alloc_b = size_a;
d = _PyLong_New(size_a);
if (d == NULL)
goto error;
}
a_end = a->ob_digit + size_a;
b_end = b->ob_digit + size_b;
/* compute new a and new b in parallel */
a_digit = a->ob_digit;
b_digit = b->ob_digit;
c_digit = c->ob_digit;
d_digit = d->ob_digit;
c_carry = 0;
d_carry = 0;
/* Digits present in both a and b. */
while (b_digit < b_end) {
c_carry += (A * *a_digit) - (B * *b_digit);
d_carry += (D * *b_digit++) - (C * *a_digit++);
*c_digit++ = (digit)(c_carry & PyLong_MASK);
*d_digit++ = (digit)(d_carry & PyLong_MASK);
c_carry >>= PyLong_SHIFT;
d_carry >>= PyLong_SHIFT;
}
/* Remaining high digits, present only in a. */
while (a_digit < a_end) {
c_carry += A * *a_digit;
d_carry -= C * *a_digit++;
*c_digit++ = (digit)(c_carry & PyLong_MASK);
*d_digit++ = (digit)(d_carry & PyLong_MASK);
c_carry >>= PyLong_SHIFT;
d_carry >>= PyLong_SHIFT;
}
assert(c_carry == 0);
assert(d_carry == 0);
/* c and d become the new a and b; the extra references taken here keep
them alive as a/b while c/d remain available as buffers. */
Py_INCREF(c);
Py_INCREF(d);
Py_DECREF(a);
Py_DECREF(b);
a = long_normalize(c);
b = long_normalize(d);
}
Py_XDECREF(c);
Py_XDECREF(d);
simple:
assert(Py_REFCNT(a) > 0);
assert(Py_REFCNT(b) > 0);
/* Issue #24999: use two shifts instead of ">> 2*PyLong_SHIFT" to avoid
undefined behaviour when LONG_MAX type is smaller than 60 bits */
#if LONG_MAX >> PyLong_SHIFT >> PyLong_SHIFT
/* a fits into a long, so b must too */
x = PyLong_AsLong((PyObject *)a);
y = PyLong_AsLong((PyObject *)b);
#elif LLONG_MAX >> PyLong_SHIFT >> PyLong_SHIFT
x = PyLong_AsLongLong((PyObject *)a);
y = PyLong_AsLongLong((PyObject *)b);
#else
# error "_PyLong_GCD"
#endif
x = Py_ABS(x);
y = Py_ABS(y);
Py_DECREF(a);
Py_DECREF(b);
/* usual Euclidean algorithm for longs */
while (y != 0) {
t = y;
y = x % y;
x = t;
}
#if LONG_MAX >> PyLong_SHIFT >> PyLong_SHIFT
return PyLong_FromLong(x);
#elif LLONG_MAX >> PyLong_SHIFT >> PyLong_SHIFT
return PyLong_FromLongLong(x);
#else
# error "_PyLong_GCD"
#endif
/* Failure exit: release everything still held and report the error. */
error:
Py_DECREF(a);
Py_DECREF(b);
Py_XDECREF(c);
Py_XDECREF(d);
return NULL;
}

Categories