I'm trying to implement a radix-5/radix-3 FFT in C++. I already have a working radix-2, but I hit some kind of bug with radix 3 and radix 5: an FFT of 3 samples gives the correct results, but an FFT of 9 samples (3 * 3) does not.
The code originally comes from Python, where it works, and I'm trying to simply "copy" it to C++. Here is the original Python code:
import numpy as np

def fft(x):
    """
    radix-2,3,5 FFT algorithm
    """
    N = len(x)
    if N <= 1:
        return x
    elif N % 2 == 0:
        # For multiples of 2 this formula works
        even = fft(x[0::2])
        odd = fft(x[1::2])
        T = [np.exp(-2j*np.pi*k/N)*odd[k] for k in range(N//2)]
        return [even[k] + T[k] for k in range(N//2)] + \
               [even[k] - T[k] for k in range(N//2)]
    elif N % 3 == 0:
        # Optional, implementing factor 3 decimation
        p0 = fft(x[0::3])
        p1 = fft(x[1::3])
        p2 = fft(x[2::3])
        # This constructs the output without the simplifications
        # you can do by exploiting the symmetry
        return [p0[k % (N//3)] +
                p1[k % (N//3)] * np.exp(-2j*np.pi*k/N) +
                p2[k % (N//3)] * np.exp(-4j*np.pi*k/N)
                for k in range(N)]
    elif N % 5 == 0:
        # factor 5 decimation
        p0 = fft(x[0::5])
        p1 = fft(x[1::5])
        p2 = fft(x[2::5])
        p3 = fft(x[3::5])
        p4 = fft(x[4::5])
        return [p0[k % (N//5)] +
                p1[k % (N//5)] * np.exp(-2j*np.pi*k/N) +
                p2[k % (N//5)] * np.exp(-4j*np.pi*k/N) +
                p3[k % (N//5)] * np.exp(-6j*np.pi*k/N) +
                p4[k % (N//5)] * np.exp(-8j*np.pi*k/N)
                for k in range(N)]

x = [1, 1.00071, 1.00135, 1.00193, 1.00245, 1.0029, 1.00329, 1.00361, 1.00387]
assert(np.allclose(fft(x), np.fft.fft(x)))
And here is my code in C++ (fft.hpp):
#define _USE_MATH_DEFINES
#pragma once
#include <cmath>
#include <vector>
#include <complex>
using std::vector;
using std::complex;

vector<complex<float>> slicing(vector<complex<float>> vec, unsigned int X, unsigned int Y, unsigned int stride)
{
    // To store the sliced vector
    vector<complex<float>> result;
    // Copy vector using copy function()
    int i = X;
    while (result.size() < Y)
    {
        result.push_back(vec[i]);
        i = i + stride;
    }
    // Return the final sliced vector
    return result;
}

void fft(vector<complex<float>>& x)
{
    // Check if it is splitted enough
    const size_t N = x.size();
    if (N <= 1)
        return;
    else if (N % 2 == 0)
    {
        //Radix-2
        vector<complex<float>> even = slicing(x, 0, N / 2, 2); //split the inputs in even / odd indices subarrays
        vector<complex<float>> odd = slicing(x, 1, N / 2, 2);
        // conquer
        fft(even);
        fft(odd);
        // combine
        for (size_t k = 0; k < N / 2; ++k)
        {
            complex<float> t = std::polar<float>(1.0, -2 * M_PI * k / N) * odd[k];
            x[k] = even[k] + t;
            x[k + N / 2] = even[k] - t;
        }
    }
    else if (N % 3 == 0)
    {
        //Radix-3
        //factor 3 decimation
        vector<complex<float>> p0 = slicing(x, 0, N / 3, 3);
        vector<complex<float>> p1 = slicing(x, 1, N / 3, 3);
        vector<complex<float>> p2 = slicing(x, 2, N / 3, 3);
        fft(p0);
        fft(p1);
        fft(p2);
        for (int i = 0; i < N; i++)
        {
            complex<float> temp = p0[i % (int)N / 3];
            temp += (p1[i % (int)N / 3] * std::polar<float>(1.0, -2 * M_PI * i / N));
            temp += (p2[i % (int)N / 3] * std::polar<float>(1.0, -4 * M_PI * i / N));
            x[i] = temp;
        }
    }
    else if (N % 5 == 0)
    {
        //Radix-5
        //factor 5 decimation
        vector<complex<float>> p0 = slicing(x, 0, N / 5, 5);
        vector<complex<float>> p1 = slicing(x, 1, N / 5, 5);
        vector<complex<float>> p2 = slicing(x, 2, N / 5, 5);
        vector<complex<float>> p3 = slicing(x, 3, N / 5, 5);
        vector<complex<float>> p4 = slicing(x, 4, N / 5, 5);
        fft(p0);
        fft(p1);
        fft(p2);
        fft(p3);
        fft(p4);
        for (int i = 0; i < N; i++)
        {
            complex<float> temp = p0[i % (int)N / 5];
            temp += (p1[i % (int)N / 5] * std::polar<float>(1.0, -2 * M_PI * i / N));
            temp += (p2[i % (int)N / 5] * std::polar<float>(1.0, -4 * M_PI * i / N));
            temp += (p3[i % (int)N / 5] * std::polar<float>(1.0, -6 * M_PI * i / N));
            temp += (p4[i % (int)N / 5] * std::polar<float>(1.0, -8 * M_PI * i / N));
            x[i] = temp;
        }
    }
}
And main.cpp:
#define _USE_MATH_DEFINES
#include <stdio.h>
#include <iostream>
#include "fft.hpp"

typedef vector<complex<float>> complexSignal;

int main()
{
    complexSignal abit;
    int N = 9;
    abit.push_back({ 1, 0 });
    abit.push_back({ 1.00071, 0 });
    abit.push_back({ 1.00135, 0 });
    abit.push_back({ 1.00193, 0 });
    abit.push_back({ 1.00245, 0 });
    abit.push_back({ 1.0029, 0 });
    abit.push_back({ 1.00329, 0 });
    abit.push_back({ 1.00361, 0 });
    abit.push_back({ 1.00387, 0 });
    //abit.push_back({ 1.0029, 0 });
    std::cout << "Before:" << std::endl;
    for (int i = 0; i < N; i++)
    {
        //abit.push_back(data[0][i]);
        std::cout << abit[i] << std::endl;
    }
    std::cout << "After:" << std::endl;
    fft(abit);
    for (int i = 0; i < N; i++)
    {
        std::cout << abit[i] << std::endl;
    }
    return 0;
}
I'm getting the following output from the C++ code:
(9.02011,0)
(5.83089,-4.89513)
(0.700632,-3.98993)
(-0.000289979,0.000502368)
(-0.00218513,0.000362784)
(-0.00179241,0.00139188)
(-0.000289979,-0.000502368)
(0.000175771,-0.00354373)
(-0.003268,-0.00558837)
While I should get:
(9.020109999999999+0j)
(-0.0032675770104925446+0.005588577982060319j)
(-0.0023772289746976797+0.0024179090499282354j)
(-0.0022250000000012538+0.0011691342951078987j)
(-0.002185194014811494+0.00036271471530890747j)
(-0.0021851940148113033-0.00036271471530980844j)
(-0.0022249999999994774-0.0011691342951105632j)
(-0.002377228974696629-0.0024179090499291786j)
(-0.00326757701049002-0.005588577982061138j)
As you can see, only the first result is correct.
In Python it says k % (N//3), in C++ it says i % (int)N / 3. These are not the same! See operator precedence in C++. You need to use parentheses to execute the operations in the right order: i % ((int)N / 3). The same applies to the radix-5 branch: i % ((int)N / 5).
Pepijn Kramer’s comment about changing
-2 * M_PI * i / N
into
-2.0 * M_PI * static_cast<double>(i)/static_cast<double>(N)
is good practice, in my opinion, but it shouldn't make a difference in this case because of left-to-right evaluation and the automatic promotion of integers to floats in a float context. Still, ensuring all your constants are floats where needed doesn't cost much effort and can prevent difficult-to-spot bugs.
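To make the fix concrete, here is a minimal sketch of how the radix-3 combine loop could look with the parentheses added (the radix-5 loop changes the same way). It reuses the variable names from the fft.hpp above and also spells out the explicit float casts, so treat it as an illustration of the two points above rather than a polished implementation:
// Radix-3 combine with the modulus parenthesized: i % (N / 3), not (i % N) / 3.
const size_t third = N / 3;
for (size_t i = 0; i < N; i++)
{
    complex<float> temp = p0[i % third];
    temp += p1[i % third] * std::polar<float>(1.0f, -2.0f * static_cast<float>(M_PI) * i / static_cast<float>(N));
    temp += p2[i % third] * std::polar<float>(1.0f, -4.0f * static_cast<float>(M_PI) * i / static_cast<float>(N));
    x[i] = temp;
}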
Related
What is the correct way to do matrix multiplication using ctypes?
In my current implementation, the data going back and forth consumes a lot of time. Is there any way to do it more efficiently, for example by passing the array address and getting a pointer back in return, instead of regenerating the entire array via the .contents method?
cpp_function.cpp
compile using g++ -shared -fPIC cpp_function.cpp -o cpp_function.so
#include <iostream>

extern "C" {
    double* mult_matrix(double *a1, double *a2, size_t a1_h, size_t a1_w,
                        size_t a2_h, size_t a2_w, int size)
    {
        double* ret_arr = new double[size];
        for (size_t i = 0; i < a1_h; i++) {
            for (size_t j = 0; j < a2_w; j++) {
                double val = 0;
                for (size_t k = 0; k < a2_h; k++) {
                    val += a1[i * a1_h + k] * a2[k * a2_h + j];
                }
                ret_arr[i * a1_h + j] = val;
                // printf("%f ", ret_arr[i * a1_h + j]);
            }
            // printf("\n");
        }
        return ret_arr;
    }
}
Python file to call the so file
main.py
import ctypes
import numpy
from time import time

libmatmult = ctypes.CDLL("./cpp_function.so")

ND_POINTER_1 = numpy.ctypeslib.ndpointer(dtype=numpy.float64,
                                         ndim=2,
                                         flags="C")
ND_POINTER_2 = numpy.ctypeslib.ndpointer(dtype=numpy.float64,
                                         ndim=2,
                                         flags="C")
libmatmult.mult_matrix.argtypes = [ND_POINTER_1, ND_POINTER_2, ctypes.c_size_t, ctypes.c_size_t]

def mult_matrix_cpp(a, b):
    shape = a.shape[0] * a.shape[1]
    libmatmult.mult_matrix.restype = ctypes.POINTER(ctypes.c_double * shape)
    ret_cpp = libmatmult.mult_matrix(a, b, *a.shape, *b.shape, a.shape[0] * a.shape[1])
    out_list_c = [i for i in ret_cpp.contents]  # <---- regenerating list which is time consuming
    return out_list_c

size_a = (300, 300)
size_b = size_a

a = numpy.random.uniform(low=1, high=255, size=size_a)
b = numpy.random.uniform(low=1, high=255, size=size_b)

t2 = time()
out_cpp = mult_matrix_cpp(a, b)
print("cpp time taken:{:.2f} ms".format((time() - t2) * 1000))
out_cpp = numpy.array(out_cpp).reshape(size_a[0], size_a[1])

t3 = time()
out_np = numpy.dot(a, b)
# print(out_np)
print("Numpy dot() time taken:{:.2f} ms".format((time() - t3) * 1000))
This solution works, but it is time consuming. Is there any way to make it faster?
One reason for the time consumption is that you don't use an ndpointer for the return value and instead copy it into a Python list. Use the following restype instead; you won't need the later reshape either. But take the commenters' advice and don't reinvent the wheel.
def mult_matrix_cpp(a, b):
    shape = a.shape[0] * a.shape[1]
    libmatmult.mult_matrix.restype = np.ctypeslib.ndpointer(dtype=np.float64, ndim=2, shape=a.shape, flags="C")
    return libmatmult.mult_matrix(a, b, *a.shape, *b.shape, a.shape[0] * a.shape[1])
So I'm trying to implement the Hough transform using Python and C++ (using PyBind11 for interfacing between the two languages). When I'm plotting the Hough space it seems alright, but I just can't get a line from the maximum of the voting matrix.
Here is the C++ code (it looks a bit different because I use PyBind11):
py::array_t<int> houghTransform(py::array_t<int> image, int angleStep, int angleAmount) {
    auto imageBuf = image.mutable_unchecked<3>();
    int height = imageBuf.shape(0);
    int width = imageBuf.shape(1);

    py::array_t<int> edgeMatrix = edgeDetect(imageBuf, height, width);
    auto edgeMatrixBuf = edgeMatrix.mutable_unchecked<2>();

    int distanceAxis = 2 * sqrt(pow((float) height, 2.0) + pow((float) width, 2.0));
    int angleAxis = angleAmount;
    int angleDim = (int) angleAxis / angleStep;
    int distanceDim = (int) distanceAxis / 2;

    py::array_t<int> votingMatrix = py::array_t<int>({distanceAxis, angleDim});
    auto votingMatrixBuf = votingMatrix.mutable_unchecked<2>();

    // fill voting matrices with zeros
    for(int i=0; i<distanceDim; i++) {
        for(int j=0; j<angleDim; j++) {
            votingMatrixBuf(i, j) = 0;
        }
    }

    // vote
    for(int x=0; x<edgeMatrixBuf.shape(0); x++) {
        for(int y=0; y<edgeMatrixBuf.shape(1); y++) {
            if(edgeMatrixBuf(x, y) == 1) {
                int counter = 0;
                float theta;
                float ro;
                for(int thetaIdx=0; thetaIdx<=angleAxis; thetaIdx++) {
                    if(thetaIdx % angleStep == 0) {
                        counter++;
                        theta = (float) (thetaIdx) * (M_PI / 180);
                        ro = distanceDim + std::round((x * cos(theta)) + (y * sin(theta)));
                        votingMatrixBuf(ro, counter) += 1;
                    }
                }
            }
        }
    }
    return votingMatrix;
}
As you can see, the arguments of the function are the image matrix, which I transform into a matrix where the edges are 1 and the rest 0, so I get my pixels of interest.
int angleAmount is the angle range I want to try out, and int angleStep controls how many of those angles I really want to use (for example, skip every second theta). For this example I will use angleAmount = 360 and angleStep = 1, so I will use all angles from 1 to 360.
Here is the python code:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import time
from houghTransform import houghTransform
def apply_hough_transform(image_path: str=""):
    image = np.array(Image.open(image_path))
    lines = houghTransform(image, 1, 360)
    p = np.unravel_index(lines.argmax(), lines.shape)

    max_distance = 2 * np.sqrt(pow(image.shape[0], 2) + pow(image.shape[1], 2))
    ro = p[0] - (max_distance / 2)
    theta = p[1] * (np.pi / 180)

    a = np.cos(theta)
    b = np.sin(theta)
    x = a * ro
    y = b * ro

    pt1 = (int(x + 1000*(-b)), int(y + 1000*(a)))
    pt2 = (int(x - 1000*(-b)), int(y - 1000*(a)))

    fig, axs = plt.subplots(2)
    axs[0].matshow(lines)
    axs[0].scatter(p[1], p[0], facecolors="none", edgecolors="r")
    axs[1].plot([pt1[0], pt2[0]], [pt1[1], pt2[1]])
    axs[1].imshow(image)
    plt.show()

apply_hough_transform(image_path="images/black_line.png")
The function houghTransform is the same as in the C++ code, which I exported to Python using PyBind11.
Here are the images:
I also tried to create the line using this function:
def line(x):
    return -(1 / np.arctan(theta)) * (x - ro * np.cos(theta)) + ro * np.sin(theta)
But it also didn't work.
Can you spot my error? I've been sitting on this for quite some time, so help is really appreciated!
I'm trying to port some C code to Python; this is what I wrote:
scale = 1.0 / (rows * cols)
RemoveConstantBiasFrom(rarray, scale)

zarray = rarray[:]
zarray = DCT(zarray, rows, cols)
zarray = zarray.flatten()

beta = np.dot(rarray, zarray)

if iloop == 0:
    parray = zarray[:]
else:
    btemp = beta / beta_prev
    parray = zarray + btemp * parray

RemoveConstantBiasFrom(parray, scale)
beta_prev = beta

for j in range(rows):
    for i in range(cols):
        k = j * rows + i
        k1 = k + 1 if i < cols - 1 else k - 1
        k2 = k - 1 if i > 0 else k + 1
        k3 = k + cols if j < rows - 1 else k - cols
        k4 = k - cols if j > 0 else k + cols

        w1 = SIMIN(wts[k], wts[k1])
        w2 = SIMIN(wts[k], wts[k2])
        w3 = SIMIN(wts[k], wts[k3])
        w4 = SIMIN(wts[k], wts[k4])

        A = (w1 + w2 + w3 + w4) * parray[k]
        B = (w1 * parray[k1] + w2 * parray[k2])
        C = (w3 * parray[k3] + w4 * parray[k4])

        zarray[k] = A - (B + C)
wts and parray are two flattened arrays with 262144 values (a 512x512 matrix). When I'm performing this operation I get:
test.py:152: RuntimeWarning: overflow encountered in double_scalars
B = (w1 * parray[k1] + w2 * parray[k2])
test.py:154: RuntimeWarning: overflow encountered in double_scalars
C = (w3 * parray[k3] + w4 * parray[k4])
test.py:155: RuntimeWarning: overflow encountered in double_scalars
zarray[k] = A - B + C
So, I started to "debug" the code.
1) I printed max(parray) before the loops and obtained:
Maximum value for parray 15.2665322926
2) I added an if statement inside the loop to watch the behavior of parray[k1] (if parray[k1] > maxparray: print k1) and I obtained a lot of k1's:
...
251902
252414
252926
253438
253950
254462
254974
255486
255998
256510
257022
257534
258046
258558
259070
259582
260094
260606
261118
261630
262142
So, the question: if I never changed parray, why am I getting values exceeding max(parray)?
This is the C code:
int i, j, k;
int k1, k2, k3, k4;
double sum, w1, w2, w3, w4, btemp, delta, avg, scale;
float *wts;

scale = 1.0/(xsize*ysize);

/* remove constant bias from rarray */
for (k=0, avg=0.0; k<xsize*ysize; k++) avg += rarray[k];
avg *= scale;
for (k=0; k<xsize*ysize; k++) rarray[k] -= avg;

/* compute cosine transform solution of Laplacian in rarray */
for (k=0; k<xsize*ysize; k++) {
    zarray[k] = rarray[k];
}
DCT(zarray, xsize, ysize, xcos, ycos);

/* calculate beta and parray */
for (k=0, *beta=0.0; k<xsize*ysize; k++) {
    *beta += rarray[k]*zarray[k];
}
printf("beta = %lf\n", *beta);
if (iloop == 0) {
    for (k=0; k<xsize*ysize; k++) {
        parray[k] = zarray[k];
    }
}
else {
    btemp = (*beta)/(*beta_prev);
    for (k=0; k<xsize*ysize; k++) {
        parray[k] = zarray[k] + btemp*parray[k];
    }
}

/* remove constant bias from parray */
for (k=0, avg=0.0; k<xsize*ysize; k++) avg += parray[k];
avg *= scale;
for (k=0; k<xsize*ysize; k++) parray[k] -= avg;
*beta_prev = *beta;

/* calculate Qp */
for (j=0; j<ysize; j++) {
    for (i=0; i<xsize; i++) {
        k = j*xsize + i;
        k1 = (i<xsize-1) ? k + 1 : k - 1;
        k2 = (i>0) ? k - 1 : k + 1;
        k3 = (j<ysize-1) ? k + xsize : k - xsize;
        k4 = (j>0) ? k - xsize : k + xsize;
        if (dxwts==NULL && dywts==NULL) {        /* unweighted */
            w1 = w2 = w3 = w4 = 1.0;
        }
        else if (dxwts==NULL || dywts==NULL) {   /* one set of wts */
            wts = (dxwts) ? dxwts : dywts;
            w1 = SIMIN(wts[k], wts[k1]);
            w2 = SIMIN(wts[k], wts[k2]);
            w3 = SIMIN(wts[k], wts[k3]);
            w4 = SIMIN(wts[k], wts[k4]);
        }
        else {                                   /* dxwts and dywts are both supplied */
            w1 = dxwts[k];
            w2 = (i>0) ? dxwts[k-1] : dxwts[k];
            w3 = dywts[k];
            w4 = (j>0) ? dywts[k-xsize] : dywts[k];
        }
        zarray[k] = (w1 + w2 + w3 + w4)*parray[k]
                    - (w1*parray[k1] + w2*parray[k2]
                       + w3*parray[k3] + w4*parray[k4]);
    }
}
I'm only passing dxwts (one set of weights), so the other branches are not needed; that's why I'm using a single if statement. SIMIN is:
#define SIMIN(x,y) (((x)*(x) < (y)*(y)) ? (x)*(x) : (y)*(y))
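In other words, SIMIN(x, y) picks the smaller of the two squared values. Purely for readability, a hypothetical function equivalent of the macro (not part of the original code) would be:
/* Hypothetical equivalent of the SIMIN macro: returns the smaller of x*x and y*y. */
static inline double simin(double x, double y)
{
    return (x * x < y * y) ? (x * x) : (y * y);
}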
Most likely, parray and zarray are the same array.
If you do this:
zarray = DCTF()
parray = zarray
then zarray and parray are the same array and changing an element in zarray will be visible in uses of parray. (This is not significantly different from C.)
If you want a copy, you can do this:
zarray = DCTF()
parray = zarray[:]
The problem was the way I was copying the arrays; the correct way is:
zarray[:] = rarray
I'm attempting to implement the discrete-time wave equation in OpenCL. I think I'm pretty close, but the results look like what I would expect from the heat equation. I know the two are very similar, but when I implemented the 2D wave equation (not using OpenCL) I got distinct wavefronts and reflections. With the OpenCL kernel below, everything diffuses until it is a wash.
__kernel void wave_calc(
    __global float* height,
    __global float* height_old,
    const unsigned int len_x,
    const unsigned int len_y,
    const unsigned int len_z,
    const float dtxc_term)
{
    unsigned int x = get_global_id(0);
    unsigned int y = get_global_id(1);
    unsigned int z = get_global_id(2);

    int this_cell = x + len_y * (y + len_x * z);

    float laplacian;
    if (x==0 || x==(len_x-1) || y==0 || y==(len_y-1) || z==0 || z==(len_z-1)) {
        laplacian = 0;
        height_old[this_cell] = height[this_cell];
        height[this_cell] = 0;
    }
    else if ( x < len_x-1 && y < len_y-1 && z < len_z-1 ){
        int n1 = x - 1 + len_y * (y + len_x * z);
        int n2 = x + 1 + len_y * (y + len_x * z);
        int n3 = x + len_y * (y - 1 + len_x * z);
        int n4 = x + len_y * (y + 1 + len_x * z);
        int n5 = x + len_y * (y + len_x * (z - 1));
        int n6 = x + len_y * (y + len_x * (z + 1));

        laplacian = -6 * height[this_cell] +
                    height[n1] +
                    height[n2] +
                    height[n3] +
                    height[n4] +
                    height[n5] +
                    height[n6];

        height_old[this_cell] = height[this_cell];
        height[this_cell] = (dtxc_term*laplacian + 2*height[this_cell]) - height_old[this_cell];
    }
}
(DTXC is the result of ((DT * DT)/(DX * DX)) * C passed from the host)
Every step I copy height back to the host for plotting, and then call the function again.
for i in np.arange(steps):
    # copy height from host to device
    cl.enqueue_copy(queue, d_height, h_height)
    # step once
    wave_calc(queue, field_3d.shape, None, d_height, d_height_old, LEN_X, LEN_Y, LEN_Z, DTXC)
    queue.finish()
    # copy height back
    cl.enqueue_copy(queue, h_height, d_height)
    # do my plotting
Any thoughts/suggestions/condescending remarks? All would be appreciated. :)
Here is an update to answer Joel's question:
I'm not much good when it comes to calculus, but I'm taking a working C++ implementation in 2D and trying to adapt it for 3D. Below is the C++. The only modification I made was to the loop, since there are 6 neighbor cells in 3D instead of 4. In both cases the outer walls of the plane/cube are set to 0:
for(int x=1; x<field.xRes()-1; x++) {
    for (int y=1; y<field.yRes()-1; y++) {
        laplacian(x,y) = -4 * height(x,y) +
                         height(x-1,y) +
                         height(x+1,y) +
                         height(x,y-1) +
                         height(x,y+1);
    }
}

const float dt = 0.001;
const float xLen = 1.0;
const float C = 1.0;
const float dx = xLen/xRes;

backup = height;
height = ((dt*dt)/(dx*dx))*C*laplacian + 2*height;
height = height - heightOld;
heightOld = backup;
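For reference, here is a minimal sketch of how that 2D loop could be extended to 3D with the six-neighbor stencil used in the kernel above. It assumes hypothetical 3D accessors (field.zRes(), laplacian(x, y, z), height(x, y, z)) that are not part of the original code:
// 3D version of the 2D loop above: six neighbors instead of four, interior cells only.
for (int x = 1; x < field.xRes() - 1; x++) {
    for (int y = 1; y < field.yRes() - 1; y++) {
        for (int z = 1; z < field.zRes() - 1; z++) {
            laplacian(x, y, z) = -6 * height(x, y, z) +
                                 height(x - 1, y, z) + height(x + 1, y, z) +
                                 height(x, y - 1, z) + height(x, y + 1, z) +
                                 height(x, y, z - 1) + height(x, y, z + 1);
        }
    }
}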
I am trying to adapt the code here: http://code.activestate.com/recipes/577166-newton-fractals/ into C and am having some trouble. I am using C99's complex type.
I have tried a few different approaches, none of which worked. On every pixel it goes all the way to the maximum iteration every time, so the image comes out as a solid color.
Is there something I have fundamentally wrong about the way the types work in C? It seems like it should work; I reconstructed the algorithm pretty exactly.
//newt.c
#include <stdio.h>
#include <stdlib.h>
#include <complex.h>
#include <float.h>
#include <math.h>
complex f(complex z);
const int WIDTH = 512, HEIGHT = 512;
const double SCALED_X_MAX = 1.0;
const double SCALED_X_MIN = -1.0;
const double SCALED_Y_MAX = 1.0;
const double SCALED_Y_MIN = -1.0;
const int MAX_ITERATIONS = 20;
const int EPSILON = 1e-3;
int main(int argc, char **argv) {
    const double SCALED_WIDTH = SCALED_X_MAX - SCALED_X_MIN;
    const double SCALED_HEIGHT = SCALED_Y_MAX - SCALED_Y_MIN;

    FILE *image = fopen("newton.ppm", "w");
    fprintf(image, "P3\n");
    fprintf(image, "%d %d\n", WIDTH, HEIGHT);
    fprintf(image, "%d\n", 255);

    for (int y = 0; y < HEIGHT; ++y) {
        double zy = y * (SCALED_HEIGHT)/(HEIGHT-1) + SCALED_Y_MIN;
        for (int x = 0; x < WIDTH; ++x) {
            double zx = x * (SCALED_WIDTH)/(WIDTH-1) + SCALED_X_MIN;
            complex z = zx + zy*I;
            int iteration = 0;
            while (iteration < MAX_ITERATIONS) {
                // complex h = sqrt(DBL_EPSILON) + sqrt(DBL_EPSILON)*I;
                double h = 1e-6;
                /*
                complex zph = z + h;
                complex dz = zph - z;
                complex slope = (f(zph) - f(z))/dz;
                */
                complex volatile dz = (f(z + (h+h*I)) - f(z)) / (h+I*h);
                complex z0 = z - f(z) / dz;
                //fprintf(stdout, "%f \n", cabs(z0 - z));
                if (cabs(z0 - z) < EPSILON) {
                    break;
                }
                z = z0;
                iteration++;
            }
            if (iteration != MAX_ITERATIONS) fprintf(stdout, "%d ", iteration);
            fprintf(image, "%3d %3d %3d ", iteration % 4 * 64,
                                           iteration % 8 * 32,
                                           iteration % 16 * 16);
        }
        fprintf(image, "\n");
    }
    fclose(image);
    exit(0);
}

complex f(complex z) {
    return cpow(z, 3) - 1.0;
}
After checking over the complex maths and not seeing any problems, I noticed that your error is simply due to the integer division in the line
zy = y * (SCALED_HEIGHT)/(HEIGHT-1) + SCALED_Y_MIN;
and similarly for zx. Because (SCALED_HEIGHT) and (HEIGHT-1) are both integers, this is not going to give you the floating-point result you require. Try using:
zy = y * SCALED_HEIGHT * 1.0/(HEIGHT-1) + SCALED_Y_MIN;
and similarly for zx.
EDIT: Sorry, the above was in error. Your SCALED_HEIGHT was in fact a double, so the above was actually fine. The real problem is simply the line
const int EPSILON = 1e-3;
This will in fact always be zero, because it's an int. You need to make EPSILON a floating-point type.
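Concretely, the one-line fix described above is just to declare the threshold as a floating-point constant, for example:
/* 1e-3 truncates to 0 when stored in an int; keep the tolerance as a double. */
const double EPSILON = 1e-3;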