-rw-r--r--  CMakeLists.txt                                      22
-rw-r--r--  pffft_double.c (renamed from pffft64.c)            160
-rw-r--r--  pffft_double.h (renamed from pffft64.h)             36
-rw-r--r--  test_pffft_double.c (renamed from test_pffft64.c)   58
4 files changed, 138 insertions(+), 138 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0eb59f0..d3e6c9a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,23 +72,23 @@ set_property(TARGET PFFFT APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
)
-add_library(PFFFT64 STATIC pffft64.c)
-target_compile_definitions(PFFFT64 PRIVATE _USE_MATH_DEFINES)
+add_library(PFFFT_DOUBLE STATIC pffft_double.c)
+target_compile_definitions(PFFFT_DOUBLE PRIVATE _USE_MATH_DEFINES)
if (USE_ASAN)
- target_compile_options(PFFFT64 PRIVATE "-fsanitize=address")
+ target_compile_options(PFFFT_DOUBLE PRIVATE "-fsanitize=address")
endif()
if (NOT USE_SIMD)
- target_compile_definitions(PFFFT64 PRIVATE PFFFT64_SIMD_DISABLE=1)
+ target_compile_definitions(PFFFT_DOUBLE PRIVATE PFFFTD_SIMD_DISABLE=1)
endif()
if (USE_SIMD AND USE_AVX)
if(WIN32)
- target_compile_options(PFFFT64 PRIVATE "/arch=AVX")
+ target_compile_options(PFFFT_DOUBLE PRIVATE "/arch=AVX")
else(WIN32)
- target_compile_options(PFFFT64 PRIVATE "-march=native")
+ target_compile_options(PFFFT_DOUBLE PRIVATE "-march=native")
endif(WIN32)
endif()
-target_link_libraries( PFFFT64 ${MATHLIB} )
-set_property(TARGET PFFFT64 APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+target_link_libraries( PFFFT_DOUBLE ${MATHLIB} )
+set_property(TARGET PFFFT_DOUBLE APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
)
@@ -123,9 +123,9 @@ target_compile_definitions(test_pffft PRIVATE _USE_MATH_DEFINES)
target_link_libraries( test_pffft PFFFT ${ASANLIB} )
-add_executable( test_pffft64 test_pffft64.c )
-target_compile_definitions(test_pffft64 PRIVATE _USE_MATH_DEFINES)
-target_link_libraries( test_pffft64 PFFFT64 ${ASANLIB} )
+add_executable( test_pffft_double test_pffft_double.c )
+target_compile_definitions(test_pffft_double PRIVATE _USE_MATH_DEFINES)
+target_link_libraries( test_pffft_double PFFFT_DOUBLE ${ASANLIB} )
add_executable(test_pffastconv test_pffastconv.c )
diff --git a/pffft64.c b/pffft_double.c
index 2b3d787..0d25421 100644
--- a/pffft64.c
+++ b/pffft_double.c
@@ -79,7 +79,7 @@
# include <alloca.h>
#endif
-#include "pffft64.h"
+#include "pffft_double.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
@@ -105,13 +105,13 @@
*/
-// define PFFFT64_SIMD_DISABLE if you want to use scalar code instead of simd code
-//#define PFFFT64_SIMD_DISABLE
+// define PFFFTD_SIMD_DISABLE if you want to use scalar code instead of simd code
+//#define PFFFTD_SIMD_DISABLE
/*
AVX support macros
*/
-#if !defined(PFFFT64_SIMD_DISABLE) && defined(__AVX__)
+#if !defined(PFFFTD_SIMD_DISABLE) && defined(__AVX__)
#include <immintrin.h>
typedef __m256d v4sd;
@@ -180,11 +180,11 @@ return [ b[0], b[1], a[2], a[3] ]
_mm256_insertf128_pd(_mm256_castpd128_pd256(_mm256_castpd256_pd128(b)), _mm256_extractf128_pd(a, 1), 1)
# define VALIGNED(ptr) ((((uintptr_t)(ptr)) & 0x1F) == 0)
#else
-#define PFFFT64_SIMD_DISABLE
+#define PFFFTD_SIMD_DISABLE
#endif
// fallback mode for situations where AVX is not available, use scalar mode instead
-#ifdef PFFFT64_SIMD_DISABLE
+#ifdef PFFFTD_SIMD_DISABLE
typedef double v4sd;
# define SIMD_SZ 1
# define VZERO() 0.f
@@ -204,7 +204,7 @@ typedef double v4sd;
#define SVMUL(f,v) VMUL(LD_PS1(f),v)
#endif
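Both branches define the same macro vocabulary, so the kernels below compile unchanged in AVX and scalar builds: v4sd is __m256d with SIMD_SZ == 4 under AVX, and a plain double with SIMD_SZ == 1 under PFFFTD_SIMD_DISABLE. A hypothetical helper, only to illustrate the mode-agnostic style (VMADD(a,b,c) computes a*b + c lane-wise, LD_PS1 broadcasts a scalar into every lane):

    static v4sd axpy_half(v4sd x, v4sd acc) {
      /* returns acc + 0.5*x -- identical source for AVX and scalar builds */
      return VMADD(LD_PS1(0.5), x, acc);
    }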
-#if !defined(PFFFT64_SIMD_DISABLE)
+#if !defined(PFFFTD_SIMD_DISABLE)
typedef union v4sd_union {
v4sd v;
double f[4];
@@ -215,7 +215,7 @@ typedef union v4sd_union {
#define assertv4(v,f0,f1,f2,f3) assert(v.f[0] == (f0) && v.f[1] == (f1) && v.f[2] == (f2) && v.f[3] == (f3))
/* detect bugs with the vector support macros */
-void validate_pffft64_simd() {
+void validate_pffftd_simd() {
double f[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 };
v4sd_union a0, a1, a2, a3, t, u;
memcpy(a0.f, f, 4*sizeof(double));
@@ -251,11 +251,11 @@ void validate_pffft64_simd() {
a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15);
}
-#endif //!PFFFT64_SIMD_DISABLE
+#endif //!PFFFTD_SIMD_DISABLE
/* SSE and co like 16-bytes aligned pointers */
#define MALLOC_V4SF_ALIGNMENT 64 // with a 64-byte alignment, we are even aligned on L2 cache lines...
-void *pffft64_aligned_malloc(size_t nb_bytes) {
+void *pffftd_aligned_malloc(size_t nb_bytes) {
void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT);
if (!p0) return (void *) 0;
p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1))));
@@ -263,11 +263,11 @@ void *pffft64_aligned_malloc(size_t nb_bytes) {
return p;
}
-void pffft64_aligned_free(void *p) {
+void pffftd_aligned_free(void *p) {
if (p) free(*((void **) p - 1));
}
-int pffft64_simd_size() { return SIMD_SZ; }
+int pffftd_simd_size() { return SIMD_SZ; }
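The allocator over-allocates by MALLOC_V4SF_ALIGNMENT, rounds the pointer up to the next 64-byte boundary, and stashes the original malloc() result in the slot just below the returned address, which pffftd_aligned_free() reads back. A minimal usage sketch (the size 1024 is arbitrary):

    double *buf = (double*)pffftd_aligned_malloc(1024 * sizeof(double));
    if (buf) {
      /* ... use buf as transform input/output/work storage ... */
      pffftd_aligned_free(buf);   /* never plain free(buf) */
    }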
/*
passf2 and passb2 have been merged here, fsign = -1 for passf2, +1 for passb2
@@ -1199,34 +1199,34 @@ static v4sd *cfftf1_ps(int n, const v4sd *input_readonly, v4sd *work1, v4sd *wor
}
-struct PFFFT64_Setup {
+struct PFFFTD_Setup {
int N;
- int Ncvec; // nb of complex simd vectors (N/4 if PFFFT64_COMPLEX, N/8 if PFFFT64_REAL)
+ int Ncvec; // nb of complex simd vectors (N/4 if PFFFTD_COMPLEX, N/8 if PFFFTD_REAL)
int ifac[15];
- pffft64_transform_t transform;
+ pffftd_transform_t transform;
v4sd *data; // allocated room for twiddle coefs
double *e; // points into 'data' , N/4*3 elements
double *twiddle; // points into 'data', N/4 elements
};
-PFFFT64_Setup *pffft64_new_setup(int N, pffft64_transform_t transform) {
- PFFFT64_Setup *s = (PFFFT64_Setup*)malloc(sizeof(PFFFT64_Setup));
+PFFFTD_Setup *pffftd_new_setup(int N, pffftd_transform_t transform) {
+ PFFFTD_Setup *s = (PFFFTD_Setup*)malloc(sizeof(PFFFTD_Setup));
int k, m;
/* unfortunately, the fft size must be a multiple of 16 for complex FFTs
and 32 for real FFTs -- a lot of stuff would need to be rewritten to
handle other cases (or maybe just switch to a scalar fft, I don't know..) */
- if (transform == PFFFT64_REAL) { assert((N%(2*SIMD_SZ*SIMD_SZ))==0 && N>0); }
- if (transform == PFFFT64_COMPLEX) { assert((N%(SIMD_SZ*SIMD_SZ))==0 && N>0); }
+ if (transform == PFFFTD_REAL) { assert((N%(2*SIMD_SZ*SIMD_SZ))==0 && N>0); }
+ if (transform == PFFFTD_COMPLEX) { assert((N%(SIMD_SZ*SIMD_SZ))==0 && N>0); }
//assert((N % 32) == 0);
s->N = N;
s->transform = transform;
/* nb of complex simd vectors */
- s->Ncvec = (transform == PFFFT64_REAL ? N/2 : N)/SIMD_SZ;
- s->data = (v4sd*)pffft64_aligned_malloc(2*s->Ncvec * sizeof(v4sd));
+ s->Ncvec = (transform == PFFFTD_REAL ? N/2 : N)/SIMD_SZ;
+ s->data = (v4sd*)pffftd_aligned_malloc(2*s->Ncvec * sizeof(v4sd));
s->e = (double*)s->data;
s->twiddle = (double*)(s->data + (2*s->Ncvec*(SIMD_SZ-1))/SIMD_SZ);
- if (transform == PFFFT64_REAL) {
+ if (transform == PFFFTD_REAL) {
for (k=0; k < s->Ncvec; ++k) {
int i = k/SIMD_SZ;
int j = k%SIMD_SZ;
@@ -1253,19 +1253,19 @@ PFFFT64_Setup *pffft64_new_setup(int N, pffft64_transform_t transform) {
/* check that N is decomposable with allowed prime factors */
for (k=0, m=1; k < s->ifac[1]; ++k) { m *= s->ifac[2+k]; }
if (m != N/SIMD_SZ) {
- pffft64_destroy_setup(s); s = 0;
+ pffftd_destroy_setup(s); s = 0;
}
return s;
}
-void pffft64_destroy_setup(PFFFT64_Setup *s) {
- pffft64_aligned_free(s->data);
+void pffftd_destroy_setup(PFFFTD_Setup *s) {
+ pffftd_aligned_free(s->data);
free(s);
}
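A minimal lifecycle sketch for the setup object (assuming N = 4096, which satisfies the multiple-of-32 constraint for real transforms):

    PFFFTD_Setup *s = pffftd_new_setup(4096, PFFFTD_REAL);
    if (s) { /* NULL if N cannot be factored into the supported primes */
      /* the setup is read-only after creation, so it may be shared by
         several threads transforming concurrently */
      pffftd_destroy_setup(s);
    }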
-#if !defined(PFFFT64_SIMD_DISABLE)
+#if !defined(PFFFTD_SIMD_DISABLE)
/* [0 0 1 2 3 4 5 6 7 8] -> [0 8 7 6 5 4 3 2 1] */
static void reversed_copy(int N, const v4sd *in, int in_stride, v4sd *out) {
@@ -1301,14 +1301,14 @@ static void unreversed_copy(int N, const v4sd *in, v4sd *out, int out_stride) {
UNINTERLEAVE2(h0, g1, out[0], out[1]);
}
-void pffft64_zreorder(PFFFT64_Setup *setup, const double *in, double *out, pffft64_direction_t direction) {
+void pffftd_zreorder(PFFFTD_Setup *setup, const double *in, double *out, pffftd_direction_t direction) {
int k, N = setup->N, Ncvec = setup->Ncvec;
const v4sd *vin = (const v4sd*)in;
v4sd *vout = (v4sd*)out;
assert(in != out);
- if (setup->transform == PFFFT64_REAL) {
+ if (setup->transform == PFFFTD_REAL) {
int k, dk = N/32;
- if (direction == PFFFT64_FORWARD) {
+ if (direction == PFFFTD_FORWARD) {
for (k=0; k < dk; ++k) {
INTERLEAVE2(vin[k*8 + 0], vin[k*8 + 1], vout[2*(0*dk + k) + 0], vout[2*(0*dk + k) + 1]);
INTERLEAVE2(vin[k*8 + 4], vin[k*8 + 5], vout[2*(2*dk + k) + 0], vout[2*(2*dk + k) + 1]);
@@ -1324,7 +1324,7 @@ void pffft64_zreorder(PFFFT64_Setup *setup, const double *in, double *out, pffft
unreversed_copy(dk, (v4sd*)(in + 3*N/4), (v4sd*)(out + N - 2*SIMD_SZ), -8);
}
} else {
- if (direction == PFFFT64_FORWARD) {
+ if (direction == PFFFTD_FORWARD) {
for (k=0; k < Ncvec; ++k) {
int kk = (k/4) + (k%4)*(Ncvec/4);
INTERLEAVE2(vin[k*2], vin[k*2+1], vout[kk*2], vout[kk*2+1]);
@@ -1338,7 +1338,7 @@ void pffft64_zreorder(PFFFT64_Setup *setup, const double *in, double *out, pffft
}
}
-void pffft64_cplx_finalize(int Ncvec, const v4sd *in, v4sd *out, const v4sd *e) {
+void pffftd_cplx_finalize(int Ncvec, const v4sd *in, v4sd *out, const v4sd *e) {
int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks
v4sd r0, i0, r1, i1, r2, i2, r3, i3;
v4sd sr0, dr0, sr1, dr1, si0, di0, si1, di1;
@@ -1382,7 +1382,7 @@ void pffft64_cplx_finalize(int Ncvec, const v4sd *in, v4sd *out, const v4sd *e)
}
}
-void pffft64_cplx_preprocess(int Ncvec, const v4sd *in, v4sd *out, const v4sd *e) {
+void pffftd_cplx_preprocess(int Ncvec, const v4sd *in, v4sd *out, const v4sd *e) {
int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks
v4sd r0, i0, r1, i1, r2, i2, r3, i3;
v4sd sr0, dr0, sr1, dr1, si0, di0, si1, di1;
@@ -1416,7 +1416,7 @@ void pffft64_cplx_preprocess(int Ncvec, const v4sd *in, v4sd *out, const v4sd *e
}
-static ALWAYS_INLINE(void) pffft64_real_finalize_4x4(const v4sd *in0, const v4sd *in1, const v4sd *in,
+static ALWAYS_INLINE(void) pffftd_real_finalize_4x4(const v4sd *in0, const v4sd *in1, const v4sd *in,
const v4sd *e, v4sd *out) {
v4sd r0, i0, r1, i1, r2, i2, r3, i3;
v4sd sr0, dr0, sr1, dr1, si0, di0, si1, di1;
@@ -1473,7 +1473,7 @@ static ALWAYS_INLINE(void) pffft64_real_finalize_4x4(const v4sd *in0, const v4sd
}
-static NEVER_INLINE(void) pffft64_real_finalize(int Ncvec, const v4sd *in, v4sd *out, const v4sd *e) {
+static NEVER_INLINE(void) pffftd_real_finalize(int Ncvec, const v4sd *in, v4sd *out, const v4sd *e) {
int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks
/* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
@@ -1484,7 +1484,7 @@ static NEVER_INLINE(void) pffft64_real_finalize(int Ncvec, const v4sd *in, v4sd
cr.v = in[0]; ci.v = in[Ncvec*2-1];
assert(in != out);
- pffft64_real_finalize_4x4(&zero, &zero, in+1, e, out);
+ pffftd_real_finalize_4x4(&zero, &zero, in+1, e, out);
/*
[cr0 cr1 cr2 cr3 ci0 ci1 ci2 ci3]
@@ -1510,14 +1510,14 @@ static NEVER_INLINE(void) pffft64_real_finalize(int Ncvec, const v4sd *in, v4sd
for (k=1; k < dk; ++k) {
v4sd save_next = in[8*k+7];
- pffft64_real_finalize_4x4(&save, &in[8*k+0], in + 8*k+1,
+ pffftd_real_finalize_4x4(&save, &in[8*k+0], in + 8*k+1,
e + k*6, out + k*8);
save = save_next;
}
}
-static ALWAYS_INLINE(void) pffft64_real_preprocess_4x4(const v4sd *in,
+static ALWAYS_INLINE(void) pffftd_real_preprocess_4x4(const v4sd *in,
const v4sd *e, v4sd *out, int first) {
v4sd r0=in[0], i0=in[1], r1=in[2], i1=in[3], r2=in[4], i2=in[5], r3=in[6], i3=in[7];
/*
@@ -1566,7 +1566,7 @@ static ALWAYS_INLINE(void) pffft64_real_preprocess_4x4(const v4sd *in,
*out++ = i3;
}
-static NEVER_INLINE(void) pffft64_real_preprocess(int Ncvec, const v4sd *in, v4sd *out, const v4sd *e) {
+static NEVER_INLINE(void) pffftd_real_preprocess(int Ncvec, const v4sd *in, v4sd *out, const v4sd *e) {
int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks
/* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
@@ -1579,7 +1579,7 @@ static NEVER_INLINE(void) pffft64_real_preprocess(int Ncvec, const v4sd *in, v4s
Xi.f[k] = ((double*)in)[8*k+4];
}
- pffft64_real_preprocess_4x4(in, e, out+1, 1); // will write only 6 values
+ pffftd_real_preprocess_4x4(in, e, out+1, 1); // will write only 6 values
/*
[Xr0 Xr1 Xr2 Xr3 Xi0 Xi1 Xi2 Xi3]
@@ -1594,7 +1594,7 @@ static NEVER_INLINE(void) pffft64_real_preprocess(int Ncvec, const v4sd *in, v4s
[ci3] [0 -s 0 s 0 -s 0 -s]
*/
for (k=1; k < dk; ++k) {
- pffft64_real_preprocess_4x4(in+8*k, e + k*6, out-1+k*8, 0);
+ pffftd_real_preprocess_4x4(in+8*k, e + k*6, out-1+k*8, 0);
}
cr0=(Xr.f[0]+Xi.f[0]) + 2*Xr.f[2]; uout[0].f[0] = cr0;
@@ -1608,8 +1608,8 @@ static NEVER_INLINE(void) pffft64_real_preprocess(int Ncvec, const v4sd *in, v4s
}
-void pffft64_transform_internal(PFFFT64_Setup *setup, const double *finput, double *foutput, v4sd *scratch,
- pffft64_direction_t direction, int ordered) {
+void pffftd_transform_internal(PFFFTD_Setup *setup, const double *finput, double *foutput, v4sd *scratch,
+ pffftd_direction_t direction, int ordered) {
int k, Ncvec = setup->Ncvec;
int nf_odd = (setup->ifac[1] & 1);
@@ -1625,12 +1625,12 @@ void pffft64_transform_internal(PFFFT64_Setup *setup, const double *finput, doub
assert(VALIGNED(finput) && VALIGNED(foutput));
//assert(finput != foutput);
- if (direction == PFFFT64_FORWARD) {
+ if (direction == PFFFTD_FORWARD) {
ib = !ib;
- if (setup->transform == PFFFT64_REAL) {
+ if (setup->transform == PFFFTD_REAL) {
ib = (rfftf1_ps(Ncvec*2, vinput, buff[ib], buff[!ib],
setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
- pffft64_real_finalize(Ncvec, buff[ib], buff[!ib], (v4sd*)setup->e);
+ pffftd_real_finalize(Ncvec, buff[ib], buff[!ib], (v4sd*)setup->e);
} else {
v4sd *tmp = buff[ib];
for (k=0; k < Ncvec; ++k) {
@@ -1638,25 +1638,25 @@ void pffft64_transform_internal(PFFFT64_Setup *setup, const double *finput, doub
}
ib = (cfftf1_ps(Ncvec, buff[ib], buff[!ib], buff[ib],
setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1);
- pffft64_cplx_finalize(Ncvec, buff[ib], buff[!ib], (v4sd*)setup->e);
+ pffftd_cplx_finalize(Ncvec, buff[ib], buff[!ib], (v4sd*)setup->e);
}
if (ordered) {
- pffft64_zreorder(setup, (double*)buff[!ib], (double*)buff[ib], PFFFT64_FORWARD);
+ pffftd_zreorder(setup, (double*)buff[!ib], (double*)buff[ib], PFFFTD_FORWARD);
} else ib = !ib;
} else {
if (vinput == buff[ib]) {
ib = !ib; // may happen when finput == foutput
}
if (ordered) {
- pffft64_zreorder(setup, (double*)vinput, (double*)buff[ib], PFFFT64_BACKWARD);
+ pffftd_zreorder(setup, (double*)vinput, (double*)buff[ib], PFFFTD_BACKWARD);
vinput = buff[ib]; ib = !ib;
}
- if (setup->transform == PFFFT64_REAL) {
- pffft64_real_preprocess(Ncvec, vinput, buff[ib], (v4sd*)setup->e);
+ if (setup->transform == PFFFTD_REAL) {
+ pffftd_real_preprocess(Ncvec, vinput, buff[ib], (v4sd*)setup->e);
ib = (rfftb1_ps(Ncvec*2, buff[ib], buff[0], buff[1],
setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
} else {
- pffft64_cplx_preprocess(Ncvec, vinput, buff[ib], (v4sd*)setup->e);
+ pffftd_cplx_preprocess(Ncvec, vinput, buff[ib], (v4sd*)setup->e);
ib = (cfftf1_ps(Ncvec, buff[ib], buff[0], buff[1],
setup->twiddle, &setup->ifac[0], +1) == buff[0] ? 0 : 1);
for (k=0; k < Ncvec; ++k) {
@@ -1677,7 +1677,7 @@ void pffft64_transform_internal(PFFFT64_Setup *setup, const double *finput, doub
assert(buff[ib] == voutput);
}
-void pffft64_zconvolve_accumulate(PFFFT64_Setup *s, const double *a, const double *b, double *ab, double scaling) {
+void pffftd_zconvolve_accumulate(PFFFTD_Setup *s, const double *a, const double *b, double *ab, double scaling) {
int Ncvec = s->Ncvec;
const v4sd * RESTRICT va = (const v4sd*)a;
const v4sd * RESTRICT vb = (const v4sd*)b;
@@ -1766,25 +1766,25 @@ void pffft64_zconvolve_accumulate(PFFFT64_Setup *s, const double *a, const doubl
vab[2*i+3] = VMADD(ai, vscal, vab[2*i+3]);
}
#endif
- if (s->transform == PFFFT64_REAL) {
+ if (s->transform == PFFFTD_REAL) {
((v4sd_union*)vab)[0].f[0] = abr + ar*br*scaling;
((v4sd_union*)vab)[1].f[0] = abi + ai*bi*scaling;
}
}
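The intended use of pffftd_zconvolve_accumulate() is FFT convolution: transform both inputs with the unordered (fastest) transform, multiply-accumulate in the z-domain, then transform back. A hedged sketch, where s, N, the aligned buffers in_a/in_b/spec_a/spec_b/acc/out and the work area are hypothetical and set up as in the examples above, with acc zeroed beforehand:

    pffftd_transform(s, in_a, spec_a, work, PFFFTD_FORWARD);
    pffftd_transform(s, in_b, spec_b, work, PFFFTD_FORWARD);
    /* scaling 1.0/N undoes the factor N of the unscaled forward/backward pair */
    pffftd_zconvolve_accumulate(s, spec_a, spec_b, acc, 1.0 / N);
    pffftd_transform(s, acc, out, work, PFFFTD_BACKWARD);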
-#else // defined(PFFFT64_SIMD_DISABLE)
+#else // defined(PFFFTD_SIMD_DISABLE)
// standard routine using scalar doubles, without SIMD stuff.
-#define pffft64_zreorder_nosimd pffft64_zreorder
-void pffft64_zreorder_nosimd(PFFFT64_Setup *setup, const double *in, double *out, pffft64_direction_t direction) {
+#define pffftd_zreorder_nosimd pffftd_zreorder
+void pffftd_zreorder_nosimd(PFFFTD_Setup *setup, const double *in, double *out, pffftd_direction_t direction) {
int k, N = setup->N;
- if (setup->transform == PFFFT64_COMPLEX) {
+ if (setup->transform == PFFFTD_COMPLEX) {
for (k=0; k < 2*N; ++k) out[k] = in[k];
return;
}
- else if (direction == PFFFT64_FORWARD) {
+ else if (direction == PFFFTD_FORWARD) {
double x_N = in[N-1];
for (k=N-1; k > 1; --k) out[k] = in[k-1];
out[0] = in[0];
@@ -1797,9 +1797,9 @@ void pffft64_zreorder_nosimd(PFFFT64_Setup *setup, const double *in, double *out
}
}
-#define pffft64_transform_internal_nosimd pffft64_transform_internal
-void pffft64_transform_internal_nosimd(PFFFT64_Setup *setup, const double *input, double *output, double *scratch,
- pffft64_direction_t direction, int ordered) {
+#define pffftd_transform_internal_nosimd pffftd_transform_internal
+void pffftd_transform_internal_nosimd(PFFFTD_Setup *setup, const double *input, double *output, double *scratch,
+ pffftd_direction_t direction, int ordered) {
int Ncvec = setup->Ncvec;
int nf_odd = (setup->ifac[1] & 1);
@@ -1811,11 +1811,11 @@ void pffft64_transform_internal_nosimd(PFFFT64_Setup *setup, const double *input
if (scratch == 0) scratch = scratch_on_stack;
buff[0] = output; buff[1] = scratch;
- if (setup->transform == PFFFT64_COMPLEX) ordered = 0; // it is always ordered.
+ if (setup->transform == PFFFTD_COMPLEX) ordered = 0; // it is always ordered.
ib = (nf_odd ^ ordered ? 1 : 0);
- if (direction == PFFFT64_FORWARD) {
- if (setup->transform == PFFFT64_REAL) {
+ if (direction == PFFFTD_FORWARD) {
+ if (setup->transform == PFFFTD_REAL) {
ib = (rfftf1_ps(Ncvec*2, input, buff[ib], buff[!ib],
setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
} else {
@@ -1823,17 +1823,17 @@ void pffft64_transform_internal_nosimd(PFFFT64_Setup *setup, const double *input
setup->twiddle, &setup->ifac[0], -1) == buff[0] ? 0 : 1);
}
if (ordered) {
- pffft64_zreorder(setup, buff[ib], buff[!ib], PFFFT64_FORWARD); ib = !ib;
+ pffftd_zreorder(setup, buff[ib], buff[!ib], PFFFTD_FORWARD); ib = !ib;
}
} else {
if (input == buff[ib]) {
ib = !ib; // may happen when finput == foutput
}
if (ordered) {
- pffft64_zreorder(setup, input, buff[!ib], PFFFT64_BACKWARD);
+ pffftd_zreorder(setup, input, buff[!ib], PFFFTD_BACKWARD);
input = buff[!ib];
}
- if (setup->transform == PFFFT64_REAL) {
+ if (setup->transform == PFFFTD_REAL) {
ib = (rfftb1_ps(Ncvec*2, input, buff[ib], buff[!ib],
setup->twiddle, &setup->ifac[0]) == buff[0] ? 0 : 1);
} else {
@@ -1854,12 +1854,12 @@ void pffft64_transform_internal_nosimd(PFFFT64_Setup *setup, const double *input
assert(buff[ib] == output);
}
-#define pffft64_zconvolve_accumulate_nosimd pffft64_zconvolve_accumulate
-void pffft64_zconvolve_accumulate_nosimd(PFFFT64_Setup *s, const double *a, const double *b,
+#define pffftd_zconvolve_accumulate_nosimd pffftd_zconvolve_accumulate
+void pffftd_zconvolve_accumulate_nosimd(PFFFTD_Setup *s, const double *a, const double *b,
double *ab, double scaling) {
int i, Ncvec = s->Ncvec;
- if (s->transform == PFFFT64_REAL) {
+ if (s->transform == PFFFTD_REAL) {
// take care of the fftpack ordering
ab[0] += a[0]*b[0]*scaling;
ab[2*Ncvec-1] += a[2*Ncvec-1]*b[2*Ncvec-1]*scaling;
@@ -1875,18 +1875,18 @@ void pffft64_zconvolve_accumulate_nosimd(PFFFT64_Setup *s, const double *a, cons
}
}
-#endif // defined(PFFFT64_SIMD_DISABLE)
+#endif // defined(PFFFTD_SIMD_DISABLE)
-void pffft64_transform(PFFFT64_Setup *setup, const double *input, double *output, double *work, pffft64_direction_t direction) {
- pffft64_transform_internal(setup, input, output, (v4sd*)work, direction, 0);
+void pffftd_transform(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffftd_direction_t direction) {
+ pffftd_transform_internal(setup, input, output, (v4sd*)work, direction, 0);
}
-void pffft64_transform_ordered(PFFFT64_Setup *setup, const double *input, double *output, double *work, pffft64_direction_t direction) {
- pffft64_transform_internal(setup, input, output, (v4sd*)work, direction, 1);
+void pffftd_transform_ordered(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffftd_direction_t direction) {
+ pffftd_transform_internal(setup, input, output, (v4sd*)work, direction, 1);
}
-int pffft64_next_power_of_two(int N) {
+int pffftd_next_power_of_two(int N) {
/* https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
/* compute the next highest power of 2 of 32-bit v */
unsigned v = N;
@@ -1900,19 +1900,19 @@ int pffft64_next_power_of_two(int N) {
return v;
}
-int pffft64_is_power_of_two(int N) {
+int pffftd_is_power_of_two(int N) {
/* https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 */
int f = N && !(N & (N - 1));
return f;
}
-int pffft64_min_fft_size(pffft64_transform_t transform) {
+int pffftd_min_fft_size(pffftd_transform_t transform) {
/* unfortunately, the fft size must be a multiple of 16 for complex FFTs
and 32 for real FFTs -- a lot of stuff would need to be rewritten to
handle other cases (or maybe just switch to a scalar fft, I don't know..) */
- if (transform == PFFFT64_REAL)
+ if (transform == PFFFTD_REAL)
return (2 * SIMD_SZ * SIMD_SZ);
- else if (transform == PFFFT64_COMPLEX)
+ else if (transform == PFFFTD_COMPLEX)
return (SIMD_SZ * SIMD_SZ);
else
return 1;
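A worked example of the three size helpers (values per the bit tricks above; the min_fft_size result assumes the AVX build, where SIMD_SZ == 4):

    int N    = pffftd_next_power_of_two(3000);   /* -> 4096           */
    int isp2 = pffftd_is_power_of_two(N);        /* -> 1 (true)       */
    int nmin = pffftd_min_fft_size(PFFFTD_REAL); /* -> 32 (2 * 4 * 4) */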
diff --git a/pffft64.h b/pffft_double.h
index 9225028..9f50228 100644
--- a/pffft64.h
+++ b/pffft_double.h
@@ -92,21 +92,21 @@ extern "C" {
this struct can be shared by many threads as it contains only
read-only data.
*/
- typedef struct PFFFT64_Setup PFFFT64_Setup;
+ typedef struct PFFFTD_Setup PFFFTD_Setup;
/* direction of the transform */
- typedef enum { PFFFT64_FORWARD, PFFFT64_BACKWARD } pffft64_direction_t;
+ typedef enum { PFFFTD_FORWARD, PFFFTD_BACKWARD } pffftd_direction_t;
/* type of transform */
- typedef enum { PFFFT64_REAL, PFFFT64_COMPLEX } pffft64_transform_t;
+ typedef enum { PFFFTD_REAL, PFFFTD_COMPLEX } pffftd_transform_t;
/*
prepare for performing transforms of size N -- the returned
- PFFFT64_Setup structure is read-only so it can safely be shared by
+ PFFFTD_Setup structure is read-only so it can safely be shared by
multiple concurrent threads.
*/
- PFFFT64_Setup *pffft64_new_setup(int N, pffft64_transform_t transform);
- void pffft64_destroy_setup(PFFFT64_Setup *);
+ PFFFTD_Setup *pffftd_new_setup(int N, pffftd_transform_t transform);
+ void pffftd_destroy_setup(PFFFTD_Setup *);
/*
Perform a Fourier transform. The z-domain data is stored in the
most efficient order for transforming it back, or using it for
@@ -125,7 +125,7 @@ extern "C" {
input and output may alias.
*/
- void pffft64_transform(PFFFT64_Setup *setup, const double *input, double *output, double *work, pffft64_direction_t direction);
+ void pffftd_transform(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffftd_direction_t direction);
/*
Similar to pffftd_transform, but makes sure that the output is
@@ -134,7 +134,7 @@ extern "C" {
input and output may alias.
*/
- void pffft64_transform_ordered(PFFFT64_Setup *setup, const double *input, double *output, double *work, pffft64_direction_t direction);
+ void pffftd_transform_ordered(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffftd_direction_t direction);
/*
call pffftd_zreorder(.., PFFFTD_FORWARD) after pffftd_transform(...,
@@ -148,7 +148,7 @@ extern "C" {
input and output should not alias.
*/
- void pffft64_zreorder(PFFFT64_Setup *setup, const double *input, double *output, pffft64_direction_t direction);
+ void pffftd_zreorder(PFFFTD_Setup *setup, const double *input, double *output, pffftd_direction_t direction);
/*
Perform a multiplication of the frequency components of dft_a and
@@ -162,36 +162,36 @@ extern "C" {
The dft_a, dft_b and dft_ab pointers may alias.
*/
- void pffft64_zconvolve_accumulate(PFFFT64_Setup *setup, const double *dft_a, const double *dft_b, double *dft_ab, double scaling);
+ void pffftd_zconvolve_accumulate(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double *dft_ab, double scaling);
/* simple helper to get minimum possible fft size */
- int pffft64_min_fft_size(pffft64_transform_t transform);
+ int pffftd_min_fft_size(pffftd_transform_t transform);
/* simple helper to determine next power of 2
- without inexact/rounding floating point operations
*/
- int pffft64_next_power_of_two(int N);
+ int pffftd_next_power_of_two(int N);
/* simple helper to determine if N is a power of 2 - returns bool */
- int pffft64_is_power_of_two(int N);
+ int pffftd_is_power_of_two(int N);
/*
the double buffers must have the correct alignment (32-byte boundary
on Intel and PowerPC). This function may be used to obtain such
correctly aligned buffers.
*/
- void *pffft64_aligned_malloc(size_t nb_bytes);
- void pffft64_aligned_free(void *);
+ void *pffftd_aligned_malloc(size_t nb_bytes);
+ void pffftd_aligned_free(void *);
/* return 4 or 1 depending on whether AVX instruction support was enabled when building pffft_double.c */
- int pffft64_simd_size();
+ int pffftd_simd_size();
- void validate_pffft64_simd();
+ void validate_pffftd_simd();
#ifdef __cplusplus
}
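Putting the renamed API together, a complete forward/backward round trip could look like the following (a minimal sketch, not part of the patch; round_trip_demo and the size 4096 are hypothetical):

    #include "pffft_double.h"

    void round_trip_demo(void) {
      int n = 4096;
      PFFFTD_Setup *s = pffftd_new_setup(n, PFFFTD_REAL);
      double *in   = (double*)pffftd_aligned_malloc(n * sizeof(double));
      double *out  = (double*)pffftd_aligned_malloc(n * sizeof(double));
      double *work = (double*)pffftd_aligned_malloc(n * sizeof(double));
      /* ... fill in[] with the signal ... */
      pffftd_transform_ordered(s, in, out, work, PFFFTD_FORWARD);
      /* out[] now holds the spectrum in canonical order */
      pffftd_transform_ordered(s, out, in, work, PFFFTD_BACKWARD);
      /* in[] is now n times the original signal (transforms are unscaled) */
      pffftd_aligned_free(work);
      pffftd_aligned_free(out);
      pffftd_aligned_free(in);
      pffftd_destroy_setup(s);
    }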
diff --git a/test_pffft64.c b/test_pffft_double.c
index e8272ff..0b9d017 100644
--- a/test_pffft64.c
+++ b/test_pffft_double.c
@@ -6,21 +6,21 @@
How to build:
on linux, with fftw3:
- gcc -o test_pffft64 -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft64.c test_pffft64.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm
+ gcc -o test_pffft_double -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft_double.c test_pffft_double.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm
on macos, without fftw3:
- clang -o test_pffft64 -DHAVE_VECLIB -O3 -Wall -W pffft64.c test_pffft64.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -framework Accelerate
+ clang -o test_pffft_double -DHAVE_VECLIB -O3 -Wall -W pffft_double.c test_pffft_double.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -framework Accelerate
on macos, with fftw3:
- clang -o test_pffft64 -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft64.c test_pffft64.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework Accelerate
+ clang -o test_pffft_double -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft_double.c test_pffft_double.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework Accelerate
as alternative: replace clang by gcc.
on windows, with visual c++:
- cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft64.c pffft64.c fftpack.c
+ cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft_double.c pffft_double.c fftpack.c
build without SIMD instructions:
- gcc -o test_pffft64 -DPFFFT_SIMD_DISABLE -O3 -Wall -W pffft64.c test_pffft64.c fftpack.c -lm
+ gcc -o test_pffft_double -DPFFFTD_SIMD_DISABLE -O3 -Wall -W pffft_double.c test_pffft_double.c fftpack.c -lm
*/
@@ -28,7 +28,7 @@
Note: adapted for double precision dynamic range version.
*/
-#include "pffft64.h"
+#include "pffft_double.h"
#include <math.h>
#include <stdio.h>
@@ -55,18 +55,18 @@ Note: adapted for double precision dynamic range version.
int test(int N, int cplx, int useOrdered) {
int Ndouble = (cplx ? N*2 : N);
- double *X = pffft64_aligned_malloc((unsigned)Ndouble * sizeof(double));
- double *Y = pffft64_aligned_malloc((unsigned)Ndouble * sizeof(double));
- double *Z = pffft64_aligned_malloc((unsigned)Ndouble * sizeof(double));
- double *W = pffft64_aligned_malloc((unsigned)Ndouble * sizeof(double));
+ double *X = pffftd_aligned_malloc((unsigned)Ndouble * sizeof(double));
+ double *Y = pffftd_aligned_malloc((unsigned)Ndouble * sizeof(double));
+ double *Z = pffftd_aligned_malloc((unsigned)Ndouble * sizeof(double));
+ double *W = pffftd_aligned_malloc((unsigned)Ndouble * sizeof(double));
int k, j, m, iter, kmaxOther, retError = 0;
double freq, dPhi, phi, phi0;
double pwr, pwrCar, pwrOther, err, errSum, mag, expectedMag;
double amp = 1.0;
- assert( pffft64_is_power_of_two(N) );
+ assert( pffftd_is_power_of_two(N) );
- PFFFT64_Setup *s = pffft64_new_setup(N, cplx ? PFFFT64_COMPLEX : PFFFT64_REAL);
+ PFFFTD_Setup *s = pffftd_new_setup(N, cplx ? PFFFTD_COMPLEX : PFFFTD_REAL);
assert(s);
if (!s) {
printf("Error setting up PFFFT!\n");
@@ -108,11 +108,11 @@ int test(int N, int cplx, int useOrdered) {
/* forward transform from X --> Y .. using work buffer W */
if ( useOrdered )
- pffft64_transform_ordered(s, X, Y, W, PFFFT64_FORWARD );
+ pffftd_transform_ordered(s, X, Y, W, PFFFTD_FORWARD );
else
{
- pffft64_transform(s, X, Z, W, PFFFT64_FORWARD ); /* temporarily use Z for reordering */
- pffft64_zreorder(s, Z, Y, PFFFT64_FORWARD );
+ pffftd_transform(s, X, Z, W, PFFFTD_FORWARD ); /* temporarily use Z for reordering */
+ pffftd_zreorder(s, Z, Y, PFFFTD_FORWARD );
}
pwrOther = -1.0;
@@ -171,7 +171,7 @@ int test(int N, int cplx, int useOrdered) {
/* now convert spectrum back */
- pffft64_transform_ordered(s, Y, Z, W, PFFFT64_BACKWARD);
+ pffftd_transform_ordered(s, Y, Z, W, PFFFTD_BACKWARD);
errSum = 0.0;
for ( j = 0; j < (cplx ? (2*N) : N); ++j )
@@ -193,11 +193,11 @@ int test(int N, int cplx, int useOrdered) {
}
}
- pffft64_destroy_setup(s);
- pffft64_aligned_free(X);
- pffft64_aligned_free(Y);
- pffft64_aligned_free(Z);
- pffft64_aligned_free(W);
+ pffftd_destroy_setup(s);
+ pffftd_aligned_free(X);
+ pffftd_aligned_free(Y);
+ pffftd_aligned_free(Z);
+ pffftd_aligned_free(W);
return retError;
}
@@ -214,30 +214,30 @@ int main(int argc, char **argv)
resNextPw2 = 0;
resIsPw2 = 0;
for ( k = 0; k < (sizeof(inp_power_of_two)/sizeof(inp_power_of_two[0])); ++k) {
- N = pffft64_next_power_of_two(inp_power_of_two[k]);
+ N = pffftd_next_power_of_two(inp_power_of_two[k]);
if (N != ref_power_of_two[k]) {
resNextPw2 = 1;
- printf("pffft64_next_power_of_two(%d) does deliver %d, which is not reference result %d!\n",
+ printf("pffftd_next_power_of_two(%d) does deliver %d, which is not reference result %d!\n",
inp_power_of_two[k], N, ref_power_of_two[k] );
}
- result = pffft64_is_power_of_two(inp_power_of_two[k]);
+ result = pffftd_is_power_of_two(inp_power_of_two[k]);
if (inp_power_of_two[k] == ref_power_of_two[k]) {
if (!result) {
resIsPw2 = 1;
- printf("pffft64_is_power_of_two(%d) delivers false; expected true!\n", inp_power_of_two[k]);
+ printf("pffftd_is_power_of_two(%d) delivers false; expected true!\n", inp_power_of_two[k]);
}
} else {
if (result) {
resIsPw2 = 1;
- printf("pffft64_is_power_of_two(%d) delivers true; expected false!\n", inp_power_of_two[k]);
+ printf("pffftd_is_power_of_two(%d) delivers true; expected false!\n", inp_power_of_two[k]);
}
}
}
if (!resNextPw2)
- printf("tests for pffft64_next_power_of_two() succeeded successfully.\n");
+ printf("tests for pffftd_next_power_of_two() succeeded successfully.\n");
if (!resIsPw2)
- printf("tests for pffft64_is_power_of_two() succeeded successfully.\n");
+ printf("tests for pffftd_is_power_of_two() succeeded successfully.\n");
resFFT = 0;
for ( N = 32; N <= 65536; N *= 2 )
@@ -263,7 +263,7 @@ int main(int argc, char **argv)
}
if (!resFFT)
- printf("all pffft64 transform tests (FORWARD/BACKWARD, REAL/COMPLEX) succeeded successfully.\n");
+ printf("all pffftd transform tests (FORWARD/BACKWARD, REAL/COMPLEX) succeeded successfully.\n");
resAll = resNextPw2 | resIsPw2 | resFFT;
if (!resAll)