diff options
author | hayati ayguen <h_ayguen@web.de> | 2020-03-29 03:39:30 +0200 |
---|---|---|
committer | hayati ayguen <h_ayguen@web.de> | 2020-03-29 03:39:30 +0200 |
commit | c974c1d9e336bdd790260d96044207424384613c (patch) | |
tree | 4ee2c5936213ee02ece083cb14d586cb33f8408e | |
parent | 7b3ca7d7804ef7f650382a922c89571bf7fee980 (diff) | |
download | pffft-c974c1d9e336bdd790260d96044207424384613c.tar.gz |
removed most uses of gcc extensions in the source - but the extensions are still required
* replaced the C++ style '//' comments with C style '/* */' comments
* added explicit math defines (M_PI, M_SQRT2, M_LN2) - if not already defined
* gcc function (inline) attributes still require the extensions :-(
Signed-off-by: hayati ayguen <h_ayguen@web.de>
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | bench_pffft.c | 44 | ||||
-rw-r--r-- | fftpack.c | 12 | ||||
-rw-r--r-- | fftpack.h | 2 | ||||
-rw-r--r-- | pf_double.h | 11 | ||||
-rw-r--r-- | pf_float.h | 11 | ||||
-rw-r--r-- | pffastconv.h | 4 | ||||
-rw-r--r-- | pffft.h | 2 | ||||
-rw-r--r-- | pffft_double.h | 2 | ||||
-rw-r--r-- | pffft_priv_impl.h | 85 | ||||
-rw-r--r-- | test_pffft.c | 4 | ||||
-rw-r--r-- | test_pffft.cpp | 5 | ||||
-rw-r--r-- | test_pffft_double.c | 4 |
13 files changed, 115 insertions, 73 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index ea8c9cc..3dc5120 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,7 @@ option(USE_ASAN "use GCC's address sanitizer?" OFF) set(CMAKE_C_STANDARD 90) -set(CMAKE_C_EXTENSIONS ON) # required for M_PI and M_SQRT2 +set(CMAKE_C_EXTENSIONS ON) # required for gcc function attribures (inline) set(CMAKE_CXX_STANDARD 98) set(CMAKE_CXX_STANDARD_REQUIRED ON) diff --git a/bench_pffft.c b/bench_pffft.c index 1ff359e..6fc6ca4 100644 --- a/bench_pffft.c +++ b/bench_pffft.c @@ -57,6 +57,10 @@ # include <fftw3.h> #endif +#ifndef M_LN2 + #define M_LN2 0.69314718055994530942 /* log_e 2 */ +#endif + #define NUM_FFT_ALGOS 8 enum { @@ -80,7 +84,7 @@ enum { TYPE_MFLOPS = 5, /* MFlops/sec */ TYPE_DUR_TOT = 6 /* test duration in sec */ }; -// double tmeas[NUM_TYPES][NUM_FFT_ALGOS]; +/* double tmeas[NUM_TYPES][NUM_FFT_ALGOS]; */ const char * algoName[NUM_FFT_ALGOS] = { "FFTPack ", @@ -219,8 +223,8 @@ void pffft_validate_N(int N, int cplx) { for (pass=0; pass < 2; ++pass) { float ref_max = 0; int k; - //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx); - // compute reference solution with FFTPACK + /* printf("N=%d pass=%d cplx=%d\n", N, pass, cplx); */ + /* compute reference solution with FFTPACK */ if (pass == 0) { float *wrk = malloc(2*Nbytes+15*sizeof(float)); for (k=0; k < Nfloat; ++k) { @@ -230,7 +234,7 @@ void pffft_validate_N(int N, int cplx) { if (!cplx) { rffti(N, wrk); rfftf(N, ref, wrk); - // use our ordering for real ffts instead of the one of fftpack + /* use our ordering for real ffts instead of the one of fftpack */ { float refN=ref[N-1]; for (k=N-2; k >= 1; --k) ref[k+1] = ref[k]; @@ -246,9 +250,9 @@ void pffft_validate_N(int N, int cplx) { for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k])); - // pass 0 : non canonical ordering of transform coefficients + /* pass 0 : non canonical ordering of transform coefficients */ if (pass == 0) { - // test forward transform, with different input / output + /* test 
forward transform, with different input / output */ pffft_transform(s, in, tmp, 0, PFFFT_FORWARD); memcpy(tmp2, tmp, Nbytes); memcpy(tmp, in, Nbytes); @@ -257,7 +261,7 @@ void pffft_validate_N(int N, int cplx) { assert(tmp2[k] == tmp[k]); } - // test reordering + /* test reordering */ pffft_zreorder(s, tmp, out, PFFFT_FORWARD); pffft_zreorder(s, out, tmp, PFFFT_BACKWARD); for (k = 0; k < Nfloat; ++k) { @@ -265,7 +269,7 @@ void pffft_validate_N(int N, int cplx) { } pffft_zreorder(s, tmp, out, PFFFT_FORWARD); } else { - // pass 1 : canonical ordering of transform coeffs. + /* pass 1 : canonical ordering of transform coeffs. */ pffft_transform_ordered(s, in, tmp, 0, PFFFT_FORWARD); memcpy(tmp2, tmp, Nbytes); memcpy(tmp, in, Nbytes); @@ -302,7 +306,7 @@ void pffft_validate_N(int N, int cplx) { } } - // quick test of the circular convolution in fft domain + /* quick test of the circular convolution in fft domain */ { float conv_err = 0, conv_max = 0; @@ -486,7 +490,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl } - // FFTPack benchmark + /* FFTPack benchmark */ Nmax = (cplx ? N*2 : N); X[Nmax] = checkVal; { @@ -519,7 +523,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl free(wrk); - flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html + flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ tmeas[TYPE_ITER][ALGO_FFTPACK] = max_iter; tmeas[TYPE_MFLOPS][ALGO_FFTPACK] = flops/1e6/(t1 - t0 + 1e-16); tmeas[TYPE_DUR_TOT][ALGO_FFTPACK] = t1 - t0; @@ -563,7 +567,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl } while ( t1 < tstop ); vDSP_destroy_fftsetup(setup); - flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html + flops = (max_iter*2) * ((cplx ? 
5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ tmeas[TYPE_ITER][ALGO_VECLIB] = max_iter; tmeas[TYPE_MFLOPS][ALGO_VECLIB] = flops/1e6/(t1 - t0 + 1e-16); tmeas[TYPE_DUR_TOT][ALGO_VECLIB] = t1 - t0; @@ -613,7 +617,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl fftwf_destroy_plan(planb); fftwf_free(in); fftwf_free(out); - flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html + flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ tmeas[TYPE_ITER][ALGO_FFTW_ESTIM] = max_iter; tmeas[TYPE_MFLOPS][ALGO_FFTW_ESTIM] = flops/1e6/(t1 - t0 + 1e-16); tmeas[TYPE_DUR_TOT][ALGO_FFTW_ESTIM] = t1 - t0; @@ -673,7 +677,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl fftwf_destroy_plan(planb); fftwf_free(in); fftwf_free(out); - flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html + flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ tmeas[TYPE_ITER][ALGO_FFTW_AUTO] = max_iter; tmeas[TYPE_MFLOPS][ALGO_FFTW_AUTO] = flops/1e6/(t1 - t0 + 1e-16); tmeas[TYPE_DUR_TOT][ALGO_FFTW_AUTO] = t1 - t0; @@ -717,7 +721,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl fftFree(); - flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html + flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ tmeas[TYPE_ITER][ALGO_GREEN] = max_iter; tmeas[TYPE_MFLOPS][ALGO_GREEN] = flops/1e6/(t1 - t0 + 1e-16); tmeas[TYPE_DUR_TOT][ALGO_GREEN] = t1 - t0; @@ -773,7 +777,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl kiss_fft_cleanup(); - flops = (max_iter*2) * ((cplx ? 
5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html + flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ tmeas[TYPE_ITER][ALGO_KISS] = max_iter; tmeas[TYPE_MFLOPS][ALGO_KISS] = flops/1e6/(t1 - t0 + 1e-16); tmeas[TYPE_DUR_TOT][ALGO_KISS] = t1 - t0; @@ -786,7 +790,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl #endif - // PFFFT-U (unordered) benchmark + /* PFFFT-U (unordered) benchmark */ Nmax = (cplx ? pffftPow2N*2 : pffftPow2N); X[Nmax] = checkVal; { @@ -810,7 +814,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl pffft_destroy_setup(s); - flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html + flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ tmeas[TYPE_ITER][ALGO_PFFFT_U] = max_iter; tmeas[TYPE_MFLOPS][ALGO_PFFFT_U] = flops/1e6/(t1 - t0 + 1e-16); tmeas[TYPE_DUR_TOT][ALGO_PFFFT_U] = t1 - t0; @@ -840,7 +844,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl pffft_destroy_setup(s); - flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html + flops = (max_iter*2) * ((cplx ? 
5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ tmeas[TYPE_ITER][ALGO_PFFFT_O] = max_iter; tmeas[TYPE_MFLOPS][ALGO_PFFFT_O] = flops/1e6/(t1 - t0 + 1e-16); tmeas[TYPE_DUR_TOT][ALGO_PFFFT_O] = t1 - t0; @@ -909,7 +913,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl } #ifndef PFFFT_SIMD_DISABLE -void validate_pffft_simd(); // a small function inside pffft.c that will detect compiler bugs with respect to simd instruction +void validate_pffft_simd(); /* a small function inside pffft.c that will detect compiler bugs with respect to simd instruction */ #endif @@ -66,6 +66,16 @@ static real c_abs(f77complex *c) { return sqrt(c->r*c->r + c->i*c->i); } static double dmax(double a, double b) { return a < b ? b : a; } #endif +/* define own constants required to turn off g++ extensions .. */ +#ifndef M_PI + #define M_PI 3.14159265358979323846 /* pi */ +#endif + +#ifndef M_SQRT2 + #define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ +#endif + + /* translated by f2c (version 20061008), and slightly edited */ static void passfb(integer *nac, integer ido, integer ip, integer l1, integer idl1, @@ -3109,4 +3119,4 @@ int main(void) return all_ok ? 
0 : 1; } -#endif //TESTING_FFTPACK +#endif /* TESTING_FFTPACK */ @@ -49,7 +49,7 @@ extern "C" { #endif -// just define FFTPACK_DOUBLE_PRECISION if you want to build it as a double precision fft +/* just define FFTPACK_DOUBLE_PRECISION if you want to build it as a double precision fft */ #ifndef FFTPACK_DOUBLE_PRECISION typedef float fftpack_real; diff --git a/pf_double.h b/pf_double.h index e9bf715..0951a24 100644 --- a/pf_double.h +++ b/pf_double.h @@ -69,11 +69,11 @@ #include "pf_scalar_double.h" -// shortcuts for complex multiplcations +/* shortcuts for complex multiplcations */ #define VCPLXMUL(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VSUB(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VADD(ai,tmp); } #define VCPLXMULCONJ(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VADD(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VSUB(ai,tmp); } #ifndef SVMUL -// multiply a scalar with a vector +/* multiply a scalar with a vector */ #define SVMUL(f,v) VMUL(LD_PS1(f),v) #endif @@ -123,10 +123,11 @@ static void Vvalidate_simd() { a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15); } -#endif //!PFFFT_SIMD_DISABLE +#endif /* !PFFFT_SIMD_DISABLE */ -/* SSE and co like 16-bytes aligned pointers */ -#define MALLOC_V4SF_ALIGNMENT 64 // with a 64-byte alignment, we are even aligned on L2 cache lines... +/* SSE and co like 16-bytes aligned pointers + * with a 64-byte alignment, we are even aligned on L2 cache lines... 
*/ +#define MALLOC_V4SF_ALIGNMENT 64 static void *Valigned_malloc(size_t nb_bytes) { @@ -72,11 +72,11 @@ #include "pf_scalar_float.h" -// shortcuts for complex multiplcations +/* shortcuts for complex multiplcations */ #define VCPLXMUL(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VSUB(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VADD(ai,tmp); } #define VCPLXMULCONJ(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VADD(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VSUB(ai,tmp); } #ifndef SVMUL -// multiply a scalar with a vector +/* multiply a scalar with a vector */ #define SVMUL(f,v) VMUL(LD_PS1(f),v) #endif @@ -126,10 +126,11 @@ static void Vvalidate_simd() { a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15); } -#endif //!PFFFT_SIMD_DISABLE +#endif /* !PFFFT_SIMD_DISABLE */ -/* SSE and co like 16-bytes aligned pointers */ -#define MALLOC_V4SF_ALIGNMENT 64 // with a 64-byte alignment, we are even aligned on L2 cache lines... +/* SSE and co like 16-bytes aligned pointers + * with a 64-byte alignment, we are even aligned on L2 cache lines... */ +#define MALLOC_V4SF_ALIGNMENT 64 static void *Valigned_malloc(size_t nb_bytes) { diff --git a/pffastconv.h b/pffastconv.h index 11e82f8..6bc5e47 100644 --- a/pffastconv.h +++ b/pffastconv.h @@ -26,7 +26,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 
*/ - + /* PFFASTCONV : a Pretty Fast Fast Convolution @@ -50,7 +50,7 @@ #ifndef PFFASTCONV_H #define PFFASTCONV_H -#include <stddef.h> // for size_t +#include <stddef.h> /* for size_t */ #include "pffft.h" @@ -77,7 +77,7 @@ #ifndef PFFFT_H #define PFFFT_H -#include <stddef.h> // for size_t +#include <stddef.h> /* for size_t */ #ifdef __cplusplus extern "C" { diff --git a/pffft_double.h b/pffft_double.h index 71ea60a..ec3e08a 100644 --- a/pffft_double.h +++ b/pffft_double.h @@ -82,7 +82,7 @@ #ifndef PFFFT_DOUBLE_H #define PFFFT_DOUBLE_H -#include <stddef.h> // for size_t +#include <stddef.h> /* for size_t */ #ifdef __cplusplus extern "C" { diff --git a/pffft_priv_impl.h b/pffft_priv_impl.h index 33588b6..a7c8739 100644 --- a/pffft_priv_impl.h +++ b/pffft_priv_impl.h @@ -59,6 +59,16 @@ */ +/* define own constants required to turn off g++ extensions .. */ +#ifndef M_PI + #define M_PI 3.14159265358979323846 /* pi */ +#endif + +#ifndef M_SQRT2 + #define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ +#endif + + /* detect bugs with the vector support macros */ void FUNC_VALIDATE_SIMD() { #ifndef PFFFT_SIMD_DISABLE @@ -435,7 +445,7 @@ static NEVER_INLINE(void) radf4_ps(int ido, int l1, const v4sf *RESTRICT cc, v4s const v4sf *RESTRICT cc_ = cc, * RESTRICT cc_end = cc + l1ido; v4sf * RESTRICT ch_ = ch; while (cc < cc_end) { - // this loop represents between 25% and 40% of total radf4_ps cost ! + /* this loop represents between 25% and 40% of total radf4_ps cost ! 
*/ v4sf a0 = cc[0], a1 = cc[l1ido]; v4sf a2 = cc[2*l1ido], a3 = cc[3*l1ido]; v4sf tr1 = VADD(a1, a3); @@ -482,11 +492,11 @@ static NEVER_INLINE(void) radf4_ps(int ido, int l1, const v4sf *RESTRICT cc, v4s tr2 = VADD(pc[0],cr3); tr3 = VSUB(pc[0],cr3); ch[i - 1 + 4*k] = VADD(tr1,tr2); - ch[ic - 1 + 4*k + 3*ido] = VSUB(tr2,tr1); // at this point tr1 and tr2 can be disposed + ch[ic - 1 + 4*k + 3*ido] = VSUB(tr2,tr1); /* at this point tr1 and tr2 can be disposed */ ti1 = VADD(ci2,ci4); ti4 = VSUB(ci2,ci4); ch[i - 1 + 4*k + 2*ido] = VADD(ti4,tr3); - ch[ic - 1 + 4*k + 1*ido] = VSUB(tr3,ti4); // dispose tr3, ti4 + ch[ic - 1 + 4*k + 1*ido] = VSUB(tr3,ti4); /* dispose tr3, ti4 */ ti2 = VADD(pc[1],ci3); ti3 = VSUB(pc[1],ci3); ch[i + 4*k] = VADD(ti1, ti2); @@ -628,7 +638,7 @@ static void radf5_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT ch_ref(1, 3, k) = VADD(SVMUL(ti11, ci5), SVMUL(ti12, ci4)); ch_ref(ido, 4, k) = VADD(cc_ref(1, k, 1), VADD(SVMUL(tr12, cr2), SVMUL(tr11, cr3))); ch_ref(1, 5, k) = VSUB(SVMUL(ti12, ci5), SVMUL(ti11, ci4)); - //printf("pffft: radf5, k=%d ch_ref=%f, ci4=%f\n", k, ch_ref(1, 5, k), ci4); + /* printf("pffft: radf5, k=%d ch_ref=%f, ci4=%f\n", k, ch_ref(1, 5, k), ci4); */ } if (ido == 1) { return; @@ -654,11 +664,11 @@ static void radf5_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT cr4 = VSUB(di3, di4); ci3 = VADD(di3, di4); ch_ref(i - 1, 1, k) = VADD(cc_ref(i - 1, k, 1), VADD(cr2, cr3)); - ch_ref(i, 1, k) = VSUB(cc_ref(i, k, 1), VADD(ci2, ci3));// + ch_ref(i, 1, k) = VSUB(cc_ref(i, k, 1), VADD(ci2, ci3)); tr2 = VADD(cc_ref(i - 1, k, 1), VADD(SVMUL(tr11, cr2), SVMUL(tr12, cr3))); - ti2 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr11, ci2), SVMUL(tr12, ci3)));// + ti2 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr11, ci2), SVMUL(tr12, ci3))); tr3 = VADD(cc_ref(i - 1, k, 1), VADD(SVMUL(tr12, cr2), SVMUL(tr11, cr3))); - ti3 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr12, ci2), SVMUL(tr11, ci3)));// + ti3 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr12, 
ci2), SVMUL(tr11, ci3))); tr5 = VADD(SVMUL(ti11, cr5), SVMUL(ti12, cr4)); ti5 = VADD(SVMUL(ti11, ci5), SVMUL(ti12, ci4)); tr4 = VSUB(SVMUL(ti12, cr5), SVMUL(ti11, cr4)); @@ -1008,12 +1018,12 @@ static v4sf *cfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *wor struct SETUP_STRUCT { int N; - int Ncvec; // nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) + int Ncvec; /* nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) */ int ifac[15]; pffft_transform_t transform; - v4sf *data; // allocated room for twiddle coefs - float *e; // points into 'data' , N/4*3 elements - float *twiddle; // points into 'data', N/4 elements + v4sf *data; /* allocated room for twiddle coefs */ + float *e; /* points into 'data', N/4*3 elements */ + float *twiddle; /* points into 'data', N/4 elements */ }; SETUP_STRUCT *FUNC_NEW_SETUP(int N, pffft_transform_t transform) { @@ -1024,7 +1034,7 @@ SETUP_STRUCT *FUNC_NEW_SETUP(int N, pffft_transform_t transform) { handle other cases (or maybe just switch to a scalar fft, I don't know..) 
*/ if (transform == PFFFT_REAL) { assert((N%(2*SIMD_SZ*SIMD_SZ))==0 && N>0); } if (transform == PFFFT_COMPLEX) { assert((N%(SIMD_SZ*SIMD_SZ))==0 && N>0); } - //assert((N % 32) == 0); + /* assert((N % 32) == 0); */ s->N = N; s->transform = transform; /* nb of complex simd vectors */ @@ -1080,7 +1090,7 @@ static void reversed_copy(int N, const v4sf *in, int in_stride, v4sf *out) { int k; INTERLEAVE2(in[0], in[1], g0, g1); in += in_stride; - *--out = VSWAPHL(g0, g1); // [g0l, g0h], [g1l g1h] -> [g1l, g0h] + *--out = VSWAPHL(g0, g1); /* [g0l, g0h], [g1l g1h] -> [g1l, g0h] */ for (k=1; k < N; ++k) { v4sf h0, h1; INTERLEAVE2(in[0], in[1], h0, h1); in += in_stride; @@ -1146,7 +1156,7 @@ void FUNC_ZREORDER(SETUP_STRUCT *setup, const float *in, float *out, pffft_direc } void FUNC_CPLX_FINALIZE(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { - int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks + int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */ v4sf r0, i0, r1, i1, r2, i2, r3, i3; v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1; assert(in != out); @@ -1190,7 +1200,7 @@ void FUNC_CPLX_FINALIZE(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { } void FUNC_CPLX_PREPROCESS(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { - int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks + int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */ v4sf r0, i0, r1, i1, r2, i2, r3, i3; v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1; assert(in != out); @@ -1245,15 +1255,15 @@ static ALWAYS_INLINE(void) FUNC_REAL_FINALIZE_4X4(const v4sf *in0, const v4sf *i [0 0 0 0 -1 1 -1 1] [i3] */ - //cerr << "matrix initial, before e , REAL:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; - //cerr << "matrix initial, before e, IMAG :\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; + /* cerr << "matrix initial, before e , REAL:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; */ + /* cerr << 
"matrix initial, before e, IMAG :\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; */ VCPLXMUL(r1,i1,e[0],e[1]); VCPLXMUL(r2,i2,e[2],e[3]); VCPLXMUL(r3,i3,e[4],e[5]); - //cerr << "matrix initial, real part:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; - //cerr << "matrix initial, imag part:\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; + /* cerr << "matrix initial, real part:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; */ + /* cerr << "matrix initial, imag part:\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; */ sr0 = VADD(r0,r2); dr0 = VSUB(r0,r2); sr1 = VADD(r1,r3); dr1 = VSUB(r3,r1); @@ -1281,7 +1291,7 @@ static ALWAYS_INLINE(void) FUNC_REAL_FINALIZE_4X4(const v4sf *in0, const v4sf *i } static NEVER_INLINE(void) FUNC_REAL_FINALIZE(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { - int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks + int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */ /* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */ v4sf_union cr, ci, *uout = (v4sf_union*)out; @@ -1374,7 +1384,7 @@ static ALWAYS_INLINE(void) FUNC_REAL_PREPROCESS_4X4(const v4sf *in, } static NEVER_INLINE(void) FUNC_REAL_PREPROCESS(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) { - int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks + int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */ /* fftpack order is f0r f1r f1i f2r f2i ... 
f(n-1)r f(n-1)i f(n)r */ v4sf_union Xr, Xi, *uout = (v4sf_union*)out; @@ -1386,7 +1396,7 @@ static NEVER_INLINE(void) FUNC_REAL_PREPROCESS(int Ncvec, const v4sf *in, v4sf * Xi.f[k] = ((float*)in)[8*k+4]; } - FUNC_REAL_PREPROCESS_4X4(in, e, out+1, 1); // will write only 6 values + FUNC_REAL_PREPROCESS_4X4(in, e, out+1, 1); /* will write only 6 values */ /* [Xr0 Xr1 Xr2 Xr3 Xi0 Xi1 Xi2 Xi3] @@ -1420,7 +1430,7 @@ void FUNC_TRANSFORM_INTERNAL(SETUP_STRUCT *setup, const float *finput, float *fo int k, Ncvec = setup->Ncvec; int nf_odd = (setup->ifac[1] & 1); - // temporary buffer is allocated on the stack if the scratch pointer is NULL + /* temporary buffer is allocated on the stack if the scratch pointer is NULL */ int stack_allocate = (scratch == 0 ? Ncvec*2 : 1); VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate); @@ -1431,7 +1441,7 @@ void FUNC_TRANSFORM_INTERNAL(SETUP_STRUCT *setup, const float *finput, float *fo assert(VALIGNED(finput) && VALIGNED(foutput)); - //assert(finput != foutput); + /* assert(finput != foutput); */ if (direction == PFFFT_FORWARD) { ib = !ib; if (setup->transform == PFFFT_REAL) { @@ -1452,7 +1462,7 @@ void FUNC_TRANSFORM_INTERNAL(SETUP_STRUCT *setup, const float *finput, float *fo } else ib = !ib; } else { if (vinput == buff[ib]) { - ib = !ib; // may happen when finput == foutput + ib = !ib; /* may happen when finput == foutput */ } if (ordered) { FUNC_ZREORDER(setup, (float*)vinput, (float*)buff[ib], PFFFT_BACKWARD); @@ -1522,7 +1532,9 @@ void FUNC_ZCONVOLVE_ACCUMULATE(SETUP_STRUCT *s, const float *a, const float *b, abr = ((v4sf_union*)vab)[0].f[0]; abi = ((v4sf_union*)vab)[1].f[0]; -#ifdef ZCONVOLVE_USING_INLINE_ASM // inline asm version, unfortunately miscompiled by clang 3.2, at least on ubuntu.. so this will be restricted to gcc +#ifdef ZCONVOLVE_USING_INLINE_ASM + /* inline asm version, unfortunately miscompiled by clang 3.2, + * at least on ubuntu.. 
so this will be restricted to gcc */ const float *a_ = a, *b_ = b; float *ab_ = ab; int N = Ncvec; asm volatile("mov r8, %2 \n" @@ -1558,7 +1570,8 @@ void FUNC_ZCONVOLVE_ACCUMULATE(SETUP_STRUCT *s, const float *a, const float *b, "subs %3, #2 \n" "bne 1b \n" : "+r"(a_), "+r"(b_), "+r"(ab_), "+r"(N) : "r"(scaling) : "r8", "q0","q1","q2","q3","q4","q5","q6","q7","q8","q9", "q10","q11","q12","q13","q15","memory"); -#else // default routine, works fine for non-arm cpus with current compilers +#else + /* default routine, works fine for non-arm cpus with current compilers */ for (i=0; i < Ncvec; i += 2) { v4sf ar, ai, br, bi; ar = va[2*i+0]; ai = va[2*i+1]; @@ -1634,9 +1647,9 @@ void FUNC_ZCONVOLVE_NO_ACCU(SETUP_STRUCT *s, const float *a, const float *b, flo } -#else // defined(PFFFT_SIMD_DISABLE) +#else /* defined(PFFFT_SIMD_DISABLE) */ -// standard routine using scalar floats, without SIMD stuff. +/* standard routine using scalar floats, without SIMD stuff. */ #define pffft_zreorder_nosimd FUNC_ZREORDER void pffft_zreorder_nosimd(SETUP_STRUCT *setup, const float *in, float *out, pffft_direction_t direction) { @@ -1664,7 +1677,7 @@ void pffft_transform_internal_nosimd(SETUP_STRUCT *setup, const float *input, fl int Ncvec = setup->Ncvec; int nf_odd = (setup->ifac[1] & 1); - // temporary buffer is allocated on the stack if the scratch pointer is NULL + /* temporary buffer is allocated on the stack if the scratch pointer is NULL */ int stack_allocate = (scratch == 0 ? Ncvec*2 : 1); VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate); float *buff[2]; @@ -1672,7 +1685,7 @@ void pffft_transform_internal_nosimd(SETUP_STRUCT *setup, const float *input, fl if (scratch == 0) scratch = scratch_on_stack; buff[0] = output; buff[1] = scratch; - if (setup->transform == PFFFT_COMPLEX) ordered = 0; // it is always ordered. + if (setup->transform == PFFFT_COMPLEX) ordered = 0; /* it is always ordered. */ ib = (nf_odd ^ ordered ? 
1 : 0); if (direction == PFFFT_FORWARD) { @@ -1688,7 +1701,7 @@ void pffft_transform_internal_nosimd(SETUP_STRUCT *setup, const float *input, fl } } else { if (input == buff[ib]) { - ib = !ib; // may happen when finput == foutput + ib = !ib; /* may happen when finput == foutput */ } if (ordered) { FUNC_ZREORDER(setup, input, buff[!ib], PFFFT_BACKWARD); @@ -1704,7 +1717,7 @@ void pffft_transform_internal_nosimd(SETUP_STRUCT *setup, const float *input, fl } if (buff[ib] != output) { int k; - // extra copy required -- this situation should happens only when finput == foutput + /* extra copy required -- this situation should happens only when finput == foutput */ assert(input==output); for (k=0; k < Ncvec; ++k) { float a = buff[ib][2*k], b = buff[ib][2*k+1]; @@ -1722,7 +1735,7 @@ void pffft_zconvolve_accumulate_nosimd(SETUP_STRUCT *s, const float *a, const fl int k; /* was i -- but always used "2*i" - except at for() */ if (s->transform == PFFFT_REAL) { - // take care of the fftpack ordering + /* take care of the fftpack ordering */ ab[0] += a[0]*b[0]*scaling; ab[NcvecMulTwo-1] += a[NcvecMulTwo-1]*b[NcvecMulTwo-1]*scaling; ++ab; ++a; ++b; NcvecMulTwo -= 2; @@ -1744,7 +1757,7 @@ void pffft_zconvolve_no_accu_nosimd(SETUP_STRUCT *s, const float *a, const float int k; /* was i -- but always used "2*i" - except at for() */ if (s->transform == PFFFT_REAL) { - // take care of the fftpack ordering + /* take care of the fftpack ordering */ ab[0] += a[0]*b[0]*scaling; ab[NcvecMulTwo-1] += a[NcvecMulTwo-1]*b[NcvecMulTwo-1]*scaling; ++ab; ++a; ++b; NcvecMulTwo -= 2; @@ -1760,7 +1773,7 @@ void pffft_zconvolve_no_accu_nosimd(SETUP_STRUCT *s, const float *a, const float } -#endif // defined(PFFFT_SIMD_DISABLE) +#endif /* defined(PFFFT_SIMD_DISABLE) */ void FUNC_TRANSFORM_UNORDRD(SETUP_STRUCT *setup, const float *input, float *output, float *work, pffft_direction_t direction) { FUNC_TRANSFORM_INTERNAL(setup, input, output, (v4sf*)work, direction, 0); diff --git a/test_pffft.c 
b/test_pffft.c index 99a4e11..64e0ba7 100644 --- a/test_pffft.c +++ b/test_pffft.c @@ -33,6 +33,10 @@ #include <assert.h> #include <string.h> +/* define own constants required to turn off g++ extensions .. */ +#ifndef M_PI + #define M_PI 3.14159265358979323846 /* pi */ +#endif /* EXPECTED_DYN_RANGE in dB: * single precision float has 24 bits mantissa diff --git a/test_pffft.cpp b/test_pffft.cpp index f5d085d..a544870 100644 --- a/test_pffft.cpp +++ b/test_pffft.cpp @@ -41,6 +41,11 @@ #include <string.h> #include <time.h> +/* define own constants required to turn off g++ extensions .. */ +#ifndef M_PI + #define M_PI 3.14159265358979323846 /* pi */ +#endif + /* maximum allowed phase error in degree */ #define DEG_ERR_LIMIT 1E-4 diff --git a/test_pffft_double.c b/test_pffft_double.c index 51e95ac..4a702ec 100644 --- a/test_pffft_double.c +++ b/test_pffft_double.c @@ -37,6 +37,10 @@ Note: adapted for double precision dynamic range version. #include <assert.h> #include <string.h> +/* define own constants required to turn off g++ extensions .. */ +#ifndef M_PI + #define M_PI 3.14159265358979323846 /* pi */ +#endif #define EXPECTED_DYN_RANGE 215.0 |