author    hayati ayguen <h_ayguen@web.de>   2020-03-29 03:39:30 +0200
committer hayati ayguen <h_ayguen@web.de>   2020-03-29 03:39:30 +0200
commit    c974c1d9e336bdd790260d96044207424384613c (patch)
tree      4ee2c5936213ee02ece083cb14d586cb33f8408e
parent    7b3ca7d7804ef7f650382a922c89571bf7fee980 (diff)
download  pffft-c974c1d9e336bdd790260d96044207424384613c.tar.gz
removed most gcc extensions in source - but still required

* replaced the C++ style '//' comments
* added explicit math defines M_PI, .. - if not already defined
* gcc function (inline) attributes still require the extensions :-(

Signed-off-by: hayati ayguen <h_ayguen@web.de>
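The guard pattern this commit applies in fftpack.c, pffft_priv_impl.h and the test files is the usual portable-constants idiom: <math.h> only provides M_PI, M_SQRT2 and M_LN2 as a POSIX/gcc extension, so a strict -std=c90 build may not get them. A minimal sketch of the idiom, using the same values as the patch below:

    #include <math.h>  /* may or may not define the M_* constants in strict C90 */

    #ifndef M_PI
      #define M_PI 3.14159265358979323846      /* pi */
    #endif
    #ifndef M_SQRT2
      #define M_SQRT2 1.41421356237309504880   /* sqrt(2) */
    #endif
    #ifndef M_LN2
      #define M_LN2 0.69314718055994530942     /* log_e 2 */
    #endif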
-rw-r--r--  CMakeLists.txt       |  2
-rw-r--r--  bench_pffft.c        | 44
-rw-r--r--  fftpack.c            | 12
-rw-r--r--  fftpack.h            |  2
-rw-r--r--  pf_double.h          | 11
-rw-r--r--  pf_float.h           | 11
-rw-r--r--  pffastconv.h         |  4
-rw-r--r--  pffft.h              |  2
-rw-r--r--  pffft_double.h       |  2
-rw-r--r--  pffft_priv_impl.h    | 85
-rw-r--r--  test_pffft.c         |  4
-rw-r--r--  test_pffft.cpp       |  5
-rw-r--r--  test_pffft_double.c  |  4

13 files changed, 115 insertions(+), 73 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ea8c9cc..3dc5120 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,7 +11,7 @@ option(USE_ASAN "use GCC's address sanitizer?" OFF)
set(CMAKE_C_STANDARD 90)
-set(CMAKE_C_EXTENSIONS ON) # required for M_PI and M_SQRT2
+set(CMAKE_C_EXTENSIONS ON) # required for gcc function attributes (inline)
set(CMAKE_CXX_STANDARD 98)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
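Why CMAKE_C_EXTENSIONS stays ON: C90 has no 'inline' keyword, so the gcc-specific spellings behind helper macros such as ALWAYS_INLINE and NEVER_INLINE (seen below in pffft_priv_impl.h) compile under gnu90 but not under strict c90. A hedged sketch of such macros - illustrative definitions, not copied from the source:

    #ifdef __GNUC__
      #define NEVER_INLINE(ret)  ret __attribute__((noinline))
      #define ALWAYS_INLINE(ret) __inline__ ret __attribute__((always_inline))
    #else
      #define NEVER_INLINE(ret)  ret
      #define ALWAYS_INLINE(ret) ret
    #endif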
diff --git a/bench_pffft.c b/bench_pffft.c
index 1ff359e..6fc6ca4 100644
--- a/bench_pffft.c
+++ b/bench_pffft.c
@@ -57,6 +57,10 @@
# include <fftw3.h>
#endif
+#ifndef M_LN2
+ #define M_LN2 0.69314718055994530942 /* log_e 2 */
+#endif
+
#define NUM_FFT_ALGOS 8
enum {
@@ -80,7 +84,7 @@ enum {
TYPE_MFLOPS = 5, /* MFlops/sec */
TYPE_DUR_TOT = 6 /* test duration in sec */
};
-// double tmeas[NUM_TYPES][NUM_FFT_ALGOS];
+/* double tmeas[NUM_TYPES][NUM_FFT_ALGOS]; */
const char * algoName[NUM_FFT_ALGOS] = {
"FFTPack ",
@@ -219,8 +223,8 @@ void pffft_validate_N(int N, int cplx) {
for (pass=0; pass < 2; ++pass) {
float ref_max = 0;
int k;
- //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx);
- // compute reference solution with FFTPACK
+ /* printf("N=%d pass=%d cplx=%d\n", N, pass, cplx); */
+ /* compute reference solution with FFTPACK */
if (pass == 0) {
float *wrk = malloc(2*Nbytes+15*sizeof(float));
for (k=0; k < Nfloat; ++k) {
@@ -230,7 +234,7 @@ void pffft_validate_N(int N, int cplx) {
if (!cplx) {
rffti(N, wrk);
rfftf(N, ref, wrk);
- // use our ordering for real ffts instead of the one of fftpack
+ /* use our ordering for real ffts instead of the one of fftpack */
{
float refN=ref[N-1];
for (k=N-2; k >= 1; --k) ref[k+1] = ref[k];
@@ -246,9 +250,9 @@ void pffft_validate_N(int N, int cplx) {
for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k]));
- // pass 0 : non canonical ordering of transform coefficients
+ /* pass 0 : non canonical ordering of transform coefficients */
if (pass == 0) {
- // test forward transform, with different input / output
+ /* test forward transform, with different input / output */
pffft_transform(s, in, tmp, 0, PFFFT_FORWARD);
memcpy(tmp2, tmp, Nbytes);
memcpy(tmp, in, Nbytes);
@@ -257,7 +261,7 @@ void pffft_validate_N(int N, int cplx) {
assert(tmp2[k] == tmp[k]);
}
- // test reordering
+ /* test reordering */
pffft_zreorder(s, tmp, out, PFFFT_FORWARD);
pffft_zreorder(s, out, tmp, PFFFT_BACKWARD);
for (k = 0; k < Nfloat; ++k) {
@@ -265,7 +269,7 @@ void pffft_validate_N(int N, int cplx) {
}
pffft_zreorder(s, tmp, out, PFFFT_FORWARD);
} else {
- // pass 1 : canonical ordering of transform coeffs.
+ /* pass 1 : canonical ordering of transform coeffs. */
pffft_transform_ordered(s, in, tmp, 0, PFFFT_FORWARD);
memcpy(tmp2, tmp, Nbytes);
memcpy(tmp, in, Nbytes);
@@ -302,7 +306,7 @@ void pffft_validate_N(int N, int cplx) {
}
}
- // quick test of the circular convolution in fft domain
+ /* quick test of the circular convolution in fft domain */
{
float conv_err = 0, conv_max = 0;
@@ -486,7 +490,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl
}
- // FFTPack benchmark
+ /* FFTPack benchmark */
Nmax = (cplx ? N*2 : N);
X[Nmax] = checkVal;
{
@@ -519,7 +523,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl
free(wrk);
- flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
+ flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
tmeas[TYPE_ITER][ALGO_FFTPACK] = max_iter;
tmeas[TYPE_MFLOPS][ALGO_FFTPACK] = flops/1e6/(t1 - t0 + 1e-16);
tmeas[TYPE_DUR_TOT][ALGO_FFTPACK] = t1 - t0;
@@ -563,7 +567,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl
} while ( t1 < tstop );
vDSP_destroy_fftsetup(setup);
- flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
+ flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
tmeas[TYPE_ITER][ALGO_VECLIB] = max_iter;
tmeas[TYPE_MFLOPS][ALGO_VECLIB] = flops/1e6/(t1 - t0 + 1e-16);
tmeas[TYPE_DUR_TOT][ALGO_VECLIB] = t1 - t0;
@@ -613,7 +617,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl
fftwf_destroy_plan(planb);
fftwf_free(in); fftwf_free(out);
- flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
+ flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
tmeas[TYPE_ITER][ALGO_FFTW_ESTIM] = max_iter;
tmeas[TYPE_MFLOPS][ALGO_FFTW_ESTIM] = flops/1e6/(t1 - t0 + 1e-16);
tmeas[TYPE_DUR_TOT][ALGO_FFTW_ESTIM] = t1 - t0;
@@ -673,7 +677,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl
fftwf_destroy_plan(planb);
fftwf_free(in); fftwf_free(out);
- flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
+ flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
tmeas[TYPE_ITER][ALGO_FFTW_AUTO] = max_iter;
tmeas[TYPE_MFLOPS][ALGO_FFTW_AUTO] = flops/1e6/(t1 - t0 + 1e-16);
tmeas[TYPE_DUR_TOT][ALGO_FFTW_AUTO] = t1 - t0;
@@ -717,7 +721,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl
fftFree();
- flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
+ flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
tmeas[TYPE_ITER][ALGO_GREEN] = max_iter;
tmeas[TYPE_MFLOPS][ALGO_GREEN] = flops/1e6/(t1 - t0 + 1e-16);
tmeas[TYPE_DUR_TOT][ALGO_GREEN] = t1 - t0;
@@ -773,7 +777,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl
kiss_fft_cleanup();
- flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
+ flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
tmeas[TYPE_ITER][ALGO_KISS] = max_iter;
tmeas[TYPE_MFLOPS][ALGO_KISS] = flops/1e6/(t1 - t0 + 1e-16);
tmeas[TYPE_DUR_TOT][ALGO_KISS] = t1 - t0;
@@ -786,7 +790,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl
#endif
- // PFFFT-U (unordered) benchmark
+ /* PFFFT-U (unordered) benchmark */
Nmax = (cplx ? pffftPow2N*2 : pffftPow2N);
X[Nmax] = checkVal;
{
@@ -810,7 +814,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl
pffft_destroy_setup(s);
- flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
+ flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
tmeas[TYPE_ITER][ALGO_PFFFT_U] = max_iter;
tmeas[TYPE_MFLOPS][ALGO_PFFFT_U] = flops/1e6/(t1 - t0 + 1e-16);
tmeas[TYPE_DUR_TOT][ALGO_PFFFT_U] = t1 - t0;
@@ -840,7 +844,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl
pffft_destroy_setup(s);
- flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
+ flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
tmeas[TYPE_ITER][ALGO_PFFFT_O] = max_iter;
tmeas[TYPE_MFLOPS][ALGO_PFFFT_O] = flops/1e6/(t1 - t0 + 1e-16);
tmeas[TYPE_DUR_TOT][ALGO_PFFFT_O] = t1 - t0;
@@ -909,7 +913,7 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl
}
#ifndef PFFFT_SIMD_DISABLE
-void validate_pffft_simd(); // a small function inside pffft.c that will detect compiler bugs with respect to simd instruction
+void validate_pffft_simd(); /* a small function inside pffft.c that will detect compiler bugs with respect to SIMD instructions */
#endif
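The flops expression repeated throughout benchmark_ffts follows the convention from http://www.fftw.org/speed/method.html: one size-N transform counts as 5*N*log2(N) flops when complex and 2.5*N*log2(N) when real, and each timed iteration runs two transforms. Restated as a hypothetical helper, not part of the benchmark itself:

    #include <math.h>

    #ifndef M_LN2
      #define M_LN2 0.69314718055994530942   /* log_e 2 */
    #endif

    static double fft_flops(int N, int cplx, int max_iter)
    {
        /* log((double)N)/M_LN2 is log2(N); callers then report MFlops/sec
           as flops/1e6/(t1 - t0 + 1e-16), the tiny epsilon guarding
           against a zero measured duration */
        return (max_iter*2) * ((cplx ? 5.0 : 2.5) * N * log((double)N) / M_LN2);
    }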
diff --git a/fftpack.c b/fftpack.c
index b6375a8..d412780 100644
--- a/fftpack.c
+++ b/fftpack.c
@@ -66,6 +66,16 @@ static real c_abs(f77complex *c) { return sqrt(c->r*c->r + c->i*c->i); }
static double dmax(double a, double b) { return a < b ? b : a; }
#endif
+/* define our own constants - required when the g++ extensions are turned off .. */
+#ifndef M_PI
+ #define M_PI 3.14159265358979323846 /* pi */
+#endif
+
+#ifndef M_SQRT2
+ #define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
+#endif
+
+
/* translated by f2c (version 20061008), and slightly edited */
static void passfb(integer *nac, integer ido, integer ip, integer l1, integer idl1,
@@ -3109,4 +3119,4 @@ int main(void)
return all_ok ? 0 : 1;
}
-#endif //TESTING_FFTPACK
+#endif /* TESTING_FFTPACK */
diff --git a/fftpack.h b/fftpack.h
index 381bcc6..45cb742 100644
--- a/fftpack.h
+++ b/fftpack.h
@@ -49,7 +49,7 @@
extern "C" {
#endif
-// just define FFTPACK_DOUBLE_PRECISION if you want to build it as a double precision fft
+/* just define FFTPACK_DOUBLE_PRECISION if you want to build it as a double precision fft */
#ifndef FFTPACK_DOUBLE_PRECISION
typedef float fftpack_real;
diff --git a/pf_double.h b/pf_double.h
index e9bf715..0951a24 100644
--- a/pf_double.h
+++ b/pf_double.h
@@ -69,11 +69,11 @@
#include "pf_scalar_double.h"
-// shortcuts for complex multiplcations
+/* shortcuts for complex multiplications */
#define VCPLXMUL(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VSUB(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VADD(ai,tmp); }
#define VCPLXMULCONJ(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VADD(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VSUB(ai,tmp); }
#ifndef SVMUL
-// multiply a scalar with a vector
+/* multiply a scalar with a vector */
#define SVMUL(f,v) VMUL(LD_PS1(f),v)
#endif
@@ -123,10 +123,11 @@ static void Vvalidate_simd() {
a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15);
}
-#endif //!PFFFT_SIMD_DISABLE
+#endif /* !PFFFT_SIMD_DISABLE */
-/* SSE and co like 16-bytes aligned pointers */
-#define MALLOC_V4SF_ALIGNMENT 64 // with a 64-byte alignment, we are even aligned on L2 cache lines...
+/* SSE and co like 16-bytes aligned pointers
+ * with a 64-byte alignment, we are even aligned on L2 cache lines... */
+#define MALLOC_V4SF_ALIGNMENT 64
static
void *Valigned_malloc(size_t nb_bytes) {
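The MALLOC_V4SF_ALIGNMENT comment above refers to the classic over-allocate-and-round-up allocator; a hedged sketch of that technique follows (function names are illustrative, not the library's Valigned_malloc verbatim):

    #include <stdlib.h>

    #define MALLOC_V4SF_ALIGNMENT 64   /* as in the header above */

    static void *aligned_malloc_sketch(size_t nb_bytes) {
      void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT);
      if (!p0) return (void*)0;
      /* round up to the next 64-byte boundary; malloc's own alignment
         guarantee leaves at least sizeof(void*) bytes of slack below p */
      p = (void *) (((size_t)p0 + MALLOC_V4SF_ALIGNMENT)
                    & ~((size_t)(MALLOC_V4SF_ALIGNMENT-1)));
      *((void**)p - 1) = p0;   /* stash the raw pointer for the free call */
      return p;
    }

    static void aligned_free_sketch(void *p) {
      if (p) free(*((void**)p - 1));
    }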
diff --git a/pf_float.h b/pf_float.h
index f203f1c..3c0aa4b 100644
--- a/pf_float.h
+++ b/pf_float.h
@@ -72,11 +72,11 @@
#include "pf_scalar_float.h"
-// shortcuts for complex multiplcations
+/* shortcuts for complex multiplications */
#define VCPLXMUL(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VSUB(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VADD(ai,tmp); }
#define VCPLXMULCONJ(ar,ai,br,bi) { v4sf tmp; tmp=VMUL(ar,bi); ar=VMUL(ar,br); ar=VADD(ar,VMUL(ai,bi)); ai=VMUL(ai,br); ai=VSUB(ai,tmp); }
#ifndef SVMUL
-// multiply a scalar with a vector
+/* multiply a scalar with a vector */
#define SVMUL(f,v) VMUL(LD_PS1(f),v)
#endif
@@ -126,10 +126,11 @@ static void Vvalidate_simd() {
a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15);
}
-#endif //!PFFFT_SIMD_DISABLE
+#endif /* !PFFFT_SIMD_DISABLE */
-/* SSE and co like 16-bytes aligned pointers */
-#define MALLOC_V4SF_ALIGNMENT 64 // with a 64-byte alignment, we are even aligned on L2 cache lines...
+/* SSE and co like 16-bytes aligned pointers
+ * with a 64-byte alignment, we are even aligned on L2 cache lines... */
+#define MALLOC_V4SF_ALIGNMENT 64
static
void *Valigned_malloc(size_t nb_bytes) {
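For the VCPLXMUL comment fixed above: the macro is an element-wise complex product, (ar + i*ai) *= (br + i*bi), written with one temporary so that ar can be overwritten before ai is updated. A scalar model (the function itself is illustrative; the argument names mirror the macro's):

    static void vcplxmul_scalar(float *ar, float *ai, float br, float bi) {
      float tmp = *ar * bi;        /* save ar*bi before ar changes   */
      *ar = *ar * br - *ai * bi;   /* real part:  ar*br - ai*bi      */
      *ai = *ai * br + tmp;        /* imag part:  ai*br + ar_old*bi  */
    }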
diff --git a/pffastconv.h b/pffastconv.h
index 11e82f8..6bc5e47 100644
--- a/pffastconv.h
+++ b/pffastconv.h
@@ -26,7 +26,7 @@
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.
*/
-
+
/*
PFFASTCONV : a Pretty Fast Fast Convolution
@@ -50,7 +50,7 @@
#ifndef PFFASTCONV_H
#define PFFASTCONV_H
-#include <stddef.h> // for size_t
+#include <stddef.h> /* for size_t */
#include "pffft.h"
diff --git a/pffft.h b/pffft.h
index e6bd256..9d7e862 100644
--- a/pffft.h
+++ b/pffft.h
@@ -77,7 +77,7 @@
#ifndef PFFFT_H
#define PFFFT_H
-#include <stddef.h> // for size_t
+#include <stddef.h> /* for size_t */
#ifdef __cplusplus
extern "C" {
diff --git a/pffft_double.h b/pffft_double.h
index 71ea60a..ec3e08a 100644
--- a/pffft_double.h
+++ b/pffft_double.h
@@ -82,7 +82,7 @@
#ifndef PFFFT_DOUBLE_H
#define PFFFT_DOUBLE_H
-#include <stddef.h> // for size_t
+#include <stddef.h> /* for size_t */
#ifdef __cplusplus
extern "C" {
diff --git a/pffft_priv_impl.h b/pffft_priv_impl.h
index 33588b6..a7c8739 100644
--- a/pffft_priv_impl.h
+++ b/pffft_priv_impl.h
@@ -59,6 +59,16 @@
*/
+/* define our own constants - required when the g++ extensions are turned off .. */
+#ifndef M_PI
+ #define M_PI 3.14159265358979323846 /* pi */
+#endif
+
+#ifndef M_SQRT2
+ #define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
+#endif
+
+
/* detect bugs with the vector support macros */
void FUNC_VALIDATE_SIMD() {
#ifndef PFFFT_SIMD_DISABLE
@@ -435,7 +445,7 @@ static NEVER_INLINE(void) radf4_ps(int ido, int l1, const v4sf *RESTRICT cc, v4s
const v4sf *RESTRICT cc_ = cc, * RESTRICT cc_end = cc + l1ido;
v4sf * RESTRICT ch_ = ch;
while (cc < cc_end) {
- // this loop represents between 25% and 40% of total radf4_ps cost !
+ /* this loop represents between 25% and 40% of total radf4_ps cost ! */
v4sf a0 = cc[0], a1 = cc[l1ido];
v4sf a2 = cc[2*l1ido], a3 = cc[3*l1ido];
v4sf tr1 = VADD(a1, a3);
@@ -482,11 +492,11 @@ static NEVER_INLINE(void) radf4_ps(int ido, int l1, const v4sf *RESTRICT cc, v4s
tr2 = VADD(pc[0],cr3);
tr3 = VSUB(pc[0],cr3);
ch[i - 1 + 4*k] = VADD(tr1,tr2);
- ch[ic - 1 + 4*k + 3*ido] = VSUB(tr2,tr1); // at this point tr1 and tr2 can be disposed
+ ch[ic - 1 + 4*k + 3*ido] = VSUB(tr2,tr1); /* at this point tr1 and tr2 can be disposed */
ti1 = VADD(ci2,ci4);
ti4 = VSUB(ci2,ci4);
ch[i - 1 + 4*k + 2*ido] = VADD(ti4,tr3);
- ch[ic - 1 + 4*k + 1*ido] = VSUB(tr3,ti4); // dispose tr3, ti4
+ ch[ic - 1 + 4*k + 1*ido] = VSUB(tr3,ti4); /* dispose tr3, ti4 */
ti2 = VADD(pc[1],ci3);
ti3 = VSUB(pc[1],ci3);
ch[i + 4*k] = VADD(ti1, ti2);
@@ -628,7 +638,7 @@ static void radf5_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT
ch_ref(1, 3, k) = VADD(SVMUL(ti11, ci5), SVMUL(ti12, ci4));
ch_ref(ido, 4, k) = VADD(cc_ref(1, k, 1), VADD(SVMUL(tr12, cr2), SVMUL(tr11, cr3)));
ch_ref(1, 5, k) = VSUB(SVMUL(ti12, ci5), SVMUL(ti11, ci4));
- //printf("pffft: radf5, k=%d ch_ref=%f, ci4=%f\n", k, ch_ref(1, 5, k), ci4);
+ /* printf("pffft: radf5, k=%d ch_ref=%f, ci4=%f\n", k, ch_ref(1, 5, k), ci4); */
}
if (ido == 1) {
return;
@@ -654,11 +664,11 @@ static void radf5_ps(int ido, int l1, const v4sf * RESTRICT cc, v4sf * RESTRICT
cr4 = VSUB(di3, di4);
ci3 = VADD(di3, di4);
ch_ref(i - 1, 1, k) = VADD(cc_ref(i - 1, k, 1), VADD(cr2, cr3));
- ch_ref(i, 1, k) = VSUB(cc_ref(i, k, 1), VADD(ci2, ci3));//
+ ch_ref(i, 1, k) = VSUB(cc_ref(i, k, 1), VADD(ci2, ci3));
tr2 = VADD(cc_ref(i - 1, k, 1), VADD(SVMUL(tr11, cr2), SVMUL(tr12, cr3)));
- ti2 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr11, ci2), SVMUL(tr12, ci3)));//
+ ti2 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr11, ci2), SVMUL(tr12, ci3)));
tr3 = VADD(cc_ref(i - 1, k, 1), VADD(SVMUL(tr12, cr2), SVMUL(tr11, cr3)));
- ti3 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr12, ci2), SVMUL(tr11, ci3)));//
+ ti3 = VSUB(cc_ref(i, k, 1), VADD(SVMUL(tr12, ci2), SVMUL(tr11, ci3)));
tr5 = VADD(SVMUL(ti11, cr5), SVMUL(ti12, cr4));
ti5 = VADD(SVMUL(ti11, ci5), SVMUL(ti12, ci4));
tr4 = VSUB(SVMUL(ti12, cr5), SVMUL(ti11, cr4));
@@ -1008,12 +1018,12 @@ static v4sf *cfftf1_ps(int n, const v4sf *input_readonly, v4sf *work1, v4sf *wor
struct SETUP_STRUCT {
int N;
- int Ncvec; // nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL)
+ int Ncvec; /* nb of complex simd vectors (N/4 if PFFFT_COMPLEX, N/8 if PFFFT_REAL) */
int ifac[15];
pffft_transform_t transform;
- v4sf *data; // allocated room for twiddle coefs
- float *e; // points into 'data' , N/4*3 elements
- float *twiddle; // points into 'data', N/4 elements
+ v4sf *data; /* allocated room for twiddle coefs */
+ float *e; /* points into 'data', N/4*3 elements */
+ float *twiddle; /* points into 'data', N/4 elements */
};
SETUP_STRUCT *FUNC_NEW_SETUP(int N, pffft_transform_t transform) {
@@ -1024,7 +1034,7 @@ SETUP_STRUCT *FUNC_NEW_SETUP(int N, pffft_transform_t transform) {
handle other cases (or maybe just switch to a scalar fft, I don't know..) */
if (transform == PFFFT_REAL) { assert((N%(2*SIMD_SZ*SIMD_SZ))==0 && N>0); }
if (transform == PFFFT_COMPLEX) { assert((N%(SIMD_SZ*SIMD_SZ))==0 && N>0); }
- //assert((N % 32) == 0);
+ /* assert((N % 32) == 0); */
s->N = N;
s->transform = transform;
/* nb of complex simd vectors */
@@ -1080,7 +1090,7 @@ static void reversed_copy(int N, const v4sf *in, int in_stride, v4sf *out) {
int k;
INTERLEAVE2(in[0], in[1], g0, g1); in += in_stride;
- *--out = VSWAPHL(g0, g1); // [g0l, g0h], [g1l g1h] -> [g1l, g0h]
+ *--out = VSWAPHL(g0, g1); /* [g0l, g0h], [g1l g1h] -> [g1l, g0h] */
for (k=1; k < N; ++k) {
v4sf h0, h1;
INTERLEAVE2(in[0], in[1], h0, h1); in += in_stride;
@@ -1146,7 +1156,7 @@ void FUNC_ZREORDER(SETUP_STRUCT *setup, const float *in, float *out, pffft_direc
}
void FUNC_CPLX_FINALIZE(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
- int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks
+ int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
v4sf r0, i0, r1, i1, r2, i2, r3, i3;
v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
assert(in != out);
@@ -1190,7 +1200,7 @@ void FUNC_CPLX_FINALIZE(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
}
void FUNC_CPLX_PREPROCESS(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
- int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks
+ int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
v4sf r0, i0, r1, i1, r2, i2, r3, i3;
v4sf sr0, dr0, sr1, dr1, si0, di0, si1, di1;
assert(in != out);
@@ -1245,15 +1255,15 @@ static ALWAYS_INLINE(void) FUNC_REAL_FINALIZE_4X4(const v4sf *in0, const v4sf *i
[0 0 0 0 -1 1 -1 1] [i3]
*/
- //cerr << "matrix initial, before e , REAL:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n";
- //cerr << "matrix initial, before e, IMAG :\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n";
+ /* cerr << "matrix initial, before e , REAL:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; */
+ /* cerr << "matrix initial, before e, IMAG :\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; */
VCPLXMUL(r1,i1,e[0],e[1]);
VCPLXMUL(r2,i2,e[2],e[3]);
VCPLXMUL(r3,i3,e[4],e[5]);
- //cerr << "matrix initial, real part:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n";
- //cerr << "matrix initial, imag part:\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n";
+ /* cerr << "matrix initial, real part:\n 1: " << r0 << "\n 1: " << r1 << "\n 1: " << r2 << "\n 1: " << r3 << "\n"; */
+ /* cerr << "matrix initial, imag part:\n 1: " << i0 << "\n 1: " << i1 << "\n 1: " << i2 << "\n 1: " << i3 << "\n"; */
sr0 = VADD(r0,r2); dr0 = VSUB(r0,r2);
sr1 = VADD(r1,r3); dr1 = VSUB(r3,r1);
@@ -1281,7 +1291,7 @@ static ALWAYS_INLINE(void) FUNC_REAL_FINALIZE_4X4(const v4sf *in0, const v4sf *i
}
static NEVER_INLINE(void) FUNC_REAL_FINALIZE(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
- int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks
+ int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
/* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
v4sf_union cr, ci, *uout = (v4sf_union*)out;
@@ -1374,7 +1384,7 @@ static ALWAYS_INLINE(void) FUNC_REAL_PREPROCESS_4X4(const v4sf *in,
}
static NEVER_INLINE(void) FUNC_REAL_PREPROCESS(int Ncvec, const v4sf *in, v4sf *out, const v4sf *e) {
- int k, dk = Ncvec/SIMD_SZ; // number of 4x4 matrix blocks
+ int k, dk = Ncvec/SIMD_SZ; /* number of 4x4 matrix blocks */
/* fftpack order is f0r f1r f1i f2r f2i ... f(n-1)r f(n-1)i f(n)r */
v4sf_union Xr, Xi, *uout = (v4sf_union*)out;
@@ -1386,7 +1396,7 @@ static NEVER_INLINE(void) FUNC_REAL_PREPROCESS(int Ncvec, const v4sf *in, v4sf *
Xi.f[k] = ((float*)in)[8*k+4];
}
- FUNC_REAL_PREPROCESS_4X4(in, e, out+1, 1); // will write only 6 values
+ FUNC_REAL_PREPROCESS_4X4(in, e, out+1, 1); /* will write only 6 values */
/*
[Xr0 Xr1 Xr2 Xr3 Xi0 Xi1 Xi2 Xi3]
@@ -1420,7 +1430,7 @@ void FUNC_TRANSFORM_INTERNAL(SETUP_STRUCT *setup, const float *finput, float *fo
int k, Ncvec = setup->Ncvec;
int nf_odd = (setup->ifac[1] & 1);
- // temporary buffer is allocated on the stack if the scratch pointer is NULL
+ /* temporary buffer is allocated on the stack if the scratch pointer is NULL */
int stack_allocate = (scratch == 0 ? Ncvec*2 : 1);
VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate);
@@ -1431,7 +1441,7 @@ void FUNC_TRANSFORM_INTERNAL(SETUP_STRUCT *setup, const float *finput, float *fo
assert(VALIGNED(finput) && VALIGNED(foutput));
- //assert(finput != foutput);
+ /* assert(finput != foutput); */
if (direction == PFFFT_FORWARD) {
ib = !ib;
if (setup->transform == PFFFT_REAL) {
@@ -1452,7 +1462,7 @@ void FUNC_TRANSFORM_INTERNAL(SETUP_STRUCT *setup, const float *finput, float *fo
} else ib = !ib;
} else {
if (vinput == buff[ib]) {
- ib = !ib; // may happen when finput == foutput
+ ib = !ib; /* may happen when finput == foutput */
}
if (ordered) {
FUNC_ZREORDER(setup, (float*)vinput, (float*)buff[ib], PFFFT_BACKWARD);
@@ -1522,7 +1532,9 @@ void FUNC_ZCONVOLVE_ACCUMULATE(SETUP_STRUCT *s, const float *a, const float *b,
abr = ((v4sf_union*)vab)[0].f[0];
abi = ((v4sf_union*)vab)[1].f[0];
-#ifdef ZCONVOLVE_USING_INLINE_ASM // inline asm version, unfortunately miscompiled by clang 3.2, at least on ubuntu.. so this will be restricted to gcc
+#ifdef ZCONVOLVE_USING_INLINE_ASM
+ /* inline asm version, unfortunately miscompiled by clang 3.2,
+ * at least on ubuntu.. so this will be restricted to gcc */
const float *a_ = a, *b_ = b; float *ab_ = ab;
int N = Ncvec;
asm volatile("mov r8, %2 \n"
@@ -1558,7 +1570,8 @@ void FUNC_ZCONVOLVE_ACCUMULATE(SETUP_STRUCT *s, const float *a, const float *b,
"subs %3, #2 \n"
"bne 1b \n"
: "+r"(a_), "+r"(b_), "+r"(ab_), "+r"(N) : "r"(scaling) : "r8", "q0","q1","q2","q3","q4","q5","q6","q7","q8","q9", "q10","q11","q12","q13","q15","memory");
-#else // default routine, works fine for non-arm cpus with current compilers
+#else
+ /* default routine, works fine for non-arm cpus with current compilers */
for (i=0; i < Ncvec; i += 2) {
v4sf ar, ai, br, bi;
ar = va[2*i+0]; ai = va[2*i+1];
@@ -1634,9 +1647,9 @@ void FUNC_ZCONVOLVE_NO_ACCU(SETUP_STRUCT *s, const float *a, const float *b, flo
}
-#else // defined(PFFFT_SIMD_DISABLE)
+#else /* defined(PFFFT_SIMD_DISABLE) */
-// standard routine using scalar floats, without SIMD stuff.
+/* standard routine using scalar floats, without SIMD stuff. */
#define pffft_zreorder_nosimd FUNC_ZREORDER
void pffft_zreorder_nosimd(SETUP_STRUCT *setup, const float *in, float *out, pffft_direction_t direction) {
@@ -1664,7 +1677,7 @@ void pffft_transform_internal_nosimd(SETUP_STRUCT *setup, const float *input, fl
int Ncvec = setup->Ncvec;
int nf_odd = (setup->ifac[1] & 1);
- // temporary buffer is allocated on the stack if the scratch pointer is NULL
+ /* temporary buffer is allocated on the stack if the scratch pointer is NULL */
int stack_allocate = (scratch == 0 ? Ncvec*2 : 1);
VLA_ARRAY_ON_STACK(v4sf, scratch_on_stack, stack_allocate);
float *buff[2];
@@ -1672,7 +1685,7 @@ void pffft_transform_internal_nosimd(SETUP_STRUCT *setup, const float *input, fl
if (scratch == 0) scratch = scratch_on_stack;
buff[0] = output; buff[1] = scratch;
- if (setup->transform == PFFFT_COMPLEX) ordered = 0; // it is always ordered.
+ if (setup->transform == PFFFT_COMPLEX) ordered = 0; /* it is always ordered. */
ib = (nf_odd ^ ordered ? 1 : 0);
if (direction == PFFFT_FORWARD) {
@@ -1688,7 +1701,7 @@ void pffft_transform_internal_nosimd(SETUP_STRUCT *setup, const float *input, fl
}
} else {
if (input == buff[ib]) {
- ib = !ib; // may happen when finput == foutput
+ ib = !ib; /* may happen when finput == foutput */
}
if (ordered) {
FUNC_ZREORDER(setup, input, buff[!ib], PFFFT_BACKWARD);
@@ -1704,7 +1717,7 @@ void pffft_transform_internal_nosimd(SETUP_STRUCT *setup, const float *input, fl
}
if (buff[ib] != output) {
int k;
- // extra copy required -- this situation should happens only when finput == foutput
+ /* extra copy required -- this situation should happen only when finput == foutput */
assert(input==output);
for (k=0; k < Ncvec; ++k) {
float a = buff[ib][2*k], b = buff[ib][2*k+1];
@@ -1722,7 +1735,7 @@ void pffft_zconvolve_accumulate_nosimd(SETUP_STRUCT *s, const float *a, const fl
int k; /* was i -- but always used "2*i" - except at for() */
if (s->transform == PFFFT_REAL) {
- // take care of the fftpack ordering
+ /* take care of the fftpack ordering */
ab[0] += a[0]*b[0]*scaling;
ab[NcvecMulTwo-1] += a[NcvecMulTwo-1]*b[NcvecMulTwo-1]*scaling;
++ab; ++a; ++b; NcvecMulTwo -= 2;
@@ -1744,7 +1757,7 @@ void pffft_zconvolve_no_accu_nosimd(SETUP_STRUCT *s, const float *a, const float
int k; /* was i -- but always used "2*i" - except at for() */
if (s->transform == PFFFT_REAL) {
- // take care of the fftpack ordering
+ /* take care of the fftpack ordering */
ab[0] += a[0]*b[0]*scaling;
ab[NcvecMulTwo-1] += a[NcvecMulTwo-1]*b[NcvecMulTwo-1]*scaling;
++ab; ++a; ++b; NcvecMulTwo -= 2;
@@ -1760,7 +1773,7 @@ void pffft_zconvolve_no_accu_nosimd(SETUP_STRUCT *s, const float *a, const float
}
-#endif // defined(PFFFT_SIMD_DISABLE)
+#endif /* defined(PFFFT_SIMD_DISABLE) */
void FUNC_TRANSFORM_UNORDRD(SETUP_STRUCT *setup, const float *input, float *output, float *work, pffft_direction_t direction) {
FUNC_TRANSFORM_INTERNAL(setup, input, output, (v4sf*)work, direction, 0);
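The "may happen when finput == foutput" and "extra copy required" comments above belong to the driver's two-buffer ping-pong: each pass flips ib between the output and scratch buffers, and if the final pass did not land in output (possible for in-place calls), one copy fixes it up. A much-simplified model, hypothetical and for orientation only (assumes npasses >= 1):

    static void pingpong_model(const float *input, float *output,
                               float *scratch, int npasses, int n)
    {
      float *buff[2];
      int ib = 0, k, pass;
      buff[0] = output; buff[1] = scratch;
      for (pass = 0; pass < npasses; ++pass) {
        const float *src = (pass == 0) ? input : buff[!ib];
        for (k = 0; k < n; ++k)        /* stand-in for one transform pass */
          buff[ib][k] = src[k];
        ib = !ib;
      }
      if (buff[!ib] != output)         /* result ended up in scratch */
        for (k = 0; k < n; ++k) output[k] = buff[!ib][k];
    }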
diff --git a/test_pffft.c b/test_pffft.c
index 99a4e11..64e0ba7 100644
--- a/test_pffft.c
+++ b/test_pffft.c
@@ -33,6 +33,10 @@
#include <assert.h>
#include <string.h>
+/* define our own constants - required when the g++ extensions are turned off .. */
+#ifndef M_PI
+ #define M_PI 3.14159265358979323846 /* pi */
+#endif
/* EXPECTED_DYN_RANGE in dB:
* single precision float has 24 bits mantissa
diff --git a/test_pffft.cpp b/test_pffft.cpp
index f5d085d..a544870 100644
--- a/test_pffft.cpp
+++ b/test_pffft.cpp
@@ -41,6 +41,11 @@
#include <string.h>
#include <time.h>
+/* define our own constants - required when the g++ extensions are turned off .. */
+#ifndef M_PI
+ #define M_PI 3.14159265358979323846 /* pi */
+#endif
+
/* maximum allowed phase error in degree */
#define DEG_ERR_LIMIT 1E-4
diff --git a/test_pffft_double.c b/test_pffft_double.c
index 51e95ac..4a702ec 100644
--- a/test_pffft_double.c
+++ b/test_pffft_double.c
@@ -37,6 +37,10 @@ Note: adapted for double precision dynamic range version.
#include <assert.h>
#include <string.h>
+/* define our own constants - required when the g++ extensions are turned off .. */
+#ifndef M_PI
+ #define M_PI 3.14159265358979323846 /* pi */
+#endif
#define EXPECTED_DYN_RANGE 215.0
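The EXPECTED_DYN_RANGE constants follow from the significand width: a p-bit significand bounds the achievable SNR near 20*log10(2^p) dB, about 144 dB for float (p = 24, matching the "24 bits mantissa" note in test_pffft.c) and about 319 dB for double (p = 53), so the 215.0 here sits well below the double-precision ceiling. Hedged arithmetic helper, not part of the tests:

    #include <math.h>

    static double mantissa_dyn_range_db(int p) {
      return 20.0 * p * log10(2.0);   /* p=24 -> ~144.5, p=53 -> ~319.1 */
    }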