diff options
author | Julien Pommier <pommier@pianoteq.com> | 2011-11-20 10:58:07 +0100 |
---|---|---|
committer | Julien Pommier <pommier@pianoteq.com> | 2011-11-20 10:58:07 +0100 |
commit | 836bc4bdd75a5750f1f3ba1c0e69fca6dbfc58ab (patch) | |
tree | 5e0f6bc2f3763637ac93057e9dbc8b550b3e6eb4 /test_pffft.c | |
parent | 370d2099e358fb7955a08275822570ee278226f8 (diff) | |
download | pffft-836bc4bdd75a5750f1f3ba1c0e69fca6dbfc58ab.tar.gz |
benchmarks added
Diffstat (limited to 'test_pffft.c')
-rw-r--r-- | test_pffft.c | 137 |
1 files changed, 94 insertions, 43 deletions
diff --git a/test_pffft.c b/test_pffft.c index e295007..408dc8e 100644 --- a/test_pffft.c +++ b/test_pffft.c @@ -5,20 +5,19 @@ How to build: - on linux, with fftw3 - - gcc-4.2 -o test_pffft -DHAVE_FFTW -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f - - on macos, without fftw3 + on linux, with fftw3: + gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm + on macos, without fftw3: gcc-4.2 -o test_pffft -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -framework veclib - on macos, with fftw3 - + on macos, with fftw3: gcc-4.2 -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework veclib + + on windows, with visual c++: + cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c build without SIMD instructions: - gcc -o test_pffft -DPFFFT_SIMD_DISABLE -O3 -Wall -W pffft.c test_pffft.c fftpack.c -lm */ @@ -59,7 +58,7 @@ double frand() { struct tms t; return ((double)times(&t)) / ttclk; } # else - inline double uclock_sec(void) + double uclock_sec(void) { return (double)clock()/(double)CLOCKS_PER_SEC; } #endif @@ -75,6 +74,7 @@ void pffft_validate_N(int N, int cplx) { int pass; for (pass=0; pass < 2; ++pass) { + float ref_max = 0; int k; //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx); // compute reference solution with FFTPACK @@ -100,7 +100,6 @@ void pffft_validate_N(int N, int cplx) { free(wrk); } - float ref_max = 0; for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k])); @@ -178,12 +177,12 @@ void pffft_validate_N(int N, int cplx) { } for (k=0; k < Nfloat; ++k) { - assert(fabs(tmp[k] - tmp2[k]) < 1e-3*ref_max); + assert(fabs(tmp[k] - tmp2[k]) < 1e-2*ref_max); } } - printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); + printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout); pffft_destroy_setup(s); pffft_aligned_free(ref); @@ -204,6 +203,21 @@ void pffft_validate(int cplx) { } } +int array_output_format = 0; + +void show_output(const char *name, int N, int cplx, float flops, float t0, float t1, int max_iter) { + float mflops = flops/1e6/(t1 - t0 + 1e-16); + if (array_output_format) { + if (flops != -1) { + printf("|%10.0f ", mflops); + } else printf("| n/a "); + } else { + if (flops != -1) { + printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), name, mflops, (t1-t0)/2/max_iter * 1e9, max_iter); + } + } + fflush(stdout); +} void benchmark_ffts(int N, int cplx) { int Nfloat = (cplx ? N*2 : N); @@ -213,32 +227,16 @@ void benchmark_ffts(int N, int cplx) { double t0, t1, flops; int k; - int max_iter = 5120000/N*16; + int max_iter = 5120000/N*4; #ifdef __arm__ - max_iter /= 8; + max_iter /= 4; #endif int iter; - for (k = 0; k < Nfloat; ++k) { X[k] = 0; //sqrtf(k+1); } - // PFFFT benchmark - { - PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); - t0 = uclock_sec(); - for (iter = 0; iter < max_iter; ++iter) { - pffft_transform(s, X, Z, Y, PFFFT_FORWARD); - pffft_transform(s, X, Z, Y, PFFFT_BACKWARD); - } - t1 = uclock_sec(); - pffft_destroy_setup(s); - - flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html - printf("N=%5d, %s PFFFT : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), flops/1e6/(t1 - t0 + 1e-16), (t1-t0)/2/max_iter * 1e9, max_iter); - } - // FFTPack benchmark { float *wrk = malloc(2*Nbytes + 15*sizeof(float)); @@ -260,7 +258,7 @@ void benchmark_ffts(int N, int cplx) { free(wrk); flops = (max_iter_*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html - printf("N=%5d, %s FFTPACK : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), flops/1e6/(t1 - t0 + 1e-16), (t1-t0)/2/max_iter_ * 1e9, max_iter_); + show_output("FFTPack", N, cplx, flops, t0, t1, max_iter); } #ifdef HAVE_VECLIB @@ -286,7 +284,9 @@ void benchmark_ffts(int N, int cplx) { vDSP_destroy_fftsetup(setup); flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html - printf("N=%5d, %s vDSP : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), flops/1e6/(t1 - t0 + 1e-16), (t1-t0)/2/max_iter * 1e9, max_iter); + show_output("vDSP", N, cplx, flops, t0, t1, max_iter); + } else { + show_output("vDSP", N, cplx, -1, -1, -1, -1); } #endif @@ -318,12 +318,28 @@ void benchmark_ffts(int N, int cplx) { fftwf_free(in); fftwf_free(out); flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html - printf("N=%5d, %s FFTW (%s) : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), - (flags == FFTW_MEASURE ? "meas." : "estim"), flops/1e6/(t1 - t0 + 1e-16), (t1-t0)/2/max_iter * 1e9, max_iter); + show_output((flags == FFTW_MEASURE ? "FFTW (meas.)" : " FFTW (estim)"), N, cplx, flops, t0, t1, max_iter); } #endif - printf("--\n"); + // PFFFT benchmark + { + PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); + t0 = uclock_sec(); + for (iter = 0; iter < max_iter; ++iter) { + pffft_transform(s, X, Z, Y, PFFFT_FORWARD); + pffft_transform(s, X, Z, Y, PFFFT_BACKWARD); + } + t1 = uclock_sec(); + pffft_destroy_setup(s); + + flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html + show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); + } + + if (!array_output_format) { + printf("--\n"); + } pffft_aligned_free(X); pffft_aligned_free(Y); @@ -335,19 +351,54 @@ void validate_pffft_simd(); // a small function inside pffft.c that will detect #endif int main(int argc, char **argv) { + int Nvalues[] = { 64, 96, 128, 192, 256, 384, 512, 3*256, 1024, 2048, 4096, 8192, 9*1024, 16384, 32768, 256*1024, 1024*1024, -1 }; + int i; + + if (argc > 1 && strcmp(argv[1], "--array-format") == 0) { + array_output_format = 1; + } + #ifndef PFFFT_SIMD_DISABLE validate_pffft_simd(); #endif pffft_validate(1); pffft_validate(0); - int N; - for (N = 64; N < 8192*256; N *= 2) { - if (N >= 16384) N*=4; - benchmark_ffts(N, 0); - } - for (N = 64; N < 8192*256; N *= 2) { - if (N >= 16384) N*=4; - benchmark_ffts(N, 1); + if (!array_output_format) { + for (i=0; Nvalues[i] > 0; ++i) { + benchmark_ffts(Nvalues[i], 0); + } + for (i=0; Nvalues[i] > 0; ++i) { + benchmark_ffts(Nvalues[i], 1); + } + } else { + printf("| N (input length) "); + printf("| real FFTPack "); +#ifdef HAVE_VECLIB + printf("| real vDSP "); +#endif +#ifdef HAVE_FFTW + printf("| real FFTW "); +#endif + printf("| real PFFFT | "); + + printf("| cplx FFTPack "); +#ifdef HAVE_VECLIB + printf("| cplx vDSP "); +#endif +#ifdef HAVE_FFTW + printf("| cplx FFTW "); +#endif + printf("| cplx PFFFT |\n"); + for (i=0; Nvalues[i] > 0; ++i) { + printf("| %12d ", Nvalues[i]); + benchmark_ffts(Nvalues[i], 0); + printf("| "); + benchmark_ffts(Nvalues[i], 1); + printf("|\n"); + } + printf(" (numbers are given in MFlops)\n"); } + + return 0; } |