benchmarks added

author: Julien Pommier <pommier@pianoteq.com> 2011-11-20 10:58:07 +0100
committer: Julien Pommier <pommier@pianoteq.com> 2011-11-20 10:58:07 +0100
commit: 836bc4bdd75a5750f1f3ba1c0e69fca6dbfc58ab (patch)
tree: 5e0f6bc2f3763637ac93057e9dbc8b550b3e6eb4 /test_pffft.c
parent: 370d2099e358fb7955a08275822570ee278226f8 (diff)
download: pffft-836bc4bdd75a5750f1f3ba1c0e69fca6dbfc58ab.tar.gz
1 files changed, 94 insertions, 43 deletions
diff --git a/test_pffft.c b/test_pffft.c
index e295007..408dc8e 100644
--- a/test_pffft.c
+++ b/test_pffft.c
@@ -5,20 +5,19 @@
 
   How to build: 
 
-  on linux, with fftw3
-
-  gcc-4.2 -o test_pffft -DHAVE_FFTW -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f
-
-  on macos, without fftw3
+  on linux, with fftw3:
+  gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm
 
+  on macos, without fftw3:
   gcc-4.2 -o test_pffft -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -framework veclib
 
-  on macos, with fftw3
-
+  on macos, with fftw3:
   gcc-4.2 -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework veclib
+
+  on windows, with visual c++:
+  cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c
   
   build without SIMD instructions:
-
   gcc -o test_pffft -DPFFFT_SIMD_DISABLE -O3 -Wall -W pffft.c test_pffft.c fftpack.c -lm
 
  */
@@ -59,7 +58,7 @@ double frand() {
     struct tms t; return ((double)times(&t)) / ttclk;
   }
 # else
-  inline double uclock_sec(void)
+  double uclock_sec(void) // fallback timer: clock() ticks converted to seconds
 { return (double)clock()/(double)CLOCKS_PER_SEC; }
 #endif
 
@@ -75,6 +74,7 @@ void pffft_validate_N(int N, int cplx) {
   
   int pass;
   for (pass=0; pass < 2; ++pass) {
+    float ref_max = 0;
     int k;
     //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx);
     // compute reference solution with FFTPACK
@@ -100,7 +100,6 @@ void pffft_validate_N(int N, int cplx) {
       free(wrk);
     }
 
-    float ref_max = 0;
     for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k]));
 
       
@@ -178,12 +177,12 @@ void pffft_validate_N(int N, int cplx) {
     }
 
     for (k=0; k < Nfloat; ++k) {
-      assert(fabs(tmp[k] - tmp2[k]) < 1e-3*ref_max);
+      assert(fabs(tmp[k] - tmp2[k]) < 1e-2*ref_max);
     }
 
   }
 
-  printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N);
+  printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout);
   
   pffft_destroy_setup(s);
   pffft_aligned_free(ref);
@@ -204,6 +203,21 @@ void pffft_validate(int cplx) {
   }
 }
 
+int array_output_format = 0;
+
+/* Print one benchmark result: a table cell when array_output_format is set ("n/a" when flops
+   is -1), otherwise one full text line (nothing when flops is -1). t0 and t1 are taken as
+   double: narrowing uclock_sec() timestamps to float would lose resolution in t1 - t0. */
+void show_output(const char *name, int N, int cplx, float flops, double t0, double t1, int max_iter) {
+  double mflops = flops/1e6/(t1 - t0 + 1e-16); // 1e-16 avoids a division by zero when t1 == t0
+  if (array_output_format) {
+    if (flops != -1) printf("|%10.0f    ", mflops);
+    else printf("|       n/a    ");
+  } else if (flops != -1) {
+    printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), name, mflops, (t1-t0)/2/max_iter * 1e9, max_iter);
+  }
+  fflush(stdout);
+}
 
 void benchmark_ffts(int N, int cplx) {
   int Nfloat = (cplx ? N*2 : N);
@@ -213,32 +227,16 @@ void benchmark_ffts(int N, int cplx) {
   double t0, t1, flops;
 
   int k;
-  int max_iter = 5120000/N*16;
+  int max_iter = 5120000/N*4;
 #ifdef __arm__
-  max_iter /= 8;
+  max_iter /= 4;
 #endif
   int iter;
 
-
   for (k = 0; k < Nfloat; ++k) {
     X[k] = 0; //sqrtf(k+1);
   }
 
-  // PFFFT benchmark
-  {
-    PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL);
-    t0 = uclock_sec();  
-    for (iter = 0; iter < max_iter; ++iter) {
-      pffft_transform(s, X, Z, Y, PFFFT_FORWARD);
-      pffft_transform(s, X, Z, Y, PFFFT_BACKWARD);
-    }
-    t1 = uclock_sec();
-    pffft_destroy_setup(s);
-    
-    flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
-    printf("N=%5d, %s PFFFT        : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), flops/1e6/(t1 - t0 + 1e-16), (t1-t0)/2/max_iter * 1e9, max_iter);
-  }
-
   // FFTPack benchmark
   {
     float *wrk = malloc(2*Nbytes + 15*sizeof(float));
@@ -260,7 +258,7 @@ void benchmark_ffts(int N, int cplx) {
     free(wrk);
     
     flops = (max_iter_*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
-    printf("N=%5d, %s FFTPACK      : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), flops/1e6/(t1 - t0 + 1e-16), (t1-t0)/2/max_iter_ * 1e9, max_iter_);
+    show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); // max_iter_: the iteration count flops is based on
   }
 
 #ifdef HAVE_VECLIB
@@ -286,7 +284,9 @@ void benchmark_ffts(int N, int cplx) {
     vDSP_destroy_fftsetup(setup);
 
     flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
-    printf("N=%5d, %s vDSP         : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), flops/1e6/(t1 - t0 + 1e-16), (t1-t0)/2/max_iter * 1e9, max_iter);
+    show_output("vDSP", N, cplx, flops, t0, t1, max_iter);
+  } else {
+    show_output("vDSP", N, cplx, -1, -1, -1, -1);
   }
 #endif
   
@@ -318,12 +318,28 @@ void benchmark_ffts(int N, int cplx) {
     fftwf_free(in); fftwf_free(out);
 
     flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
-    printf("N=%5d, %s FFTW (%s) : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), 
-           (flags == FFTW_MEASURE ? "meas." : "estim"), flops/1e6/(t1 - t0 + 1e-16), (t1-t0)/2/max_iter * 1e9, max_iter);
+    show_output((flags == FFTW_MEASURE ? "FFTW (meas.)" : " FFTW (estim)"), N, cplx, flops, t0, t1, max_iter);
   }
 #endif  
 
-  printf("--\n");
+  // PFFFT benchmark
+  {
+    PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL);
+    t0 = uclock_sec();  
+    for (iter = 0; iter < max_iter; ++iter) {
+      pffft_transform(s, X, Z, Y, PFFFT_FORWARD);
+      pffft_transform(s, X, Z, Y, PFFFT_BACKWARD);
+    }
+    t1 = uclock_sec();
+    pffft_destroy_setup(s);
+    
+    flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
+    show_output("PFFFT", N, cplx, flops, t0, t1, max_iter);
+  }
+
+  if (!array_output_format) {
+    printf("--\n");
+  }
 
   pffft_aligned_free(X);
   pffft_aligned_free(Y);
@@ -335,19 +351,54 @@ void validate_pffft_simd(); // a small function inside pffft.c that will detect
 #endif
 
 int main(int argc, char **argv) {
+  int Nvalues[] = { 64, 96, 128, 192, 256, 384, 512, 3*256, 1024, 2048, 4096, 8192, 9*1024, 16384, 32768, 256*1024, 1024*1024, -1 }; // FFT lengths to benchmark; the trailing -1 ends the loops below
+  int i;
+
+  if (argc > 1 && strcmp(argv[1], "--array-format") == 0) { // first argument selects the table layout
+    array_output_format = 1;
+  }
+
 #ifndef PFFFT_SIMD_DISABLE
   validate_pffft_simd();
 #endif
   pffft_validate(1);
   pffft_validate(0);
-  int N;
-  for (N = 64; N < 8192*256; N *= 2) {
-    if (N >= 16384) N*=4;
-    benchmark_ffts(N, 0);
-  }
-  for (N = 64; N < 8192*256; N *= 2) {
-    if (N >= 16384) N*=4;
-    benchmark_ffts(N, 1);
+  if (!array_output_format) { // default layout: all lengths with cplx=0, then all lengths with cplx=1
+    for (i=0; Nvalues[i] > 0; ++i) {
+      benchmark_ffts(Nvalues[i], 0);
+    }
+    for (i=0; Nvalues[i] > 0; ++i) {
+      benchmark_ffts(Nvalues[i], 1);
+    }
+  } else { // table layout: a header row, then one row per FFT length
+    printf("| N (input length) ");
+    printf("| real FFTPack ");
+#ifdef HAVE_VECLIB
+    printf("|   real vDSP  ");
+#endif
+#ifdef HAVE_FFTW
+    printf("|   real FFTW  ");
+#endif
+    printf("|  real PFFFT  | ");
+
+    printf("| cplx FFTPack ");
+#ifdef HAVE_VECLIB
+    printf("|   cplx vDSP  ");
+#endif
+#ifdef HAVE_FFTW
+    printf("|   cplx FFTW  ");
+#endif
+    printf("|  cplx PFFFT  |\n");
+    for (i=0; Nvalues[i] > 0; ++i) { // each row: the length, the cplx=0 cells, then the cplx=1 cells
+      printf("| %12d     ", Nvalues[i]);
+      benchmark_ffts(Nvalues[i], 0); 
+      printf("| ");
+      benchmark_ffts(Nvalues[i], 1);
+      printf("|\n");
+    }
+    printf(" (numbers are given in MFlops)\n");
   }
+
+
   return 0;
 }
author	Julien Pommier <pommier@pianoteq.com>	2011-11-20 10:58:07 +0100
committer	Julien Pommier <pommier@pianoteq.com>	2011-11-20 10:58:07 +0100
commit	836bc4bdd75a5750f1f3ba1c0e69fca6dbfc58ab (patch)
tree	5e0f6bc2f3763637ac93057e9dbc8b550b3e6eb4 /test_pffft.c
parent	370d2099e358fb7955a08275822570ee278226f8 (diff)
download	pffft-836bc4bdd75a5750f1f3ba1c0e69fca6dbfc58ab.tar.gz