diff options
author | hayati ayguen <h_ayguen@web.de> | 2020-08-26 15:50:33 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-26 15:50:33 +0200 |
commit | 742aa962f5d7ee4ebac8fef3bf1f4cbcccbe3655 (patch) | |
tree | bb5808ffe679df44d24945eef25b4dc8af981cb6 | |
parent | 7ed5e2a45040ae91c0b2fade901a503c77ccf381 (diff) | |
parent | 9b888db372546ed2a730e459551cb4e01118ee35 (diff) | |
download | pffft-742aa962f5d7ee4ebac8fef3bf1f4cbcccbe3655.tar.gz |
Merge pull request #17 from hayguen/master
added pocket fft ..
-rw-r--r-- | .gitmodules | 3 | ||||
-rw-r--r-- | CMakeLists.txt | 29 | ||||
-rw-r--r-- | README.md | 10 | ||||
-rwxr-xr-x | bench_all.sh | 4 | ||||
-rw-r--r-- | bench_pffft.c | 100 | ||||
m--------- | pocketfft | 0 |
6 files changed, 135 insertions, 11 deletions
diff --git a/.gitmodules b/.gitmodules index 701a96c..9ef3633 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "kissfft"] path = kissfft url = https://github.com/hayguen/kissfft.git +[submodule "pocketfft"] + path = pocketfft + url = https://github.com/hayguen/pocketfft.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b6bc10..160b3c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,9 +11,10 @@ option(USE_SIMD_NEON "force using NEON on ARM? (requires USE_SIMD)" OFF) option(USE_SCALAR_VECT "use 4-element vector scalar operations (if no other SIMD)" ON) # test options -option(USE_BENCH_FFTW "use (system-installed) FFTW3 in fft benchmark?" OFF) -option(USE_BENCH_GREEN "use Green FFT in fft benchmark? - if exists in subdir" ON) -option(USE_BENCH_KISS "use KissFFT in fft benchmark? - if exists in subdir" ON) +option(USE_BENCH_FFTW "use (system-installed) FFTW3 in fft benchmark?" OFF) +option(USE_BENCH_GREEN "use Green FFT in fft benchmark? - if exists in subdir" ON) +option(USE_BENCH_KISS "use KissFFT in fft benchmark? - if exists in subdir" ON) +option(USE_BENCH_POCKET "use PocketFFT in fft benchmark? - if exists in subdir" ON) option(USE_DEBUG_ASAN "use GCC's address sanitizer?" OFF) @@ -61,6 +62,17 @@ if (USE_BENCH_KISS) endif() endif() +if (USE_BENCH_POCKET) + # git submodule add https://github.com/hayguen/pocketfft.git + if (EXISTS "${CMAKE_CURRENT_LIST_DIR}/pocketfft/pocketfft_double.c") + message(STATUS "found subdir pocketfft") + set(PATH_POCKET "${CMAKE_CURRENT_LIST_DIR}/pocketfft") + add_subdirectory( "${PATH_POCKET}" ) + else() + message(WARNING "PocketFFT not found in subdir pocketfft") + endif() +endif() + ######################################################################## # select the release build type by default to get optimization flags @@ -236,6 +248,12 @@ if (USE_TYPE_FLOAT) target_compile_definitions(bench_pffft_float PRIVATE HAVE_KISS_FFT=1) target_link_libraries(bench_pffft_float KissFFT) endif() + + if (PATH_POCKET AND USE_BENCH_POCKET) + target_compile_definitions(bench_pffft_float PRIVATE HAVE_POCKET_FFT=1) + target_link_libraries(bench_pffft_float PocketFFT) + endif() + endif() if (USE_TYPE_DOUBLE) @@ -248,6 +266,11 @@ if (USE_TYPE_DOUBLE) target_compile_definitions(bench_pffft_double PRIVATE HAVE_FFTW=1) target_link_libraries(bench_pffft_double fftw3) endif() + + if (PATH_POCKET AND USE_BENCH_POCKET) + target_compile_definitions(bench_pffft_double PRIVATE HAVE_POCKET_FFT=1) + target_link_libraries(bench_pffft_double PocketFFT) + endif() endif() ###################################################### @@ -98,6 +98,11 @@ and `libPFFASTCONV.a` from the source files, plus the additional `libFFTPACK.a` library. Later one's sources are there anyway for the benchmark. +## Origin: +Origin for this code is Julien Pommier's pffft on bitbucket: +[https://bitbucket.org/jpommier/pffft/](https://bitbucket.org/jpommier/pffft/) + + ## Comparison with other FFTs: The idea was not to break speed records, but to get a decently fast @@ -114,6 +119,11 @@ It is also a bit focused on performing 1D convolutions, that is why it provides "unordered" FFTs , and a fourier domain convolution operation. +Very interesting is [https://www.nayuki.io/page/free-small-fft-in-multiple-languages](https://www.nayuki.io/page/free-small-fft-in-multiple-languages). +It shows how small an FFT can be - including the Bluestein algorithm, but it's everything else than fast. +The whole C++ implementation file is 161 lines, including the Copyright header, see +[https://github.com/nayuki/Nayuki-web-published-code/blob/master/free-small-fft-in-multiple-languages/FftComplex.cpp](https://github.com/nayuki/Nayuki-web-published-code/blob/master/free-small-fft-in-multiple-languages/FftComplex.cpp) + ## Benchmark results diff --git a/bench_all.sh b/bench_all.sh index 37be94c..b4c6b23 100755 --- a/bench_all.sh +++ b/bench_all.sh @@ -32,13 +32,13 @@ else fi -#cmake -DCMAKE_TOOLCHAIN_FILE=ToolChain.cmake -DUSE_FFTW=${FFTW} -DUSE_SIMD=OFF ${CMAKEOPT} ../ +#cmake -DCMAKE_TOOLCHAIN_FILE=ToolChain.cmake -DUSE_BENCH_FFTW=${FFTW} -DUSE_SIMD=OFF ${CMAKEOPT} ../ #make clean #make #echo -e "\n\nrunning without simd (==scalar) .." #time ctest -V -cmake -DCMAKE_TOOLCHAIN_FILE=ToolChain.cmake -DUSE_FFTW=${FFTW} -DUSE_SIMD=ON ${CMAKEOPT} ../ +cmake -DCMAKE_TOOLCHAIN_FILE=ToolChain.cmake -DUSE_BENCH_FFTW=${FFTW} -DUSE_SIMD=ON ${CMAKEOPT} ../ #make clean make echo -e "\n\nrunning with simd .." diff --git a/bench_pffft.c b/bench_pffft.c index 49d4faa..83b6d0f 100644 --- a/bench_pffft.c +++ b/bench_pffft.c @@ -26,6 +26,7 @@ */ #define CONCAT_TOKENS(A, B) A ## B +#define CONCAT_THREE_TOKENS(A, B, C) A ## B ## C #ifdef PFFFT_ENABLE_FLOAT #include "pffft.h" @@ -45,7 +46,6 @@ typedef PFFFTD_Setup PFFFT_SETUP; #define PFFFT_FUNC(F) CONCAT_TOKENS(pffftd_, F) #endif - #ifdef PFFFT_ENABLE_FLOAT #include "fftpack.h" @@ -61,6 +61,25 @@ typedef PFFFTD_Setup PFFFT_SETUP; #endif +#ifdef HAVE_POCKET_FFT +#include <pocketfft_double.h> +#include <pocketfft_single.h> +#endif + +#ifdef PFFFT_ENABLE_FLOAT + #define POCKFFTR_PRE(R) CONCAT_TOKENS(rffts, R) + #define POCKFFTC_PRE(R) CONCAT_TOKENS(cffts, R) + #define POCKFFTR_MID(L,R) CONCAT_THREE_TOKENS(L, rffts, R) + #define POCKFFTC_MID(L,R) CONCAT_THREE_TOKENS(L, cffts, R) +#else + #define POCKFFTR_PRE(R) CONCAT_TOKENS(rfft, R) + #define POCKFFTC_PRE(R) CONCAT_TOKENS(cfft, R) + #define POCKFFTR_MID(L,R) CONCAT_THREE_TOKENS(L, rfft, R) + #define POCKFFTC_MID(L,R) CONCAT_THREE_TOKENS(L, cfft, R) +#endif + + + #include <math.h> #include <stdio.h> #include <stdlib.h> @@ -97,7 +116,7 @@ typedef fftw_complex FFTW_COMPLEX; #endif -#define NUM_FFT_ALGOS 8 +#define NUM_FFT_ALGOS 9 enum { ALGO_FFTPACK = 0, ALGO_VECLIB, @@ -105,8 +124,9 @@ enum { ALGO_FFTW_AUTO, ALGO_GREEN, ALGO_KISS, - ALGO_PFFFT_U, /* = 6 */ - ALGO_PFFFT_O /* = 7 */ + ALGO_POCKET, + ALGO_PFFFT_U, /* = 7 */ + ALGO_PFFFT_O /* = 8 */ }; #define NUM_TYPES 7 @@ -128,6 +148,7 @@ const char * algoName[NUM_FFT_ALGOS] = { "FFTW F(auto) ", "Green ", "Kiss ", + "Pocket ", "PFFFT-U(simd)", /* unordered */ "PFFFT (simd) " /* ordered */ }; @@ -160,6 +181,11 @@ int compiledInAlgo[NUM_FFT_ALGOS] = { #else 0, #endif +#if defined(HAVE_POCKET_FFT) + 1, /* "Pocket " */ +#else + 0, +#endif 1, /* "PFFFT_U " */ 1 /* "PFFFT_O " */ }; @@ -171,6 +197,7 @@ const char * algoTableHeader[NUM_FFT_ALGOS][2] = { { "|real FFTWauto ", "|cplx FFTWauto " }, { "| real Green ", "| cplx Green " }, { "| real Kiss ", "| cplx Kiss " }, +{ "| real Pocket ", "| cplx Pocket " }, { "| real PFFFT-U ", "| cplx PFFFT-U " }, { "| real PFFFT ", "| cplx PFFFT " } }; @@ -820,6 +847,67 @@ void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, doubl } #endif +#if defined(HAVE_POCKET_FFT) + + Nmax = (cplx ? nextPow2N*2 : nextPow2N); + X[Nmax] = checkVal; + if ( 1 || PFFFT_FUNC(is_power_of_two)(N) ) + { + POCKFFTR_PRE(_plan) planr; + POCKFFTC_PRE(_plan) planc; + + te = uclock_sec(); + if (cplx) { + planc = POCKFFTC_MID(make_,_plan)(nextPow2N); + } else { + planr = POCKFFTR_MID(make_,_plan)(nextPow2N); + } + + t0 = uclock_sec(); + tstop = t0 + max_test_duration; + max_iter = 0; + do { + for ( k = 0; k < step_iter; ++k ) { + if (cplx) { + assert( X[Nmax] == checkVal ); + memcpy(Y, X, 2*nextPow2N * sizeof(pffft_scalar) ); + POCKFFTC_PRE(_forward)(planc, Y, 1.); + assert( X[Nmax] == checkVal ); + memcpy(X, Y, 2*nextPow2N * sizeof(pffft_scalar) ); + POCKFFTC_PRE(_backward)(planc, X, 1./nextPow2N); + assert( X[Nmax] == checkVal ); + } else { + assert( X[Nmax] == checkVal ); + memcpy(Y, X, nextPow2N * sizeof(pffft_scalar) ); + POCKFFTR_PRE(_forward)(planr, Y, 1.); + assert( X[Nmax] == checkVal ); + memcpy(X, Y, nextPow2N * sizeof(pffft_scalar) ); + POCKFFTR_PRE(_backward)(planr, X, 1./nextPow2N); + assert( X[Nmax] == checkVal ); + } + ++max_iter; + } + t1 = uclock_sec(); + } while ( t1 < tstop ); + + if (cplx) { + POCKFFTC_MID(destroy_,_plan)(planc); + } else { + POCKFFTR_MID(destroy_,_plan)(planr); + } + + flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ + tmeas[TYPE_ITER][ALGO_POCKET] = max_iter; + tmeas[TYPE_MFLOPS][ALGO_POCKET] = flops/1e6/(t1 - t0 + 1e-16); + tmeas[TYPE_DUR_TOT][ALGO_POCKET] = t1 - t0; + tmeas[TYPE_DUR_NS][ALGO_POCKET] = show_output("Pocket", N, cplx, flops, t0, t1, max_iter, tableFile); + tmeas[TYPE_PREP][ALGO_POCKET] = (t0 - te) * 1e3; + haveAlgo[ALGO_POCKET] = 1; + } else { + show_output("Pocket", N, cplx, -1, -1, -1, -1, tableFile); + } +#endif + /* PFFFT-U (unordered) benchmark */ Nmax = (cplx ? pffftPow2N*2 : pffftPow2N); @@ -977,9 +1065,9 @@ int main(int argc, char **argv) { -1 }; #endif -#define NUMPOW2FFTLENS 21 +#define NUMPOW2FFTLENS 22 #define MAXNUMFFTLENS MAX( NUMPOW2FFTLENS, NUMNONPOW2LENS ) - int Npow2[NUMPOW2FFTLENS]; /* exp = 1 .. 20, -1 */ + int Npow2[NUMPOW2FFTLENS]; /* exp = 1 .. 21, -1 */ const int *Nvalues = NULL; double tmeas[2][MAXNUMFFTLENS][NUM_TYPES][NUM_FFT_ALGOS]; double iterCalReal, iterCalCplx; diff --git a/pocketfft b/pocketfft new file mode 160000 +Subproject 78a0254546e3cd0e042e0216b71f7e1e79f0309 |