diff options
author | hayati ayguen <h_ayguen@web.de> | 2020-01-02 00:06:09 +0100 |
---|---|---|
committer | hayati ayguen <h_ayguen@web.de> | 2020-01-10 00:07:15 +0100 |
commit | b2d2936048dec21aa2ba89d6f6939aadedab3cf6 (patch) | |
tree | 9f94b41ba5e20ca8efb902cc7024d269004c7a8f /test_pffft.c | |
parent | 4807c2b449f7042605bdd22e90c665a2175d39b2 (diff) | |
download | pffft-b2d2936048dec21aa2ba89d6f6939aadedab3cf6.tar.gz |
add PFFASTCONV library and test + bench
* API for fast convolution: pffastconv.h
* move simd macros from pffft.c into pfsimd_macros.h for reusability
Signed-off-by: hayati ayguen <h_ayguen@web.de>
Diffstat (limited to 'test_pffft.c')
-rw-r--r-- | test_pffft.c | 246 |
1 files changed, 246 insertions, 0 deletions
diff --git a/test_pffft.c b/test_pffft.c new file mode 100644 index 0000000..294a43f --- /dev/null +++ b/test_pffft.c @@ -0,0 +1,246 @@ +/* + Copyright (c) 2013 Julien Pommier. + + Small test & bench for PFFFT, comparing its performance with the scalar FFTPACK, FFTW, and Apple vDSP + + How to build: + + on linux, with fftw3: + gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm + + on macos, without fftw3: + clang -o test_pffft -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -framework Accelerate + + on macos, with fftw3: + clang -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework Accelerate + + as alternative: replace clang by gcc. + + on windows, with visual c++: + cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c + + build without SIMD instructions: + gcc -o test_pffft -DPFFFT_SIMD_DISABLE -O3 -Wall -W pffft.c test_pffft.c fftpack.c -lm + + */ + +#include "pffft.h" + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <assert.h> +#include <string.h> + + +/* EXPECTED_DYN_RANGE in dB: + * single precision float has 24 bits mantissa + * => 24 Bits * 6 dB = 144 dB + * allow a few dB tolerance (even 144 dB looks good on my PC) + */ +#define EXPECTED_DYN_RANGE 140.0 + +/* maximum allowed phase error in degree */ +#define DEG_ERR_LIMIT 1E-4 + +/* maximum allowed magnitude error in amplitude (of 1.0 or 1.1) */ +#define MAG_ERR_LIMIT 1E-6 + + +#define PRINT_SPEC 0 + +#define PWR2LOG(PWR) ( (PWR) < 1E-30 ? 10.0*log10(1E-30) : 10.0*log10(PWR) ) + + +int isPowerOfTwo(unsigned v) { + /* https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 */ + int f = v && !(v & (v - 1)); + return f; +} + + +int test(int N, int cplx, int useOrdered) { + int Nfloat = (cplx ? N*2 : N); + float *X = pffft_aligned_malloc((unsigned)Nfloat * sizeof(float)); + float *Y = pffft_aligned_malloc((unsigned)Nfloat * sizeof(float)); + float *Z = pffft_aligned_malloc((unsigned)Nfloat * sizeof(float)); + float *W = pffft_aligned_malloc((unsigned)Nfloat * sizeof(float)); + int k, j, m, iter, kmaxOther, retError = 0; + double freq, dPhi, phi, phi0; + double pwr, pwrCar, pwrOther, err, errSum, mag, expextedMag; + float amp = 1.0F; + + assert( isPowerOfTwo(N) ); + + PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); + assert(s); + if (!s) { + printf("Error setting up PFFFT!\n"); + return 1; + } + + for ( k = m = 0; k < (cplx? N : (1 + N/2) ); k += N/16, ++m ) + { + amp = ( (m % 3) == 0 ) ? 1.0F : 1.1F; + freq = (k < N/2) ? ((double)k / N) : ((double)(k-N) / N); + dPhi = 2.0 * M_PI * freq; + if ( dPhi < 0.0 ) + dPhi += 2.0 * M_PI; + + iter = -1; + while (1) + { + ++iter; + + if (iter) + printf("bin %d: dphi = %f for freq %f\n", k, dPhi, freq); + + /* generate cosine carrier as time signal - start at defined phase phi0 */ + phi = phi0 = (m % 4) * 0.125 * M_PI; /* have phi0 < 90 deg to be normalized */ + for ( j = 0; j < N; ++j ) + { + if (cplx) { + X[2*j] = amp * (float)cos(phi); /* real part */ + X[2*j+1] = amp * (float)sin(phi); /* imag part */ + } + else + X[j] = amp * (float)cos(phi); /* only real part */ + + /* phase increment .. stay normalized - cos()/sin() might degrade! */ + phi += dPhi; + if ( phi >= M_PI ) + phi -= 2.0 * M_PI; + } + + /* forward transform from X --> Y .. using work buffer W */ + if ( useOrdered ) + pffft_transform_ordered(s, X, Y, W, PFFFT_FORWARD ); + else + { + pffft_transform(s, X, Z, W, PFFFT_FORWARD ); /* temporarily use Z for reordering */ + pffft_zreorder(s, Z, Y, PFFFT_FORWARD ); + } + + pwrOther = -1.0; + pwrCar = 0; + + + /* for positive frequencies: 0 to 0.5 * samplerate */ + /* and also for negative frequencies: -0.5 * samplerate to 0 */ + for ( j = 0; j < ( cplx ? N : (1 + N/2) ); ++j ) + { + if (!cplx && !j) /* special treatment for DC for real input */ + pwr = Y[j]*Y[j]; + else if (!cplx && j == N/2) /* treat 0.5 * samplerate */ + pwr = Y[1] * Y[1]; /* despite j (for freq calculation) we have index 1 */ + else + pwr = Y[2*j] * Y[2*j] + Y[2*j+1] * Y[2*j+1]; + if (iter || PRINT_SPEC) + printf("%s fft %d: pwr[j = %d] = %g == %f dB\n", (cplx ? "cplx":"real"), N, j, pwr, PWR2LOG(pwr) ); + if (k == j) + pwrCar = pwr; + else if ( pwr > pwrOther ) { + pwrOther = pwr; + kmaxOther = j; + } + } + + if ( PWR2LOG(pwrCar) - PWR2LOG(pwrOther) < EXPECTED_DYN_RANGE ) { + printf("%s fft %d amp %f iter %d:\n", (cplx ? "cplx":"real"), N, amp, iter); + printf(" carrier power at bin %d: %g == %f dB\n", k, pwrCar, PWR2LOG(pwrCar) ); + printf(" carrier mag || at bin %d: %g\n", k, sqrt(pwrCar) ); + printf(" max other pwr at bin %d: %g == %f dB\n", kmaxOther, pwrOther, PWR2LOG(pwrOther) ); + printf(" dynamic range: %f dB\n\n", PWR2LOG(pwrCar) - PWR2LOG(pwrOther) ); + retError = 1; + if ( iter == 0 ) + continue; + } + + if ( k > 0 && k != N/2 ) + { + phi = atan2( Y[2*k+1], Y[2*k] ); + if ( fabs( phi - phi0) > DEG_ERR_LIMIT * M_PI / 180.0 ) + { + retError = 1; + printf("%s fft %d bin %d amp %f : phase mismatch! phase = %f deg expected = %f deg\n", + (cplx ? "cplx":"real"), N, k, amp, phi * 180.0 / M_PI, phi0 * 180.0 / M_PI ); + } + } + + expextedMag = cplx ? amp : ( (k == 0 || k == N/2) ? amp : (amp/2) ); + mag = sqrt(pwrCar) / N; + if ( fabs(mag - expextedMag) > MAG_ERR_LIMIT ) + { + retError = 1; + printf("%s fft %d bin %d amp %f : mag = %g expected = %g\n", (cplx ? "cplx":"real"), N, k, amp, mag, expextedMag ); + } + + + /* now convert spectrum back */ + pffft_transform_ordered(s, Y, Z, W, PFFFT_BACKWARD); + + errSum = 0.0; + for ( j = 0; j < (cplx ? (2*N) : N); ++j ) + { + /* scale back */ + Z[j] /= N; + /* square sum errors over real (and imag parts) */ + err = (X[j]-Z[j]) * (X[j]-Z[j]); + errSum += err; + } + + if ( errSum > N * 1E-7 ) + { + retError = 1; + printf("%s fft %d bin %d : inverse FFT doesn't match original signal! errSum = %g ; mean err = %g\n", (cplx ? "cplx":"real"), N, k, errSum, errSum / N); + } + + break; + } + + } + pffft_destroy_setup(s); + pffft_aligned_free(X); + pffft_aligned_free(Y); + pffft_aligned_free(Z); + pffft_aligned_free(W); + + return retError; +} + + + +int main(int argc, char **argv) +{ + int N, result, resN, resAll; + + resAll = 0; + for ( N = 32; N <= 65536; N *= 2 ) + { + result = test(N, 1 /* cplx fft */, 1 /* useOrdered */); + resN = result; + resAll |= result; + + result = test(N, 0 /* cplx fft */, 1 /* useOrdered */); + resN |= result; + resAll |= result; + + result = test(N, 1 /* cplx fft */, 0 /* useOrdered */); + resN |= result; + resAll |= result; + + result = test(N, 0 /* cplx fft */, 0 /* useOrdered */); + resN |= result; + resAll |= result; + + if (!resN) + printf("tests for size %d succeeded successfully.\n", N); + } + + if (!resAll) + printf("all tests succeeded successfully.\n"); + + return resAll; +} + |