aboutsummaryrefslogtreecommitdiff
path: root/test_pffft.c
diff options
context:
space:
mode:
authorhayati ayguen <h_ayguen@web.de>2020-01-02 00:06:09 +0100
committerhayati ayguen <h_ayguen@web.de>2020-01-10 00:07:15 +0100
commitb2d2936048dec21aa2ba89d6f6939aadedab3cf6 (patch)
tree9f94b41ba5e20ca8efb902cc7024d269004c7a8f /test_pffft.c
parent4807c2b449f7042605bdd22e90c665a2175d39b2 (diff)
downloadpffft-b2d2936048dec21aa2ba89d6f6939aadedab3cf6.tar.gz
add PFFASTCONV library and test + bench
* API for fast convolution: pffastconv.h * move simd macros from pffft.c into pfsimd_macros.h for reusability Signed-off-by: hayati ayguen <h_ayguen@web.de>
Diffstat (limited to 'test_pffft.c')
-rw-r--r--test_pffft.c246
1 files changed, 246 insertions, 0 deletions
diff --git a/test_pffft.c b/test_pffft.c
new file mode 100644
index 0000000..294a43f
--- /dev/null
+++ b/test_pffft.c
@@ -0,0 +1,246 @@
+/*
+ Copyright (c) 2013 Julien Pommier.
+
+ Small test & bench for PFFFT, comparing its performance with the scalar FFTPACK, FFTW, and Apple vDSP
+
+ How to build:
+
+ on linux, with fftw3:
+ gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm
+
+ on macos, without fftw3:
+ clang -o test_pffft -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -framework Accelerate
+
+ on macos, with fftw3:
+ clang -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework Accelerate
+
+ as alternative: replace clang by gcc.
+
+ on windows, with visual c++:
+ cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c
+
+ build without SIMD instructions:
+ gcc -o test_pffft -DPFFFT_SIMD_DISABLE -O3 -Wall -W pffft.c test_pffft.c fftpack.c -lm
+
+ */
+
+#include "pffft.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <assert.h>
+#include <string.h>
+
+
+/* EXPECTED_DYN_RANGE in dB:
+ * single precision float has 24 bits mantissa
+ * => 24 Bits * 6 dB = 144 dB
+ * allow a few dB tolerance (even 144 dB looks good on my PC)
+ */
+#define EXPECTED_DYN_RANGE 140.0
+
+/* maximum allowed phase error in degree */
+#define DEG_ERR_LIMIT 1E-4
+
+/* maximum allowed magnitude error in amplitude (of 1.0 or 1.1) */
+#define MAG_ERR_LIMIT 1E-6
+
+
+#define PRINT_SPEC 0
+
+#define PWR2LOG(PWR) ( (PWR) < 1E-30 ? 10.0*log10(1E-30) : 10.0*log10(PWR) )
+
+
+int isPowerOfTwo(unsigned v) {
+ /* https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 */
+ int f = v && !(v & (v - 1));
+ return f;
+}
+
+
+int test(int N, int cplx, int useOrdered) {
+ int Nfloat = (cplx ? N*2 : N);
+ float *X = pffft_aligned_malloc((unsigned)Nfloat * sizeof(float));
+ float *Y = pffft_aligned_malloc((unsigned)Nfloat * sizeof(float));
+ float *Z = pffft_aligned_malloc((unsigned)Nfloat * sizeof(float));
+ float *W = pffft_aligned_malloc((unsigned)Nfloat * sizeof(float));
+ int k, j, m, iter, kmaxOther, retError = 0;
+ double freq, dPhi, phi, phi0;
+ double pwr, pwrCar, pwrOther, err, errSum, mag, expextedMag;
+ float amp = 1.0F;
+
+ assert( isPowerOfTwo(N) );
+
+ PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL);
+ assert(s);
+ if (!s) {
+ printf("Error setting up PFFFT!\n");
+ return 1;
+ }
+
+ for ( k = m = 0; k < (cplx? N : (1 + N/2) ); k += N/16, ++m )
+ {
+ amp = ( (m % 3) == 0 ) ? 1.0F : 1.1F;
+ freq = (k < N/2) ? ((double)k / N) : ((double)(k-N) / N);
+ dPhi = 2.0 * M_PI * freq;
+ if ( dPhi < 0.0 )
+ dPhi += 2.0 * M_PI;
+
+ iter = -1;
+ while (1)
+ {
+ ++iter;
+
+ if (iter)
+ printf("bin %d: dphi = %f for freq %f\n", k, dPhi, freq);
+
+ /* generate cosine carrier as time signal - start at defined phase phi0 */
+ phi = phi0 = (m % 4) * 0.125 * M_PI; /* have phi0 < 90 deg to be normalized */
+ for ( j = 0; j < N; ++j )
+ {
+ if (cplx) {
+ X[2*j] = amp * (float)cos(phi); /* real part */
+ X[2*j+1] = amp * (float)sin(phi); /* imag part */
+ }
+ else
+ X[j] = amp * (float)cos(phi); /* only real part */
+
+ /* phase increment .. stay normalized - cos()/sin() might degrade! */
+ phi += dPhi;
+ if ( phi >= M_PI )
+ phi -= 2.0 * M_PI;
+ }
+
+ /* forward transform from X --> Y .. using work buffer W */
+ if ( useOrdered )
+ pffft_transform_ordered(s, X, Y, W, PFFFT_FORWARD );
+ else
+ {
+ pffft_transform(s, X, Z, W, PFFFT_FORWARD ); /* temporarily use Z for reordering */
+ pffft_zreorder(s, Z, Y, PFFFT_FORWARD );
+ }
+
+ pwrOther = -1.0;
+ pwrCar = 0;
+
+
+ /* for positive frequencies: 0 to 0.5 * samplerate */
+ /* and also for negative frequencies: -0.5 * samplerate to 0 */
+ for ( j = 0; j < ( cplx ? N : (1 + N/2) ); ++j )
+ {
+ if (!cplx && !j) /* special treatment for DC for real input */
+ pwr = Y[j]*Y[j];
+ else if (!cplx && j == N/2) /* treat 0.5 * samplerate */
+ pwr = Y[1] * Y[1]; /* despite j (for freq calculation) we have index 1 */
+ else
+ pwr = Y[2*j] * Y[2*j] + Y[2*j+1] * Y[2*j+1];
+ if (iter || PRINT_SPEC)
+ printf("%s fft %d: pwr[j = %d] = %g == %f dB\n", (cplx ? "cplx":"real"), N, j, pwr, PWR2LOG(pwr) );
+ if (k == j)
+ pwrCar = pwr;
+ else if ( pwr > pwrOther ) {
+ pwrOther = pwr;
+ kmaxOther = j;
+ }
+ }
+
+ if ( PWR2LOG(pwrCar) - PWR2LOG(pwrOther) < EXPECTED_DYN_RANGE ) {
+ printf("%s fft %d amp %f iter %d:\n", (cplx ? "cplx":"real"), N, amp, iter);
+ printf(" carrier power at bin %d: %g == %f dB\n", k, pwrCar, PWR2LOG(pwrCar) );
+ printf(" carrier mag || at bin %d: %g\n", k, sqrt(pwrCar) );
+ printf(" max other pwr at bin %d: %g == %f dB\n", kmaxOther, pwrOther, PWR2LOG(pwrOther) );
+ printf(" dynamic range: %f dB\n\n", PWR2LOG(pwrCar) - PWR2LOG(pwrOther) );
+ retError = 1;
+ if ( iter == 0 )
+ continue;
+ }
+
+ if ( k > 0 && k != N/2 )
+ {
+ phi = atan2( Y[2*k+1], Y[2*k] );
+ if ( fabs( phi - phi0) > DEG_ERR_LIMIT * M_PI / 180.0 )
+ {
+ retError = 1;
+ printf("%s fft %d bin %d amp %f : phase mismatch! phase = %f deg expected = %f deg\n",
+ (cplx ? "cplx":"real"), N, k, amp, phi * 180.0 / M_PI, phi0 * 180.0 / M_PI );
+ }
+ }
+
+ expextedMag = cplx ? amp : ( (k == 0 || k == N/2) ? amp : (amp/2) );
+ mag = sqrt(pwrCar) / N;
+ if ( fabs(mag - expextedMag) > MAG_ERR_LIMIT )
+ {
+ retError = 1;
+ printf("%s fft %d bin %d amp %f : mag = %g expected = %g\n", (cplx ? "cplx":"real"), N, k, amp, mag, expextedMag );
+ }
+
+
+ /* now convert spectrum back */
+ pffft_transform_ordered(s, Y, Z, W, PFFFT_BACKWARD);
+
+ errSum = 0.0;
+ for ( j = 0; j < (cplx ? (2*N) : N); ++j )
+ {
+ /* scale back */
+ Z[j] /= N;
+ /* square sum errors over real (and imag parts) */
+ err = (X[j]-Z[j]) * (X[j]-Z[j]);
+ errSum += err;
+ }
+
+ if ( errSum > N * 1E-7 )
+ {
+ retError = 1;
+ printf("%s fft %d bin %d : inverse FFT doesn't match original signal! errSum = %g ; mean err = %g\n", (cplx ? "cplx":"real"), N, k, errSum, errSum / N);
+ }
+
+ break;
+ }
+
+ }
+ pffft_destroy_setup(s);
+ pffft_aligned_free(X);
+ pffft_aligned_free(Y);
+ pffft_aligned_free(Z);
+ pffft_aligned_free(W);
+
+ return retError;
+}
+
+
+
+int main(int argc, char **argv)
+{
+ int N, result, resN, resAll;
+
+ resAll = 0;
+ for ( N = 32; N <= 65536; N *= 2 )
+ {
+ result = test(N, 1 /* cplx fft */, 1 /* useOrdered */);
+ resN = result;
+ resAll |= result;
+
+ result = test(N, 0 /* cplx fft */, 1 /* useOrdered */);
+ resN |= result;
+ resAll |= result;
+
+ result = test(N, 1 /* cplx fft */, 0 /* useOrdered */);
+ resN |= result;
+ resAll |= result;
+
+ result = test(N, 0 /* cplx fft */, 0 /* useOrdered */);
+ resN |= result;
+ resAll |= result;
+
+ if (!resN)
+ printf("tests for size %d succeeded successfully.\n", N);
+ }
+
+ if (!resAll)
+ printf("all tests succeeded successfully.\n");
+
+ return resAll;
+}
+