diff options
author | hayati ayguen <h_ayguen@web.de> | 2020-04-13 04:02:07 +0200 |
---|---|---|
committer | hayati ayguen <h_ayguen@web.de> | 2020-04-13 04:02:07 +0200 |
commit | eeb17fc8a08078372de542647841750136e1cf85 (patch) | |
tree | f0e45956576f54b3fcc65025fef46e8afde451a6 | |
parent | ca8c5f815c83299a949761700afcb99c485ea61a (diff) | |
download | pffft-eeb17fc8a08078372de542647841750136e1cf85.tar.gz |
moved common functions into pffft_common.c
Signed-off-by: hayati ayguen <h_ayguen@web.de>
-rw-r--r-- | CMakeLists.txt | 2 |
-rw-r--r-- | pffft.c | 3 |
-rw-r--r-- | pffft.h | 19 |
-rw-r--r-- | pffft.hpp | 71 |
-rw-r--r-- | pffft_common.c | 68 |
-rw-r--r-- | pffft_double.c | 3 |
-rw-r--r-- | pffft_double.h | 34 |
-rw-r--r-- | pffft_priv_impl.h | 42 |
-rw-r--r-- | simd/pf_double.h | 19 |
-rw-r--r-- | simd/pf_float.h | 19 |
10 files changed, 137 insertions, 143 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 54cb7c2..9b6bc10 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,7 +97,7 @@ endif() ###################################################### -add_library(PFFFT STATIC ${FLOAT_SOURCES} ${DOUBLE_SOURCES} pffft_priv_impl.h pffft.hpp ) +add_library(PFFFT STATIC ${FLOAT_SOURCES} ${DOUBLE_SOURCES} pffft_common.c pffft_priv_impl.h pffft.hpp ) target_compile_definitions(PFFFT PRIVATE _USE_MATH_DEFINES) if (USE_SCALAR_VECT) target_compile_definitions(PFFFT PRIVATE PFFFT_SCALVEC_ENABLED=1) @@ -106,9 +106,6 @@ #define FUNC_ZREORDER pffft_zreorder #define FUNC_ZCONVOLVE_ACCUMULATE pffft_zconvolve_accumulate #define FUNC_ZCONVOLVE_NO_ACCU pffft_zconvolve_no_accu -#define FUNC_MIN_FFT_SIZE pffft_min_fft_size -#define FUNC_NEXT_POWER_OF_TWO pffft_next_power_of_two -#define FUNC_IS_POWER_OF_TWO pffft_is_power_of_two #define FUNC_ALIGNED_MALLOC pffft_aligned_malloc #define FUNC_ALIGNED_FREE pffft_aligned_free @@ -173,13 +173,22 @@ extern "C" { *not* have been reordered with pffft_zreorder (otherwise just perform the operation yourself as the dft coefs are stored as interleaved complex numbers). - + the operation performed is: dft_ab = (dft_a * fdt_b)*scaling - + The dft_a, dft_b and dft_ab pointers may alias. */ void pffft_zconvolve_no_accu(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); + /* return 4 or 1 wether support SSE/NEON/Altivec instructions was enabled when building pffft.c */ + int pffft_simd_size(); + + /* return string identifier of used architecture (SSE/NEON/Altivec/..) 
*/ + const char * pffft_simd_arch(); + + + /* following functions are identical to the pffftd_ functions */ + /* simple helper to get minimum possible fft size */ int pffft_min_fft_size(pffft_transform_t transform); @@ -199,12 +208,6 @@ extern "C" { void *pffft_aligned_malloc(size_t nb_bytes); void pffft_aligned_free(void *); - /* return 4 or 1 wether support SSE/NEON/Altivec instructions was enabled when building pffft.c */ - int pffft_simd_size(); - - /* return string identifier of used architecture (SSE/NEON/Altivec/..) */ - const char * pffft_simd_arch(); - #ifdef __cplusplus } #endif @@ -55,22 +55,12 @@ template<typename T> struct Types {}; template<> struct Types<float> { typedef float Scalar; typedef std::complex<Scalar> Complex; - static Scalar* alignedAlloc(int N) { return (Scalar*)pffft_aligned_malloc( N * sizeof(Scalar) ); } - static void alignedFree(void *ptr) { pffft_aligned_free(ptr); } - static int minFFtsize() { return pffft_min_fft_size(PFFFT_REAL); } - static int nextPowerOfTwo(int N) { return pffft_next_power_of_two(N); } - static bool isPowerOfTwo(int N) { return pffft_is_power_of_two(N); } static int simd_size() { return pffft_simd_size(); } static const char * simd_arch() { return pffft_simd_arch(); } }; template<> struct Types< std::complex<float> > { typedef float Scalar; typedef std::complex<float> Complex; - static Complex* alignedAlloc(int N) { return (Complex*)pffft_aligned_malloc( N * sizeof(Complex) ); } - static void alignedFree(void *ptr) { pffft_aligned_free(ptr); } - static int minFFtsize() { return pffft_min_fft_size(PFFFT_COMPLEX); } - static int nextPowerOfTwo(int N) { return pffft_next_power_of_two(N); } - static bool isPowerOfTwo(int N) { return pffft_is_power_of_two(N); } static int simd_size() { return pffft_simd_size(); } static const char * simd_arch() { return pffft_simd_arch(); } }; @@ -79,22 +69,12 @@ template<> struct Types< std::complex<float> > { template<> struct Types<double> { typedef double Scalar; typedef 
std::complex<Scalar> Complex; - static Scalar* alignedAlloc(int N) { return (Scalar*)pffftd_aligned_malloc( N * sizeof(Scalar) ); } - static void alignedFree(void *ptr) { pffftd_aligned_free(ptr); } - static int minFFtsize() { return pffftd_min_fft_size(PFFFT_REAL); } - static int nextPowerOfTwo(int N) { return pffftd_next_power_of_two(N); } - static bool isPowerOfTwo(int N) { return pffftd_is_power_of_two(N); } static int simd_size() { return pffftd_simd_size(); } static const char * simd_arch() { return pffftd_simd_arch(); } }; template<> struct Types< std::complex<double> > { typedef double Scalar; typedef std::complex<double> Complex; - static Complex* alignedAlloc(int N) { return (Complex*)pffftd_aligned_malloc( N * sizeof(Complex) ); } - static void alignedFree(void *ptr) { pffftd_aligned_free(ptr); } - static int minFFtsize() { return pffftd_min_fft_size(PFFFT_COMPLEX); } - static int nextPowerOfTwo(int N) { return pffftd_next_power_of_two(N); } - static bool isPowerOfTwo(int N) { return pffftd_is_power_of_two(N); } static int simd_size() { return pffftd_simd_size(); } static const char * simd_arch() { return pffftd_simd_arch(); } }; @@ -143,10 +123,12 @@ public: static bool isDoubleScalar() { return sizeof(Scalar) == sizeof(double); } // simple helper to get minimum possible fft length - static int minFFtsize() { return Types<T>::minFFtsize(); } + static int minFFtsize() { return pffft_min_fft_size( isComplexTransform() ? 
PFFFT_COMPLEX : PFFFT_REAL ); } + // simple helper to determine next power of 2 - without inexact/rounding floating point operations - static int nextPowerOfTwo(int N) { return Types<T>::nextPowerOfTwo(N); } - static bool isPowerOfTwo(int N) { return Types<T>::isPowerOfTwo(N); } + static int nextPowerOfTwo(int N) { return pffft_next_power_of_two(N); } + static bool isPowerOfTwo(int N) { return pffft_is_power_of_two(N); } + static int simd_size() { return Types<T>::simd_size(); } static const char * simd_arch() { return Types<T>::simd_arch(); } @@ -380,6 +362,31 @@ private: }; +template<typename T> +inline T* alignedAlloc(int length) { + return (T*)pffft_aligned_malloc( length * sizeof(T) ); +} + +inline void alignedFree(void *ptr) { + pffft_aligned_free(ptr); +} + + +// simple helper to get minimum possible fft length +inline int minFFtsize(pffft_transform_t transform) { + return pffft_min_fft_size(transform); +} + +// simple helper to determine next power of 2 - without inexact/rounding floating point operations +inline int nextPowerOfTwo(int N) { + return pffft_next_power_of_two(N); +} + +inline bool isPowerOfTwo(int N) { + return pffft_is_power_of_two(N); +} + + //////////////////////////////////////////////////////////////////// @@ -659,7 +666,7 @@ inline Fft<T>::Fft(int length, int stackThresholdLen) template<typename T> inline Fft<T>::~Fft() { - Types<T>::alignedFree(work); + alignedFree(work); } template<typename T> @@ -679,12 +686,12 @@ Fft<T>::prepareLength(int newLength) setup.prepareLength(length); if (work) { - Types<T>::alignedFree(work); + alignedFree(work); work = NULL; } if (useHeap) { - work = reinterpret_cast<Scalar*>( Types<T>::alignedAlloc(length) ); + work = reinterpret_cast<Scalar*>( alignedAllocType(length) ); } } @@ -861,7 +868,7 @@ template<typename T> inline void Fft<T>::alignedFree(void* ptr) { - Types<T>::alignedFree(ptr); + pffft::alignedFree(ptr); } @@ -869,21 +876,21 @@ template<typename T> inline T* 
pffft::Fft<T>::alignedAllocType(int length) { - return Types<T>::alignedAlloc(length); + return alignedAlloc<T>(length); } template<typename T> inline typename pffft::Fft<T>::Scalar* pffft::Fft<T>::alignedAllocScalar(int length) { - return Types<Scalar>::alignedAlloc(length); + return alignedAlloc<Scalar>(length); } template<typename T> inline typename Fft<T>::Complex * Fft<T>::alignedAllocComplex(int length) { - return Types<Complex>::alignedAlloc(length); + return alignedAlloc<Complex>(length); } @@ -954,7 +961,7 @@ class PFAlloc { // allocate but don't initialize num elements of type T pointer allocate (size_type num, const void* = 0) { - pointer ret = (pointer)( Types<T>::alignedAlloc(num) ); + pointer ret = (pointer)( alignedAlloc<T>(num) ); return ret; } @@ -973,7 +980,7 @@ class PFAlloc { // deallocate storage p of deleted elements void deallocate (pointer p, size_type num) { // deallocate memory with pffft - Types<T>::alignedFree( (void*)p ); + alignedFree( (void*)p ); } }; diff --git a/pffft_common.c b/pffft_common.c new file mode 100644 index 0000000..1121ac7 --- /dev/null +++ b/pffft_common.c @@ -0,0 +1,68 @@ + +#include "pffft.h" + +#include <stdlib.h> + +/* SSE and co like 16-bytes aligned pointers + * with a 64-byte alignment, we are even aligned on L2 cache lines... 
*/ +#define MALLOC_V4SF_ALIGNMENT 64 + +static void * Valigned_malloc(size_t nb_bytes) { + void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT); + if (!p0) return (void *) 0; + p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1)))); + *((void **) p - 1) = p0; + return p; +} + +static void Valigned_free(void *p) { + if (p) free(*((void **) p - 1)); +} + + +static int next_power_of_two(int N) { + /* https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */ + /* compute the next highest power of 2 of 32-bit v */ + unsigned v = N; + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; +} + +static int is_power_of_two(int N) { + /* https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 */ + int f = N && !(N & (N - 1)); + return f; +} + +static int min_fft_size(pffft_transform_t transform) { + /* unfortunately, the fft size must be a multiple of 16 for complex FFTs + and 32 for real FFTs -- a lot of stuff would need to be rewritten to + handle other cases (or maybe just switch to a scalar fft, I don't know..) 
*/ + int simdSz = pffft_simd_size(); + if (transform == PFFFT_REAL) + return ( 2 * simdSz * simdSz ); + else if (transform == PFFFT_COMPLEX) + return ( simdSz * simdSz ); + else + return 1; +} + + +void *pffft_aligned_malloc(size_t nb_bytes) { return Valigned_malloc(nb_bytes); } +void pffft_aligned_free(void *p) { Valigned_free(p); } +int pffft_next_power_of_two(int N) { return next_power_of_two(N); } +int pffft_is_power_of_two(int N) { return is_power_of_two(N); } +int pffft_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); } + +void *pffftd_aligned_malloc(size_t nb_bytes) { return Valigned_malloc(nb_bytes); } +void pffftd_aligned_free(void *p) { Valigned_free(p); } +int pffftd_next_power_of_two(int N) { return next_power_of_two(N); } +int pffftd_is_power_of_two(int N) { return is_power_of_two(N); } +int pffftd_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); } + diff --git a/pffft_double.c b/pffft_double.c index dd0295c..28c0832 100644 --- a/pffft_double.c +++ b/pffft_double.c @@ -117,9 +117,6 @@ #define FUNC_ZREORDER pffftd_zreorder #define FUNC_ZCONVOLVE_ACCUMULATE pffftd_zconvolve_accumulate #define FUNC_ZCONVOLVE_NO_ACCU pffftd_zconvolve_no_accu -#define FUNC_MIN_FFT_SIZE pffftd_min_fft_size -#define FUNC_NEXT_POWER_OF_TWO pffftd_next_power_of_two -#define FUNC_IS_POWER_OF_TWO pffftd_is_power_of_two #define FUNC_ALIGNED_MALLOC pffftd_aligned_malloc #define FUNC_ALIGNED_FREE pffftd_aligned_free diff --git a/pffft_double.h b/pffft_double.h index cd460ed..d83c06d 100644 --- a/pffft_double.h +++ b/pffft_double.h @@ -172,19 +172,27 @@ extern "C" { void pffftd_zconvolve_accumulate(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double *dft_ab, double scaling); /* - Perform a multiplication of the frequency components of dft_a and - dft_b and put result in dft_ab. 
The arrays should have - been obtained with pffft_transform(.., PFFFT_FORWARD) and should - *not* have been reordered with pffft_zreorder (otherwise just - perform the operation yourself as the dft coefs are stored as - interleaved complex numbers). - - the operation performed is: dft_ab = (dft_a * fdt_b)*scaling - - The dft_a, dft_b and dft_ab pointers may alias. + Perform a multiplication of the frequency components of dft_a and + dft_b and put result in dft_ab. The arrays should have + been obtained with pffft_transform(.., PFFFT_FORWARD) and should + *not* have been reordered with pffft_zreorder (otherwise just + perform the operation yourself as the dft coefs are stored as + interleaved complex numbers). + + the operation performed is: dft_ab = (dft_a * fdt_b)*scaling + + The dft_a, dft_b and dft_ab pointers may alias. */ void pffftd_zconvolve_no_accu(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double*dft_ab, double scaling); + /* return 4 or 1 wether support AVX instructions was enabled when building pffft-double.c */ + int pffftd_simd_size(); + + /* return string identifier of used architecture (AVX/..) */ + const char * pffftd_simd_arch(); + + + /* following functions are identical to the pffft_ functions */ /* simple helper to get minimum possible fft size */ int pffftd_min_fft_size(pffft_transform_t transform); @@ -205,12 +213,6 @@ extern "C" { void *pffftd_aligned_malloc(size_t nb_bytes); void pffftd_aligned_free(void *); - /* return 4 or 1 wether support AVX instructions was enabled when building pffft-double.c */ - int pffftd_simd_size(); - - /* return string identifier of used architecture (SSE/NEON/Altivec/..) 
*/ - const char * pffftd_simd_arch(); - #ifdef __cplusplus } #endif diff --git a/pffft_priv_impl.h b/pffft_priv_impl.h index e051b0f..36cae59 100644 --- a/pffft_priv_impl.h +++ b/pffft_priv_impl.h @@ -69,14 +69,6 @@ #endif -void *FUNC_ALIGNED_MALLOC(size_t nb_bytes) { - return Valigned_malloc(nb_bytes); -} - -void FUNC_ALIGNED_FREE(void *p) { - Valigned_free(p); -} - int FUNC_SIMD_SIZE() { return SIMD_SZ; } const char * FUNC_SIMD_ARCH() { return VARCH; } @@ -2195,37 +2187,3 @@ int FUNC_VALIDATE_SIMD_EX(FILE * DbgOut) #endif /* end if ( SIMD_SZ == 4 ) */ - - -int FUNC_NEXT_POWER_OF_TWO(int N) { - /* https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */ - /* compute the next highest power of 2 of 32-bit v */ - unsigned v = N; - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - return v; -} - -int FUNC_IS_POWER_OF_TWO(int N) { - /* https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 */ - int f = N && !(N & (N - 1)); - return f; -} - -int FUNC_MIN_FFT_SIZE(pffft_transform_t transform) { - /* unfortunately, the fft size must be a multiple of 16 for complex FFTs - and 32 for real FFTs -- a lot of stuff would need to be rewritten to - handle other cases (or maybe just switch to a scalar fft, I don't know..) */ - if (transform == PFFFT_REAL) - return ( 2 * SIMD_SZ * SIMD_SZ ); - else if (transform == PFFFT_COMPLEX) - return ( SIMD_SZ * SIMD_SZ ); - else - return 1; -} - diff --git a/simd/pf_double.h b/simd/pf_double.h index 718172c..c6bac31 100644 --- a/simd/pf_double.h +++ b/simd/pf_double.h @@ -78,24 +78,5 @@ typedef double vsfscalar; #define SVMUL(f,v) VMUL(LD_PS1(f),v) #endif -/* SSE and co like 16-bytes aligned pointers - * with a 64-byte alignment, we are even aligned on L2 cache lines... 
*/ -#define MALLOC_V4SF_ALIGNMENT 64 - -static -void *Valigned_malloc(size_t nb_bytes) { - void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT); - if (!p0) return (void *) 0; - p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1)))); - *((void **) p - 1) = p0; - return p; -} - -static -void Valigned_free(void *p) { - if (p) free(*((void **) p - 1)); -} - - #endif /* PF_DBL_H */ diff --git a/simd/pf_float.h b/simd/pf_float.h index 2491a42..1798194 100644 --- a/simd/pf_float.h +++ b/simd/pf_float.h @@ -80,24 +80,5 @@ typedef float vsfscalar; #define SVMUL(f,v) VMUL(LD_PS1(f),v) #endif -/* SSE and co like 16-bytes aligned pointers - * with a 64-byte alignment, we are even aligned on L2 cache lines... */ -#define MALLOC_V4SF_ALIGNMENT 64 - -static -void *Valigned_malloc(size_t nb_bytes) { - void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT); - if (!p0) return (void *) 0; - p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1)))); - *((void **) p - 1) = p0; - return p; -} - -static -void Valigned_free(void *p) { - if (p) free(*((void **) p - 1)); -} - - #endif /* PF_FLT_H */ |