moved common functions into pffft_common.c

Signed-off-by: hayati ayguen <h_ayguen@web.de>
author: hayati ayguen <h_ayguen@web.de> 2020-04-13 04:02:07 +0200
committer: hayati ayguen <h_ayguen@web.de> 2020-04-13 04:02:07 +0200
commit: eeb17fc8a08078372de542647841750136e1cf85 (patch)
tree: f0e45956576f54b3fcc65025fef46e8afde451a6
parent: ca8c5f815c83299a949761700afcb99c485ea61a (diff)
download: pffft-eeb17fc8a08078372de542647841750136e1cf85.tar.gz
10 files changed, 137 insertions, 143 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 54cb7c2..9b6bc10 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -97,7 +97,7 @@ endif()
 
 ######################################################
 
-add_library(PFFFT STATIC ${FLOAT_SOURCES} ${DOUBLE_SOURCES} pffft_priv_impl.h pffft.hpp )
+add_library(PFFFT STATIC ${FLOAT_SOURCES} ${DOUBLE_SOURCES} pffft_common.c pffft_priv_impl.h pffft.hpp )
 target_compile_definitions(PFFFT PRIVATE _USE_MATH_DEFINES)
 if (USE_SCALAR_VECT)
   target_compile_definitions(PFFFT PRIVATE PFFFT_SCALVEC_ENABLED=1)
diff --git a/pffft.c b/pffft.c
index dde14e4..059f2d7 100644
--- a/pffft.c
+++ b/pffft.c
@@ -106,9 +106,6 @@
 #define FUNC_ZREORDER              pffft_zreorder
 #define FUNC_ZCONVOLVE_ACCUMULATE  pffft_zconvolve_accumulate
 #define FUNC_ZCONVOLVE_NO_ACCU     pffft_zconvolve_no_accu
-#define FUNC_MIN_FFT_SIZE          pffft_min_fft_size
-#define FUNC_NEXT_POWER_OF_TWO     pffft_next_power_of_two
-#define FUNC_IS_POWER_OF_TWO       pffft_is_power_of_two
 
 #define FUNC_ALIGNED_MALLOC        pffft_aligned_malloc
 #define FUNC_ALIGNED_FREE          pffft_aligned_free
diff --git a/pffft.h b/pffft.h
index 38dc342..31bb731 100644
--- a/pffft.h
+++ b/pffft.h
@@ -173,13 +173,22 @@ extern "C" {
      *not* have been reordered with pffft_zreorder (otherwise just
      perform the operation yourself as the dft coefs are stored as
      interleaved complex numbers).
-     
+
      the operation performed is: dft_ab = (dft_a * fdt_b)*scaling
-     
+
      The dft_a, dft_b and dft_ab pointers may alias.
   */
   void pffft_zconvolve_no_accu(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
 
+  /* return 4 or 1, depending on whether support for SSE/NEON/Altivec instructions was enabled when building pffft.c */
+  int pffft_simd_size();
+
+  /* return string identifier of used architecture (SSE/NEON/Altivec/..) */
+  const char * pffft_simd_arch();
+
+
+  /* following functions are identical to the pffftd_ functions */
+
   /* simple helper to get minimum possible fft size */
   int pffft_min_fft_size(pffft_transform_t transform);
 
@@ -199,12 +208,6 @@ extern "C" {
   void *pffft_aligned_malloc(size_t nb_bytes);
   void pffft_aligned_free(void *);
 
-  /* return 4 or 1 wether support SSE/NEON/Altivec instructions was enabled when building pffft.c */
-  int pffft_simd_size();
-
-  /* return string identifier of used architecture (SSE/NEON/Altivec/..) */
-  const char * pffft_simd_arch();
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/pffft.hpp b/pffft.hpp
index 832ebaf..ce910f9 100644
--- a/pffft.hpp
+++ b/pffft.hpp
@@ -55,22 +55,12 @@ template<typename T> struct Types {};
 template<> struct Types<float>  {
   typedef float  Scalar;
   typedef std::complex<Scalar> Complex;
-  static Scalar* alignedAlloc(int N) { return (Scalar*)pffft_aligned_malloc( N * sizeof(Scalar) ); }
-  static void    alignedFree(void *ptr) { pffft_aligned_free(ptr); }
-  static int minFFtsize() { return pffft_min_fft_size(PFFFT_REAL); }
-  static int nextPowerOfTwo(int N) { return pffft_next_power_of_two(N); }
-  static bool isPowerOfTwo(int N) { return pffft_is_power_of_two(N); }
   static int simd_size() { return pffft_simd_size(); }
   static const char * simd_arch() { return pffft_simd_arch(); }
 };
 template<> struct Types< std::complex<float> >  {
   typedef float  Scalar;
   typedef std::complex<float>  Complex;
-  static Complex* alignedAlloc(int N) { return (Complex*)pffft_aligned_malloc( N * sizeof(Complex) ); }
-  static void     alignedFree(void *ptr) { pffft_aligned_free(ptr); }
-  static int minFFtsize() { return pffft_min_fft_size(PFFFT_COMPLEX); }
-  static int nextPowerOfTwo(int N) { return pffft_next_power_of_two(N); }
-  static bool isPowerOfTwo(int N) { return pffft_is_power_of_two(N); }
   static int simd_size() { return pffft_simd_size(); }
   static const char * simd_arch() { return pffft_simd_arch(); }
 };
@@ -79,22 +69,12 @@ template<> struct Types< std::complex<float> >  {
 template<> struct Types<double> {
   typedef double Scalar;
   typedef std::complex<Scalar> Complex;
-  static Scalar* alignedAlloc(int N) { return (Scalar*)pffftd_aligned_malloc( N * sizeof(Scalar) ); }
-  static void    alignedFree(void *ptr) { pffftd_aligned_free(ptr); }
-  static int minFFtsize() { return pffftd_min_fft_size(PFFFT_REAL); }
-  static int nextPowerOfTwo(int N) { return pffftd_next_power_of_two(N); }
-  static bool isPowerOfTwo(int N) { return pffftd_is_power_of_two(N); }
   static int simd_size() { return pffftd_simd_size(); }
   static const char * simd_arch() { return pffftd_simd_arch(); }
 };
 template<> struct Types< std::complex<double> > {
   typedef double Scalar;
   typedef std::complex<double> Complex;
-  static Complex* alignedAlloc(int N) { return (Complex*)pffftd_aligned_malloc( N * sizeof(Complex) ); }
-  static void     alignedFree(void *ptr) { pffftd_aligned_free(ptr); }
-  static int minFFtsize() { return pffftd_min_fft_size(PFFFT_COMPLEX); }
-  static int nextPowerOfTwo(int N) { return pffftd_next_power_of_two(N); }
-  static bool isPowerOfTwo(int N) { return pffftd_is_power_of_two(N); }
   static int simd_size() { return pffftd_simd_size(); }
   static const char * simd_arch() { return pffftd_simd_arch(); }
 };
@@ -143,10 +123,12 @@ public:
   static bool isDoubleScalar() { return sizeof(Scalar) == sizeof(double); }
 
   // simple helper to get minimum possible fft length
-  static int minFFtsize() { return Types<T>::minFFtsize(); }
+  static int minFFtsize() { return pffft_min_fft_size( isComplexTransform() ? PFFFT_COMPLEX : PFFFT_REAL ); }
+
   // simple helper to determine next power of 2 - without inexact/rounding floating point operations
-  static int nextPowerOfTwo(int N) { return Types<T>::nextPowerOfTwo(N); }
-  static bool isPowerOfTwo(int N) { return Types<T>::isPowerOfTwo(N); }
+  static int nextPowerOfTwo(int N) { return pffft_next_power_of_two(N); }
+  static bool isPowerOfTwo(int N) { return pffft_is_power_of_two(N); }
+
   static int simd_size() { return Types<T>::simd_size(); }
   static const char * simd_arch() { return Types<T>::simd_arch(); }
 
@@ -380,6 +362,31 @@ private:
 };
 
 
+template<typename T>
+inline T* alignedAlloc(int length) {
+  return (T*)pffft_aligned_malloc( length * sizeof(T) );
+}
+
+inline void alignedFree(void *ptr) {
+  pffft_aligned_free(ptr);
+}
+
+
+// simple helper to get minimum possible fft length
+inline int minFFtsize(pffft_transform_t transform) {
+  return pffft_min_fft_size(transform);
+}
+
+// simple helper to determine next power of 2 - without inexact/rounding floating point operations
+inline int nextPowerOfTwo(int N) {
+  return pffft_next_power_of_two(N);
+}
+
+inline bool isPowerOfTwo(int N) {
+  return pffft_is_power_of_two(N);
+}
+
+
 
 ////////////////////////////////////////////////////////////////////
 
@@ -659,7 +666,7 @@ inline Fft<T>::Fft(int length, int stackThresholdLen)
 template<typename T>
 inline Fft<T>::~Fft()
 {
-  Types<T>::alignedFree(work);
+  alignedFree(work);
 }
 
 template<typename T>
@@ -679,12 +686,12 @@ Fft<T>::prepareLength(int newLength)
   setup.prepareLength(length);
 
   if (work) {
-    Types<T>::alignedFree(work);
+    alignedFree(work);
     work = NULL;
   }
 
   if (useHeap) {
-    work = reinterpret_cast<Scalar*>( Types<T>::alignedAlloc(length) );
+    work = reinterpret_cast<Scalar*>( alignedAllocType(length) );
   }
 }
 
@@ -861,7 +868,7 @@ template<typename T>
 inline void
 Fft<T>::alignedFree(void* ptr)
 {
-  Types<T>::alignedFree(ptr);
+  pffft::alignedFree(ptr);
 }
 
 
@@ -869,21 +876,21 @@ template<typename T>
 inline T*
 pffft::Fft<T>::alignedAllocType(int length)
 {
-  return Types<T>::alignedAlloc(length);
+  return alignedAlloc<T>(length);
 }
 
 template<typename T>
 inline typename pffft::Fft<T>::Scalar*
 pffft::Fft<T>::alignedAllocScalar(int length)
 {
-  return Types<Scalar>::alignedAlloc(length);
+  return alignedAlloc<Scalar>(length);
 }
 
 template<typename T>
 inline typename Fft<T>::Complex *
 Fft<T>::alignedAllocComplex(int length)
 {
-  return Types<Complex>::alignedAlloc(length);
+  return alignedAlloc<Complex>(length);
 }
 
 
@@ -954,7 +961,7 @@ class PFAlloc {
 
     // allocate but don't initialize num elements of type T
     pointer allocate (size_type num, const void* = 0) {
-        pointer ret = (pointer)( Types<T>::alignedAlloc(num) );
+        pointer ret = (pointer)( alignedAlloc<T>(num) );
         return ret;
     }
 
@@ -973,7 +980,7 @@ class PFAlloc {
     // deallocate storage p of deleted elements
     void deallocate (pointer p, size_type num) {
         // deallocate memory with pffft
-        Types<T>::alignedFree( (void*)p );
+        alignedFree( (void*)p );
     }
 };
 
diff --git a/pffft_common.c b/pffft_common.c
new file mode 100644
index 0000000..1121ac7
--- /dev/null
+++ b/pffft_common.c
@@ -0,0 +1,68 @@
+
+#include "pffft.h"
+
+#include <stdlib.h>
+
+/* SSE and co. like 16-byte aligned pointers;
+ * with a 64-byte alignment, we are even aligned on L2 cache lines... */
+#define MALLOC_V4SF_ALIGNMENT 64
+
+static void * Valigned_malloc(size_t nb_bytes) {
+  void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT);
+  if (!p0) return (void *) 0;
+  p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1))));
+  *((void **) p - 1) = p0;
+  return p;
+}
+
+static void Valigned_free(void *p) {
+  if (p) free(*((void **) p - 1));
+}
+
+
+static int next_power_of_two(int N) {
+  /* https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
+  /* compute the next highest power of 2 of 32-bit v */
+  unsigned v = N;
+  v--;
+  v |= v >> 1;
+  v |= v >> 2;
+  v |= v >> 4;
+  v |= v >> 8;
+  v |= v >> 16;
+  v++;
+  return v;
+}
+
+static int is_power_of_two(int N) {
+  /* https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 */
+  int f = N && !(N & (N - 1));
+  return f;
+}
+
+static int min_fft_size(pffft_transform_t transform) {
+  /* unfortunately, the fft size must be a multiple of 16 for complex FFTs
+     and 32 for real FFTs -- a lot of stuff would need to be rewritten to
+     handle other cases (or maybe just switch to a scalar fft, I don't know..) */
+  int simdSz = pffft_simd_size();
+  if (transform == PFFFT_REAL)
+    return ( 2 * simdSz * simdSz );
+  else if (transform == PFFFT_COMPLEX)
+    return ( simdSz * simdSz );
+  else
+    return 1;
+}
+
+
+void *pffft_aligned_malloc(size_t nb_bytes) { return Valigned_malloc(nb_bytes); }
+void pffft_aligned_free(void *p) { Valigned_free(p); }
+int pffft_next_power_of_two(int N) { return next_power_of_two(N); }
+int pffft_is_power_of_two(int N) { return is_power_of_two(N); }
+int pffft_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); }
+
+void *pffftd_aligned_malloc(size_t nb_bytes) { return Valigned_malloc(nb_bytes); }
+void pffftd_aligned_free(void *p) { Valigned_free(p); }
+int pffftd_next_power_of_two(int N) { return next_power_of_two(N); }
+int pffftd_is_power_of_two(int N) { return is_power_of_two(N); }
+int pffftd_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); }
+
diff --git a/pffft_double.c b/pffft_double.c
index dd0295c..28c0832 100644
--- a/pffft_double.c
+++ b/pffft_double.c
@@ -117,9 +117,6 @@
 #define FUNC_ZREORDER              pffftd_zreorder
 #define FUNC_ZCONVOLVE_ACCUMULATE  pffftd_zconvolve_accumulate
 #define FUNC_ZCONVOLVE_NO_ACCU     pffftd_zconvolve_no_accu
-#define FUNC_MIN_FFT_SIZE          pffftd_min_fft_size
-#define FUNC_NEXT_POWER_OF_TWO     pffftd_next_power_of_two
-#define FUNC_IS_POWER_OF_TWO       pffftd_is_power_of_two
 
 #define FUNC_ALIGNED_MALLOC        pffftd_aligned_malloc
 #define FUNC_ALIGNED_FREE          pffftd_aligned_free
diff --git a/pffft_double.h b/pffft_double.h
index cd460ed..d83c06d 100644
--- a/pffft_double.h
+++ b/pffft_double.h
@@ -172,19 +172,27 @@ extern "C" {
   void pffftd_zconvolve_accumulate(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double *dft_ab, double scaling);
 
   /* 
-    Perform a multiplication of the frequency components of dft_a and
-    dft_b and put result in dft_ab. The arrays should have
-    been obtained with pffft_transform(.., PFFFT_FORWARD) and should
-    *not* have been reordered with pffft_zreorder (otherwise just
-    perform the operation yourself as the dft coefs are stored as
-    interleaved complex numbers).
-     
-    the operation performed is: dft_ab = (dft_a * fdt_b)*scaling
-     
-    The dft_a, dft_b and dft_ab pointers may alias.
+     Perform a multiplication of the frequency components of dft_a and
+     dft_b and put result in dft_ab. The arrays should have
+     been obtained with pffftd_transform(.., PFFFT_FORWARD) and should
+     *not* have been reordered with pffftd_zreorder (otherwise just
+     perform the operation yourself as the dft coefs are stored as
+     interleaved complex numbers).
+
+     the operation performed is: dft_ab = (dft_a * dft_b)*scaling
+
+     The dft_a, dft_b and dft_ab pointers may alias.
   */
   void pffftd_zconvolve_no_accu(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double*dft_ab, double scaling);
 
+  /* return 4 or 1, depending on whether support for AVX instructions was enabled when building pffft_double.c */
+  int pffftd_simd_size();
+
+  /* return string identifier of used architecture (AVX/..) */
+  const char * pffftd_simd_arch();
+
+
+  /* following functions are identical to the pffft_ functions */
 
   /* simple helper to get minimum possible fft size */
   int pffftd_min_fft_size(pffft_transform_t transform);
@@ -205,12 +213,6 @@ extern "C" {
   void *pffftd_aligned_malloc(size_t nb_bytes);
   void pffftd_aligned_free(void *);
 
-  /* return 4 or 1 wether support AVX instructions was enabled when building pffft-double.c */
-  int pffftd_simd_size();
-
-  /* return string identifier of used architecture (SSE/NEON/Altivec/..) */
-  const char * pffftd_simd_arch();
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/pffft_priv_impl.h b/pffft_priv_impl.h
index e051b0f..36cae59 100644
--- a/pffft_priv_impl.h
+++ b/pffft_priv_impl.h
@@ -69,14 +69,6 @@
 #endif
 
 
-void *FUNC_ALIGNED_MALLOC(size_t nb_bytes) {
-  return Valigned_malloc(nb_bytes);
-}
-
-void FUNC_ALIGNED_FREE(void *p) {
-  Valigned_free(p);
-}
-
 int FUNC_SIMD_SIZE() { return SIMD_SZ; }
 
 const char * FUNC_SIMD_ARCH() { return VARCH; }
@@ -2195,37 +2187,3 @@ int FUNC_VALIDATE_SIMD_EX(FILE * DbgOut)
 
 #endif  /* end if ( SIMD_SZ == 4 ) */
 
-
-
-int FUNC_NEXT_POWER_OF_TWO(int N) {
-  /* https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
-  /* compute the next highest power of 2 of 32-bit v */
-  unsigned v = N;
-  v--;
-  v |= v >> 1;
-  v |= v >> 2;
-  v |= v >> 4;
-  v |= v >> 8;
-  v |= v >> 16;
-  v++;
-  return v;
-}
-
-int FUNC_IS_POWER_OF_TWO(int N) {
-  /* https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 */
-  int f = N && !(N & (N - 1));
-  return f;
-}
-
-int FUNC_MIN_FFT_SIZE(pffft_transform_t transform) {
-  /* unfortunately, the fft size must be a multiple of 16 for complex FFTs
-     and 32 for real FFTs -- a lot of stuff would need to be rewritten to
-     handle other cases (or maybe just switch to a scalar fft, I don't know..) */
-  if (transform == PFFFT_REAL)
-    return ( 2 * SIMD_SZ * SIMD_SZ );
-  else if (transform == PFFFT_COMPLEX)
-    return ( SIMD_SZ * SIMD_SZ );
-  else
-    return 1;
-}
-
diff --git a/simd/pf_double.h b/simd/pf_double.h
index 718172c..c6bac31 100644
--- a/simd/pf_double.h
+++ b/simd/pf_double.h
@@ -78,24 +78,5 @@ typedef double vsfscalar;
 #define SVMUL(f,v) VMUL(LD_PS1(f),v)
 #endif
 
-/* SSE and co like 16-bytes aligned pointers
- * with a 64-byte alignment, we are even aligned on L2 cache lines... */
-#define MALLOC_V4SF_ALIGNMENT 64
-
-static
-void *Valigned_malloc(size_t nb_bytes) {
-  void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT);
-  if (!p0) return (void *) 0;
-  p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1))));
-  *((void **) p - 1) = p0;
-  return p;
-}
-
-static
-void Valigned_free(void *p) {
-  if (p) free(*((void **) p - 1));
-}
-
-
 #endif /* PF_DBL_H */
 
diff --git a/simd/pf_float.h b/simd/pf_float.h
index 2491a42..1798194 100644
--- a/simd/pf_float.h
+++ b/simd/pf_float.h
@@ -80,24 +80,5 @@ typedef float vsfscalar;
 #define SVMUL(f,v) VMUL(LD_PS1(f),v)
 #endif
 
-/* SSE and co like 16-bytes aligned pointers
- * with a 64-byte alignment, we are even aligned on L2 cache lines... */
-#define MALLOC_V4SF_ALIGNMENT 64
-
-static
-void *Valigned_malloc(size_t nb_bytes) {
-  void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT);
-  if (!p0) return (void *) 0;
-  p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1))));
-  *((void **) p - 1) = p0;
-  return p;
-}
-
-static
-void Valigned_free(void *p) {
-  if (p) free(*((void **) p - 1));
-}
-
-
 #endif /* PF_FLT_H */
author	hayati ayguen <h_ayguen@web.de>	2020-04-13 04:02:07 +0200
committer	hayati ayguen <h_ayguen@web.de>	2020-04-13 04:02:07 +0200
commit	eeb17fc8a08078372de542647841750136e1cf85 (patch)
tree	f0e45956576f54b3fcc65025fef46e8afde451a6
parent	ca8c5f815c83299a949761700afcb99c485ea61a (diff)
download	pffft-eeb17fc8a08078372de542647841750136e1cf85.tar.gz