aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorhayati ayguen <h_ayguen@web.de>2020-04-13 04:02:07 +0200
committerhayati ayguen <h_ayguen@web.de>2020-04-13 04:02:07 +0200
commiteeb17fc8a08078372de542647841750136e1cf85 (patch)
treef0e45956576f54b3fcc65025fef46e8afde451a6
parentca8c5f815c83299a949761700afcb99c485ea61a (diff)
downloadpffft-eeb17fc8a08078372de542647841750136e1cf85.tar.gz
moved common functions into pffft_common.c
Signed-off-by: hayati ayguen <h_ayguen@web.de>
-rw-r--r--CMakeLists.txt2
-rw-r--r--pffft.c3
-rw-r--r--pffft.h19
-rw-r--r--pffft.hpp71
-rw-r--r--pffft_common.c68
-rw-r--r--pffft_double.c3
-rw-r--r--pffft_double.h34
-rw-r--r--pffft_priv_impl.h42
-rw-r--r--simd/pf_double.h19
-rw-r--r--simd/pf_float.h19
10 files changed, 137 insertions, 143 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 54cb7c2..9b6bc10 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -97,7 +97,7 @@ endif()
######################################################
-add_library(PFFFT STATIC ${FLOAT_SOURCES} ${DOUBLE_SOURCES} pffft_priv_impl.h pffft.hpp )
+add_library(PFFFT STATIC ${FLOAT_SOURCES} ${DOUBLE_SOURCES} pffft_common.c pffft_priv_impl.h pffft.hpp )
target_compile_definitions(PFFFT PRIVATE _USE_MATH_DEFINES)
if (USE_SCALAR_VECT)
target_compile_definitions(PFFFT PRIVATE PFFFT_SCALVEC_ENABLED=1)
diff --git a/pffft.c b/pffft.c
index dde14e4..059f2d7 100644
--- a/pffft.c
+++ b/pffft.c
@@ -106,9 +106,6 @@
#define FUNC_ZREORDER pffft_zreorder
#define FUNC_ZCONVOLVE_ACCUMULATE pffft_zconvolve_accumulate
#define FUNC_ZCONVOLVE_NO_ACCU pffft_zconvolve_no_accu
-#define FUNC_MIN_FFT_SIZE pffft_min_fft_size
-#define FUNC_NEXT_POWER_OF_TWO pffft_next_power_of_two
-#define FUNC_IS_POWER_OF_TWO pffft_is_power_of_two
#define FUNC_ALIGNED_MALLOC pffft_aligned_malloc
#define FUNC_ALIGNED_FREE pffft_aligned_free
diff --git a/pffft.h b/pffft.h
index 38dc342..31bb731 100644
--- a/pffft.h
+++ b/pffft.h
@@ -173,13 +173,22 @@ extern "C" {
*not* have been reordered with pffft_zreorder (otherwise just
perform the operation yourself as the dft coefs are stored as
interleaved complex numbers).
-
+
the operation performed is: dft_ab = (dft_a * fdt_b)*scaling
-
+
The dft_a, dft_b and dft_ab pointers may alias.
*/
void pffft_zconvolve_no_accu(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
+ /* return 4 or 1, depending on whether SSE/NEON/Altivec support was enabled when building pffft.c */
+ int pffft_simd_size();
+
+ /* return string identifier of used architecture (SSE/NEON/Altivec/..) */
+ const char * pffft_simd_arch();
+
+
+ /* following functions are identical to the pffftd_ functions */
+
/* simple helper to get minimum possible fft size */
int pffft_min_fft_size(pffft_transform_t transform);
@@ -199,12 +208,6 @@ extern "C" {
void *pffft_aligned_malloc(size_t nb_bytes);
void pffft_aligned_free(void *);
- /* return 4 or 1 wether support SSE/NEON/Altivec instructions was enabled when building pffft.c */
- int pffft_simd_size();
-
- /* return string identifier of used architecture (SSE/NEON/Altivec/..) */
- const char * pffft_simd_arch();
-
#ifdef __cplusplus
}
#endif
diff --git a/pffft.hpp b/pffft.hpp
index 832ebaf..ce910f9 100644
--- a/pffft.hpp
+++ b/pffft.hpp
@@ -55,22 +55,12 @@ template<typename T> struct Types {};
template<> struct Types<float> {
typedef float Scalar;
typedef std::complex<Scalar> Complex;
- static Scalar* alignedAlloc(int N) { return (Scalar*)pffft_aligned_malloc( N * sizeof(Scalar) ); }
- static void alignedFree(void *ptr) { pffft_aligned_free(ptr); }
- static int minFFtsize() { return pffft_min_fft_size(PFFFT_REAL); }
- static int nextPowerOfTwo(int N) { return pffft_next_power_of_two(N); }
- static bool isPowerOfTwo(int N) { return pffft_is_power_of_two(N); }
static int simd_size() { return pffft_simd_size(); }
static const char * simd_arch() { return pffft_simd_arch(); }
};
template<> struct Types< std::complex<float> > {
typedef float Scalar;
typedef std::complex<float> Complex;
- static Complex* alignedAlloc(int N) { return (Complex*)pffft_aligned_malloc( N * sizeof(Complex) ); }
- static void alignedFree(void *ptr) { pffft_aligned_free(ptr); }
- static int minFFtsize() { return pffft_min_fft_size(PFFFT_COMPLEX); }
- static int nextPowerOfTwo(int N) { return pffft_next_power_of_two(N); }
- static bool isPowerOfTwo(int N) { return pffft_is_power_of_two(N); }
static int simd_size() { return pffft_simd_size(); }
static const char * simd_arch() { return pffft_simd_arch(); }
};
@@ -79,22 +69,12 @@ template<> struct Types< std::complex<float> > {
template<> struct Types<double> {
typedef double Scalar;
typedef std::complex<Scalar> Complex;
- static Scalar* alignedAlloc(int N) { return (Scalar*)pffftd_aligned_malloc( N * sizeof(Scalar) ); }
- static void alignedFree(void *ptr) { pffftd_aligned_free(ptr); }
- static int minFFtsize() { return pffftd_min_fft_size(PFFFT_REAL); }
- static int nextPowerOfTwo(int N) { return pffftd_next_power_of_two(N); }
- static bool isPowerOfTwo(int N) { return pffftd_is_power_of_two(N); }
static int simd_size() { return pffftd_simd_size(); }
static const char * simd_arch() { return pffftd_simd_arch(); }
};
template<> struct Types< std::complex<double> > {
typedef double Scalar;
typedef std::complex<double> Complex;
- static Complex* alignedAlloc(int N) { return (Complex*)pffftd_aligned_malloc( N * sizeof(Complex) ); }
- static void alignedFree(void *ptr) { pffftd_aligned_free(ptr); }
- static int minFFtsize() { return pffftd_min_fft_size(PFFFT_COMPLEX); }
- static int nextPowerOfTwo(int N) { return pffftd_next_power_of_two(N); }
- static bool isPowerOfTwo(int N) { return pffftd_is_power_of_two(N); }
static int simd_size() { return pffftd_simd_size(); }
static const char * simd_arch() { return pffftd_simd_arch(); }
};
@@ -143,10 +123,12 @@ public:
static bool isDoubleScalar() { return sizeof(Scalar) == sizeof(double); }
// simple helper to get minimum possible fft length
- static int minFFtsize() { return Types<T>::minFFtsize(); }
+ static int minFFtsize() { return pffft_min_fft_size( isComplexTransform() ? PFFFT_COMPLEX : PFFFT_REAL ); }
+
// simple helper to determine next power of 2 - without inexact/rounding floating point operations
- static int nextPowerOfTwo(int N) { return Types<T>::nextPowerOfTwo(N); }
- static bool isPowerOfTwo(int N) { return Types<T>::isPowerOfTwo(N); }
+ static int nextPowerOfTwo(int N) { return pffft_next_power_of_two(N); }
+ static bool isPowerOfTwo(int N) { return pffft_is_power_of_two(N); }
+
static int simd_size() { return Types<T>::simd_size(); }
static const char * simd_arch() { return Types<T>::simd_arch(); }
@@ -380,6 +362,31 @@ private:
};
+template<typename T>
+inline T* alignedAlloc(int length) {
+ return (T*)pffft_aligned_malloc( length * sizeof(T) );
+}
+
+inline void alignedFree(void *ptr) {
+ pffft_aligned_free(ptr);
+}
+
+
+// simple helper to get minimum possible fft length
+inline int minFFtsize(pffft_transform_t transform) {
+ return pffft_min_fft_size(transform);
+}
+
+// simple helper to determine next power of 2 - without inexact/rounding floating point operations
+inline int nextPowerOfTwo(int N) {
+ return pffft_next_power_of_two(N);
+}
+
+inline bool isPowerOfTwo(int N) {
+ return pffft_is_power_of_two(N);
+}
+
+
////////////////////////////////////////////////////////////////////
@@ -659,7 +666,7 @@ inline Fft<T>::Fft(int length, int stackThresholdLen)
template<typename T>
inline Fft<T>::~Fft()
{
- Types<T>::alignedFree(work);
+ alignedFree(work);
}
template<typename T>
@@ -679,12 +686,12 @@ Fft<T>::prepareLength(int newLength)
setup.prepareLength(length);
if (work) {
- Types<T>::alignedFree(work);
+ alignedFree(work);
work = NULL;
}
if (useHeap) {
- work = reinterpret_cast<Scalar*>( Types<T>::alignedAlloc(length) );
+ work = reinterpret_cast<Scalar*>( alignedAllocType(length) );
}
}
@@ -861,7 +868,7 @@ template<typename T>
inline void
Fft<T>::alignedFree(void* ptr)
{
- Types<T>::alignedFree(ptr);
+ pffft::alignedFree(ptr);
}
@@ -869,21 +876,21 @@ template<typename T>
inline T*
pffft::Fft<T>::alignedAllocType(int length)
{
- return Types<T>::alignedAlloc(length);
+ return alignedAlloc<T>(length);
}
template<typename T>
inline typename pffft::Fft<T>::Scalar*
pffft::Fft<T>::alignedAllocScalar(int length)
{
- return Types<Scalar>::alignedAlloc(length);
+ return alignedAlloc<Scalar>(length);
}
template<typename T>
inline typename Fft<T>::Complex *
Fft<T>::alignedAllocComplex(int length)
{
- return Types<Complex>::alignedAlloc(length);
+ return alignedAlloc<Complex>(length);
}
@@ -954,7 +961,7 @@ class PFAlloc {
// allocate but don't initialize num elements of type T
pointer allocate (size_type num, const void* = 0) {
- pointer ret = (pointer)( Types<T>::alignedAlloc(num) );
+ pointer ret = (pointer)( alignedAlloc<T>(num) );
return ret;
}
@@ -973,7 +980,7 @@ class PFAlloc {
// deallocate storage p of deleted elements
void deallocate (pointer p, size_type num) {
// deallocate memory with pffft
- Types<T>::alignedFree( (void*)p );
+ alignedFree( (void*)p );
}
};
diff --git a/pffft_common.c b/pffft_common.c
new file mode 100644
index 0000000..1121ac7
--- /dev/null
+++ b/pffft_common.c
@@ -0,0 +1,68 @@
+
+#include "pffft.h"
+
+#include <stdlib.h>
+
+/* SSE and co like 16-bytes aligned pointers
+ * with a 64-byte alignment, we are even aligned on L2 cache lines... */
+#define MALLOC_V4SF_ALIGNMENT 64
+
+static void * Valigned_malloc(size_t nb_bytes) {
+ void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT);
+ if (!p0) return (void *) 0;
+ p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1))));
+ *((void **) p - 1) = p0;
+ return p;
+}
+
+static void Valigned_free(void *p) {
+ if (p) free(*((void **) p - 1));
+}
+
+
+static int next_power_of_two(int N) {
+ /* https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
+ /* compute the next highest power of 2 of 32-bit v */
+ unsigned v = N;
+ v--;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ v++;
+ return v;
+}
+
+static int is_power_of_two(int N) {
+ /* https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 */
+ int f = N && !(N & (N - 1));
+ return f;
+}
+
+static int min_fft_size(pffft_transform_t transform) {
+ /* unfortunately, the fft size must be a multiple of 16 for complex FFTs
+ and 32 for real FFTs -- a lot of stuff would need to be rewritten to
+ handle other cases (or maybe just switch to a scalar fft, I don't know..) */
+ int simdSz = pffft_simd_size();
+ if (transform == PFFFT_REAL)
+ return ( 2 * simdSz * simdSz );
+ else if (transform == PFFFT_COMPLEX)
+ return ( simdSz * simdSz );
+ else
+ return 1;
+}
+
+
+void *pffft_aligned_malloc(size_t nb_bytes) { return Valigned_malloc(nb_bytes); }
+void pffft_aligned_free(void *p) { Valigned_free(p); }
+int pffft_next_power_of_two(int N) { return next_power_of_two(N); }
+int pffft_is_power_of_two(int N) { return is_power_of_two(N); }
+int pffft_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); }
+
+void *pffftd_aligned_malloc(size_t nb_bytes) { return Valigned_malloc(nb_bytes); }
+void pffftd_aligned_free(void *p) { Valigned_free(p); }
+int pffftd_next_power_of_two(int N) { return next_power_of_two(N); }
+int pffftd_is_power_of_two(int N) { return is_power_of_two(N); }
+int pffftd_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); }
+
diff --git a/pffft_double.c b/pffft_double.c
index dd0295c..28c0832 100644
--- a/pffft_double.c
+++ b/pffft_double.c
@@ -117,9 +117,6 @@
#define FUNC_ZREORDER pffftd_zreorder
#define FUNC_ZCONVOLVE_ACCUMULATE pffftd_zconvolve_accumulate
#define FUNC_ZCONVOLVE_NO_ACCU pffftd_zconvolve_no_accu
-#define FUNC_MIN_FFT_SIZE pffftd_min_fft_size
-#define FUNC_NEXT_POWER_OF_TWO pffftd_next_power_of_two
-#define FUNC_IS_POWER_OF_TWO pffftd_is_power_of_two
#define FUNC_ALIGNED_MALLOC pffftd_aligned_malloc
#define FUNC_ALIGNED_FREE pffftd_aligned_free
diff --git a/pffft_double.h b/pffft_double.h
index cd460ed..d83c06d 100644
--- a/pffft_double.h
+++ b/pffft_double.h
@@ -172,19 +172,27 @@ extern "C" {
void pffftd_zconvolve_accumulate(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double *dft_ab, double scaling);
/*
- Perform a multiplication of the frequency components of dft_a and
- dft_b and put result in dft_ab. The arrays should have
- been obtained with pffft_transform(.., PFFFT_FORWARD) and should
- *not* have been reordered with pffft_zreorder (otherwise just
- perform the operation yourself as the dft coefs are stored as
- interleaved complex numbers).
-
- the operation performed is: dft_ab = (dft_a * fdt_b)*scaling
-
- The dft_a, dft_b and dft_ab pointers may alias.
+ Perform a multiplication of the frequency components of dft_a and
+ dft_b and put result in dft_ab. The arrays should have
+ been obtained with pffftd_transform(.., PFFFT_FORWARD) and should
+ *not* have been reordered with pffftd_zreorder (otherwise just
+ perform the operation yourself as the dft coefs are stored as
+ interleaved complex numbers).
+
+ the operation performed is: dft_ab = (dft_a * dft_b)*scaling
+
+ The dft_a, dft_b and dft_ab pointers may alias.
*/
void pffftd_zconvolve_no_accu(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double*dft_ab, double scaling);
+ /* return 4 or 1, depending on whether AVX support was enabled when building pffft_double.c */
+ int pffftd_simd_size();
+
+ /* return string identifier of used architecture (AVX/..) */
+ const char * pffftd_simd_arch();
+
+
+ /* following functions are identical to the pffft_ functions */
/* simple helper to get minimum possible fft size */
int pffftd_min_fft_size(pffft_transform_t transform);
@@ -205,12 +213,6 @@ extern "C" {
void *pffftd_aligned_malloc(size_t nb_bytes);
void pffftd_aligned_free(void *);
- /* return 4 or 1 wether support AVX instructions was enabled when building pffft-double.c */
- int pffftd_simd_size();
-
- /* return string identifier of used architecture (SSE/NEON/Altivec/..) */
- const char * pffftd_simd_arch();
-
#ifdef __cplusplus
}
#endif
diff --git a/pffft_priv_impl.h b/pffft_priv_impl.h
index e051b0f..36cae59 100644
--- a/pffft_priv_impl.h
+++ b/pffft_priv_impl.h
@@ -69,14 +69,6 @@
#endif
-void *FUNC_ALIGNED_MALLOC(size_t nb_bytes) {
- return Valigned_malloc(nb_bytes);
-}
-
-void FUNC_ALIGNED_FREE(void *p) {
- Valigned_free(p);
-}
-
int FUNC_SIMD_SIZE() { return SIMD_SZ; }
const char * FUNC_SIMD_ARCH() { return VARCH; }
@@ -2195,37 +2187,3 @@ int FUNC_VALIDATE_SIMD_EX(FILE * DbgOut)
#endif /* end if ( SIMD_SZ == 4 ) */
-
-
-int FUNC_NEXT_POWER_OF_TWO(int N) {
- /* https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
- /* compute the next highest power of 2 of 32-bit v */
- unsigned v = N;
- v--;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- v++;
- return v;
-}
-
-int FUNC_IS_POWER_OF_TWO(int N) {
- /* https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 */
- int f = N && !(N & (N - 1));
- return f;
-}
-
-int FUNC_MIN_FFT_SIZE(pffft_transform_t transform) {
- /* unfortunately, the fft size must be a multiple of 16 for complex FFTs
- and 32 for real FFTs -- a lot of stuff would need to be rewritten to
- handle other cases (or maybe just switch to a scalar fft, I don't know..) */
- if (transform == PFFFT_REAL)
- return ( 2 * SIMD_SZ * SIMD_SZ );
- else if (transform == PFFFT_COMPLEX)
- return ( SIMD_SZ * SIMD_SZ );
- else
- return 1;
-}
-
diff --git a/simd/pf_double.h b/simd/pf_double.h
index 718172c..c6bac31 100644
--- a/simd/pf_double.h
+++ b/simd/pf_double.h
@@ -78,24 +78,5 @@ typedef double vsfscalar;
#define SVMUL(f,v) VMUL(LD_PS1(f),v)
#endif
-/* SSE and co like 16-bytes aligned pointers
- * with a 64-byte alignment, we are even aligned on L2 cache lines... */
-#define MALLOC_V4SF_ALIGNMENT 64
-
-static
-void *Valigned_malloc(size_t nb_bytes) {
- void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT);
- if (!p0) return (void *) 0;
- p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1))));
- *((void **) p - 1) = p0;
- return p;
-}
-
-static
-void Valigned_free(void *p) {
- if (p) free(*((void **) p - 1));
-}
-
-
#endif /* PF_DBL_H */
diff --git a/simd/pf_float.h b/simd/pf_float.h
index 2491a42..1798194 100644
--- a/simd/pf_float.h
+++ b/simd/pf_float.h
@@ -80,24 +80,5 @@ typedef float vsfscalar;
#define SVMUL(f,v) VMUL(LD_PS1(f),v)
#endif
-/* SSE and co like 16-bytes aligned pointers
- * with a 64-byte alignment, we are even aligned on L2 cache lines... */
-#define MALLOC_V4SF_ALIGNMENT 64
-
-static
-void *Valigned_malloc(size_t nb_bytes) {
- void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT);
- if (!p0) return (void *) 0;
- p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1))));
- *((void **) p - 1) = p0;
- return p;
-}
-
-static
-void Valigned_free(void *p) {
- if (p) free(*((void **) p - 1));
-}
-
-
#endif /* PF_FLT_H */