#include "pffft.h" #include /* SSE and co like 16-bytes aligned pointers * with a 64-byte alignment, we are even aligned on L2 cache lines... */ #define MALLOC_V4SF_ALIGNMENT 64 static void * Valigned_malloc(size_t nb_bytes) { void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT); if (!p0) return (void *) 0; p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1)))); *((void **) p - 1) = p0; return p; } static void Valigned_free(void *p) { if (p) free(*((void **) p - 1)); } static int next_power_of_two(int N) { /* https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */ /* compute the next highest power of 2 of 32-bit v */ unsigned v = N; v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v++; return v; } static int is_power_of_two(int N) { /* https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 */ int f = N && !(N & (N - 1)); return f; } static int min_fft_size(pffft_transform_t transform) { /* unfortunately, the fft size must be a multiple of 16 for complex FFTs and 32 for real FFTs -- a lot of stuff would need to be rewritten to handle other cases (or maybe just switch to a scalar fft, I don't know..) */ int simdSz = pffft_simd_size(); if (transform == PFFFT_REAL) return ( 2 * simdSz * simdSz ); else if (transform == PFFFT_COMPLEX) return ( simdSz * simdSz ); else return 1; } void *pffft_aligned_malloc(size_t nb_bytes) { return Valigned_malloc(nb_bytes); } void pffft_aligned_free(void *p) { Valigned_free(p); } int pffft_next_power_of_two(int N) { return next_power_of_two(N); } int pffft_is_power_of_two(int N) { return is_power_of_two(N); } int pffft_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); } void *pffftd_aligned_malloc(size_t nb_bytes) { return Valigned_malloc(nb_bytes); } void pffftd_aligned_free(void *p) { Valigned_free(p); } int pffftd_next_power_of_two(int N) { return next_power_of_two(N); } int pffftd_is_power_of_two(int N) { return is_power_of_two(N); } int pffftd_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); }