diff options
author | dario mambro <dario.mambro@gmail.com> | 2020-03-24 20:30:39 +0100 |
---|---|---|
committer | dario mambro <dario.mambro@gmail.com> | 2020-03-24 20:30:39 +0100 |
commit | b42ce9214235e99d4de2ad93ba6beba48b6c71a5 (patch) | |
tree | 0fa470ef0bf7800d8dfdab9e793b11ab575d8361 /pffft_double.h | |
parent | 5850463c85b06f9cfcf6925559a31735e97a8099 (diff) | |
download | pffft-b42ce9214235e99d4de2ad93ba6beba48b6c71a5.tar.gz |
changed notation for double precision version
Diffstat (limited to 'pffft_double.h')
-rw-r--r-- | pffft_double.h | 200 |
1 files changed, 200 insertions, 0 deletions
diff --git a/pffft_double.h b/pffft_double.h new file mode 100644 index 0000000..9f50228 --- /dev/null +++ b/pffft_double.h @@ -0,0 +1,200 @@ +/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com ) + + Based on original fortran 77 code from FFTPACKv4 from NETLIB, + authored by Dr Paul Swarztrauber of NCAR, in 1985. + + As confirmed by the NCAR fftpack software curators, the following + FFTPACKv5 license applies to FFTPACKv4 sources. My changes are + released under the same terms. + + FFTPACK license: + + http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html + + Copyright (c) 2004 the University Corporation for Atmospheric + Research ("UCAR"). All rights reserved. Developed by NCAR's + Computational and Information Systems Laboratory, UCAR, + www.cisl.ucar.edu. + + Redistribution and use of the Software in source and binary forms, + with or without modification, is permitted provided that the + following conditions are met: + + - Neither the names of NCAR's Computational and Information Systems + Laboratory, the University Corporation for Atmospheric Research, + nor the names of its sponsors or contributors may be used to + endorse or promote products derived from this Software without + specific prior written permission. + + - Redistributions of source code must retain the above copyright + notices, this list of conditions, and the disclaimer below. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the disclaimer below in the + documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE + SOFTWARE. +*/ +/* + NOTE: This file is adapted from Julien Pommier's original PFFFT, + which works on 32 bit floating point precision using SSE instructions, + to work with 64 bit floating point precision using AVX instructions. + Author: Dario Mambro @ https://github.com/unevens/pffft +*/ +/* + PFFFT : a Pretty Fast FFT. + + This is basically an adaptation of the single precision fftpack + (v4) as found on netlib taking advantage of SIMD instruction found + on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON). + + For architectures where no SIMD instruction is available, the code + falls back to a scalar version. + + Restrictions: + + - 1D transforms only, with 64-bit single precision. + + - supports only transforms for inputs of length N of the form + N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128, + 144, 160, etc are all acceptable lengths). Performance is best for + 128<=N<=8192. + + - all (double*) pointers in the functions below are expected to + have an "simd-compatible" alignment, that is 32 bytes on x86 and + powerpc CPUs. + + You can allocate such buffers with the functions + pffft_aligned_malloc / pffft_aligned_free (or with stuff like + posix_memalign..) + +*/ + +#ifndef PFFFT_DOUBLE_H +#define PFFFT_DOUBLE_H + +#include <stddef.h> // for size_t + +#ifdef __cplusplus +extern "C" { +#endif + + /* opaque struct holding internal stuff (precomputed twiddle factors) + this struct can be shared by many threads as it contains only + read-only data. + */ + typedef struct PFFFTD_Setup PFFFTD_Setup; + + /* direction of the transform */ + typedef enum { PFFFTD_FORWARD, PFFFTD_BACKWARD } pffftd_direction_t; + + /* type of transform */ + typedef enum { PFFFTD_REAL, PFFFTD_COMPLEX } pffftd_transform_t; + + /* + prepare for performing transforms of size N -- the returned + PFFFTD_Setup structure is read-only so it can safely be shared by + multiple concurrent threads. + */ + PFFFTD_Setup *pffftd_new_setup(int N, pffftd_transform_t transform); + void pffftd_destroy_setup(PFFFTD_Setup *); + /* + Perform a Fourier transform , The z-domain data is stored in the + most efficient order for transforming it back, or using it for + convolution. If you need to have its content sorted in the + "usual" way, that is as an array of interleaved complex numbers, + either use pffft_transform_ordered , or call pffft_zreorder after + the forward fft, and before the backward fft. + + Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x. + Typically you will want to scale the backward transform by 1/N. + + The 'work' pointer should point to an area of N (2*N for complex + fft) doubles, properly aligned. If 'work' is NULL, then stack will + be used instead (this is probably the best strategy for small + FFTs, say for N < 16384). + + input and output may alias. + */ + void pffftd_transform(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffftd_direction_t direction); + + /* + Similar to pffft_transform, but makes sure that the output is + ordered as expected (interleaved complex numbers). This is + similar to calling pffft_transform and then pffft_zreorder. + + input and output may alias. + */ + void pffftd_transform_ordered(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffftd_direction_t direction); + + /* + call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(..., + PFFFT_FORWARD) if you want to have the frequency components in + the correct "canonical" order, as interleaved complex numbers. + + (for real transforms, both 0-frequency and half frequency + components, which are real, are assembled in the first entry as + F(0)+i*F(n/2+1). Note that the original fftpack did place + F(n/2+1) at the end of the arrays). + + input and output should not alias. + */ + void pffftd_zreorder(PFFFTD_Setup *setup, const double *input, double *output, pffftd_direction_t direction); + + /* + Perform a multiplication of the frequency components of dft_a and + dft_b and accumulate them into dft_ab. The arrays should have + been obtained with pffft_transform(.., PFFFT_FORWARD) and should + *not* have been reordered with pffft_zreorder (otherwise just + perform the operation yourself as the dft coefs are stored as + interleaved complex numbers). + + the operation performed is: dft_ab += (dft_a * fdt_b)*scaling + + The dft_a, dft_b and dft_ab pointers may alias. + */ + void pffftd_zconvolve_accumulate(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double *dft_ab, double scaling); + + /* simple helper to get minimum possible fft size */ + int pffftd_min_fft_size(pffftd_transform_t transform); + + /* simple helper to determine next power of 2 + - without inexact/rounding floating point operations + */ + int pffftd_min_fft_size(int N); + + /* simple helper to determine next power of 2 + - without inexact/rounding floating point operations +*/ + int pffftd_next_power_of_two(int N); + + /* simple helper to determine if power of 2 - returns bool */ + int pffftd_is_power_of_two(int N); + + /* + the double buffers must have the correct alignment (32-byte boundary + on intel and powerpc). This function may be used to obtain such + correctly aligned buffers. + */ + void *pffftd_aligned_malloc(size_t nb_bytes); + void pffftd_aligned_free(void *); + + /* return 4 or 1 wether support AVX instructions was enabled when building pffft-double.c */ + int pffftd_simd_size(); + + void validate_pffftd_simd(); + +#ifdef __cplusplus +} +#endif + +#endif // PFFFT_H |