about | summary | refs | log | tree | commit | diff
path: root/decoder/ixheaacd_fft.c
diff options
context:
space:
mode:
Diffstat (limited to 'decoder/ixheaacd_fft.c')
-rw-r--r-- decoder/ixheaacd_fft.c 1282
1 files changed, 1066 insertions, 216 deletions
diff --git a/decoder/ixheaacd_fft.c b/decoder/ixheaacd_fft.c
index 0932097..ecb88c1 100644
--- a/decoder/ixheaacd_fft.c
+++ b/decoder/ixheaacd_fft.c
@@ -28,9 +28,11 @@
#include "ixheaacd_function_selector.h"
extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514];
+extern const FLOAT32 ixheaacd_twiddle_table_fft[514];
+extern const FLOAT32 ixheaacd_twiddle_table_fft_flt[16];
extern const WORD32 ixheaacd_twiddle_table_3pr[1155];
extern const WORD32 ixheaacd_twiddle_table_3pi[1155];
-extern const WORD8 ixheaacd_mps_dig_rev[16];
+extern const WORD8 ixheaacd_mps_dig_rev[8];
#define PLATFORM_INLINE __inline
@@ -61,25 +63,844 @@ static PLATFORM_INLINE WORD32 ixheaacd_mac32_sat(WORD32 a, WORD32 b, WORD32 c) {
return (result);
}
+static PLATFORM_INLINE FLOAT32 ixheaacd_mult32X32float(FLOAT32 a, FLOAT32 b) { /* Returns the FLOAT32 product a * b; no scaling or saturation is applied. */
+ FLOAT32 result;
-VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
- WORD32 *fin_im, WORD32 nlength) {
- WORD32 i, j, k, n_stages;
- WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+ result = a * b;
+
+ return result;
+}
+
+static PLATFORM_INLINE FLOAT32 ixheaacd_mac32X32float(FLOAT32 a, FLOAT32 b, FLOAT32 c) { /* Multiply-accumulate: returns a + (b * c). Note that a is the addend, b and c are the factors. */
+ FLOAT32 result;
+
+ result = a + b * c; /* product is formed first (operator precedence), then added to a */
+
+ return result;
+}
+
+VOID ixheaacd_mps_synth_calc_fft(FLOAT32 *ptr_xr, FLOAT32 *ptr_xi,
+ WORD32 npoints) { /* Transforms ptr_xr and ptr_xi in place. Each buffer is read and written as adjacent (r, i) float pairs and receives identical, independent arithmetic; the last stage writes indices 0..63 of each buffer. */
+ WORD32 i, j, k;
+ FLOAT32 y[64], z[64]; /* scratch: y holds the intermediate data of ptr_xr, z that of ptr_xi */
+ FLOAT32 *ptr_y = y, *ptr_z = z;
+ const FLOAT32 *ptr_w = ixheaacd_twiddle_table_fft_flt; /* declared extern with 16 entries; consumed only by the last stage */
+ /* Stage 1: gather four pairs per group in table-driven order, combine them, append 8 floats to y and to z. */
+ for (i = 0; i < npoints; i += 4) { /* NOTE(review): every pass stores 8 floats into y and z and indexes ixheaacd_mps_dig_rev[i >> 2] (8 entries), so npoints > 32 would overrun both -- confirm callers always pass 32 */
+ FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+ FLOAT32 *inp = ptr_xr;
+ FLOAT32 tmk;
+
+ WORD32 h2 = ixheaacd_mps_dig_rev[i >> 2]; /* start offset (in floats) of this group inside the input buffer */
+
+ inp += (h2);
+
+ x0r = *inp;
+ x0i = *(inp + 1);
+ inp += 16; /* the four input pairs of a group lie 16 floats apart */
+
+ x1r = *inp;
+ x1i = *(inp + 1);
+ inp += 16;
+
+ x2r = *inp;
+ x2i = *(inp + 1);
+ inp += 16;
+
+ x3r = *inp;
+ x3i = *(inp + 1);
+
+ x0r = x0r + x2r;
+ x0i = x0i + x2i;
+
+ tmk = x0r - x2r; /* recovers the pre-sum x0r (up to float rounding) ... */
+ x2r = tmk - x2r; /* ... so x2r becomes old x0r - old x2r */
+ tmk = x0i - x2i;
+ x2i = tmk - x2i;
+
+ x1r = x1r + x3r;
+ x1i = x1i + x3i;
+
+ tmk = x1r - x3r;
+ x3r = tmk - x3r;
+ tmk = x1i - x3i;
+ x3i = tmk - x3i;
+
+ x0r = x0r + x1r;
+ x0i = x0i + x1i;
+
+ tmk = x0r - x1r;
+ x1r = tmk - x1r;
+ tmk = x0i - x1i;
+ x1i = tmk - x1i;
+
+ x2r = x2r + x3i; /* the x2/x3 combination crosses real and imaginary parts */
+ x2i = x2i - x3r;
+
+ tmk = x2r - x3i; /* recovers the pre-sum x2r, so x3i becomes old x2r - old x3i */
+ x3i = tmk - x3i;
+ tmk = x2i + x3r; /* recovers the pre-difference x2i, so x3r becomes old x2i + old x3r */
+ x3r = tmk + x3r;
+
+ *ptr_y++ = x0r; /* results are stored in x0, x2, x1, x3 order */
+ *ptr_y++ = x0i;
+ *ptr_y++ = x2r;
+ *ptr_y++ = x2i;
+ *ptr_y++ = x1r;
+ *ptr_y++ = x1i;
+ *ptr_y++ = x3i; /* last pair is written as (x3i, x3r) */
+ *ptr_y++ = x3r;
+
+ inp = ptr_xi; /* same steps repeated for the second buffer, output appended to z */
+
+ inp += (h2);
+
+ x0r = *inp;
+ x0i = *(inp + 1);
+ inp += 16;
+
+ x1r = *inp;
+ x1i = *(inp + 1);
+ inp += 16;
+
+ x2r = *inp;
+ x2i = *(inp + 1);
+ inp += 16;
+
+ x3r = *inp;
+ x3i = *(inp + 1);
+
+ x0r = x0r + x2r;
+ x0i = x0i + x2i;
+
+ tmk = x0r - x2r;
+ x2r = tmk - x2r;
+ tmk = x0i - x2i;
+ x2i = tmk - x2i;
+
+ x1r = x1r + x3r;
+ x1i = x1i + x3i;
+
+ tmk = x1r - x3r;
+ x3r = tmk - x3r;
+ tmk = x1i - x3i;
+ x3i = tmk - x3i;
+
+ x0r = x0r + x1r;
+ x0i = x0i + x1i;
+
+ tmk = x0r - x1r;
+ x1r = tmk - x1r;
+ tmk = x0i - x1i;
+ x1i = tmk - x1i;
+
+ x2r = x2r + x3i;
+ x2i = x2i - x3r;
+
+ tmk = x2r - x3i;
+ x3i = tmk - x3i;
+ tmk = x2i + x3r;
+ x3r = tmk + x3r;
+
+ *ptr_z++ = x0r;
+ *ptr_z++ = x0i;
+ *ptr_z++ = x2r;
+ *ptr_z++ = x2i;
+ *ptr_z++ = x1r;
+ *ptr_z++ = x1i;
+ *ptr_z++ = x3i;
+ *ptr_z++ = x3r;
+ }
+ ptr_y -= 64; /* rewind to y[0]; NOTE(review): the hard-coded 64 matches the loop above only when npoints == 32 */
+ ptr_z -= 64; /* rewind to z[0] under the same assumption */
+ { /* Stage 2: four passes over y and z, each combining pairs that lie 8 floats apart. */
+ FLOAT32 *data_r = ptr_y;
+ FLOAT32 *data_i = ptr_z;
+ for (k = 2; k != 0; k--) { /* pass at float offset 0: two groups 32 floats apart, no multiplications */
+ FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ x0r = (*data_r);
+ x0i = (*(data_r + 1));
+ data_r += 8;
+
+ x1r = (*data_r);
+ x1i = (*(data_r + 1));
+ data_r += 8;
+
+ x2r = (*data_r);
+ x2i = (*(data_r + 1));
+ data_r += 8;
+
+ x3r = (*data_r);
+ x3i = (*(data_r + 1));
+ data_r -= 24; /* back to the first pair of the group for the in-place write-back */
+
+ x0r = x0r + x2r;
+ x0i = x0i + x2i;
+ x2r = x0r - (x2r * 2); /* (x0 + x2) - 2*x2 == old x0 - old x2 */
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i + x3i;
+ x3r = x1r - (x3r * 2);
+ x3i = x1i - (x3i * 2);
+
+ x0r = x0r + x1r;
+ x0i = x0i + x1i;
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + x3i;
+ x2i = x2i - x3r;
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
+
+ *data_r = x0r;
+ *(data_r + 1) = x0i;
+ data_r += 8;
+
+ *data_r = x2r; /* write-back order is x0, x2, x1, x3, as in stage 1 */
+ *(data_r + 1) = x2i;
+ data_r += 8;
+
+ *data_r = x1r;
+ *(data_r + 1) = x1i;
+ data_r += 8;
+
+ *data_r = x3i;
+ *(data_r + 1) = x3r;
+ data_r += 8; /* cursor now sits on the next 32-float group */
+
+ x0r = (*data_i);
+ x0i = (*(data_i + 1));
+ data_i += 8;
+
+ x1r = (*data_i);
+ x1i = (*(data_i + 1));
+ data_i += 8;
+
+ x2r = (*data_i);
+ x2i = (*(data_i + 1));
+ data_i += 8;
+
+ x3r = (*data_i);
+ x3i = (*(data_i + 1));
+ data_i -= 24;
+
+ x0r = x0r + x2r;
+ x0i = x0i + x2i;
+ x2r = x0r - (x2r * 2);
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i + x3i;
+ x3r = x1r - (x3r * 2);
+ x3i = x1i - (x3i * 2);
+
+ x0r = x0r + x1r;
+ x0i = x0i + x1i;
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + x3i;
+ x2i = x2i - x3r;
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
+
+ *data_i = x0r;
+ *(data_i + 1) = x0i;
+ data_i += 8;
+
+ *data_i = x2r;
+ *(data_i + 1) = x2i;
+ data_i += 8;
+
+ *data_i = x1r;
+ *(data_i + 1) = x1i;
+ data_i += 8;
+
+ *data_i = x3i;
+ *(data_i + 1) = x3r;
+ data_i += 8;
+ }
+ data_r = ptr_y + 2; /* next pass works on the pairs at float offset 2 of every 8-float block */
+ data_i = ptr_z + 2;
+
+ for (k = 2; k != 0; k--) { /* pass at float offset 2: x1, x2, x3 are scaled by fixed complex constants first */
+ FLOAT32 tmp;
+ FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ data_r += 8;
+
+ x1r = *data_r;
+ x1i = *(data_r + 1);
+ data_r += 8;
+
+ x2r = *data_r;
+ x2i = *(data_r + 1);
+ data_r += 8;
+
+ x3r = *data_r;
+ x3i = *(data_r + 1);
+ data_r -= 24;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) - /* x1 *= (0.923880 - j*0.382683) */
+ ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
+ x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
+ (FLOAT32)x1i, 0.923880f);
+ x1r = tmp;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) - /* x2 *= (0.707107 - j*0.707107) */
+ ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
+ x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
+ (FLOAT32)x2i, 0.707107f);
+ x2r = tmp;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) - /* x3 *= (0.382683 - j*0.923880) */
+ ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
+ x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
+ (FLOAT32)x3i, 0.382683f);
+ x3r = tmp;
+
+ x0r = (*data_r); /* x0 is used unscaled */
+ x0i = (*(data_r + 1));
+
+ x0r = x0r + (x2r);
+ x0i = x0i + (x2i);
+ x2r = x0r - (x2r * 2);
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i + x3i;
+ x3r = x1r - (x3r * 2);
+ x3i = x1i - (x3i * 2);
+
+ x0r = x0r + (x1r);
+ x0i = x0i + (x1i);
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + (x3i);
+ x2i = x2i - (x3r);
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
+
+ *data_r = x0r;
+ *(data_r + 1) = x0i;
+ data_r += 8;
+
+ *data_r = x2r;
+ *(data_r + 1) = x2i;
+ data_r += 8;
+
+ *data_r = x1r;
+ *(data_r + 1) = x1i;
+ data_r += 8;
+
+ *data_r = x3i;
+ *(data_r + 1) = x3r;
+ data_r += 8;
+ data_i += 8; /* same pass for the z buffer */
+
+ x1r = *data_i;
+ x1i = *(data_i + 1);
+ data_i += 8;
+
+ x2r = *data_i;
+ x2i = *(data_i + 1);
+ data_i += 8;
+
+ x3r = *data_i;
+ x3i = *(data_i + 1);
+ data_i -= 24;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) -
+ ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f));
+ x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f),
+ (FLOAT32)x1i, 0.923880f);
+ x1r = tmp;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) -
+ ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
+ x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f),
+ (FLOAT32)x2i, 0.707107f);
+ x2r = tmp;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) -
+ ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f));
+ x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f),
+ (FLOAT32)x3i, 0.382683f);
+ x3r = tmp;
+
+ x0r = (*data_i);
+ x0i = (*(data_i + 1));
+
+ x0r = x0r + (x2r);
+ x0i = x0i + (x2i);
+ x2r = x0r - (x2r * 2);
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i + x3i;
+ x3r = x1r - (x3r * 2);
+ x3i = x1i - (x3i * 2);
+
+ x0r = x0r + (x1r);
+ x0i = x0i + (x1i);
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + (x3i);
+ x2i = x2i - (x3r);
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
+
+ *data_i = x0r;
+ *(data_i + 1) = x0i;
+ data_i += 8;
+
+ *data_i = x2r;
+ *(data_i + 1) = x2i;
+ data_i += 8;
+
+ *data_i = x1r;
+ *(data_i + 1) = x1i;
+ data_i += 8;
+
+ *data_i = x3i;
+ *(data_i + 1) = x3r;
+ data_i += 8;
+ }
+ data_r -= 62; /* the loop advanced 2 * 32 floats from offset 2; stepping back 62 lands on offset 4 */
+ data_i -= 62;
+ for (k = 2; k != 0; k--) { /* pass at float offset 4 */
+ FLOAT32 tmp;
+ FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ data_r += 8;
+
+ x1r = *data_r;
+ x1i = *(data_r + 1);
+ data_r += 8;
+
+ x2r = *data_r;
+ x2i = *(data_r + 1);
+ data_r += 8;
+
+ x3r = *data_r;
+ x3i = *(data_r + 1);
+ data_r -= 24;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) - /* x1 *= (0.707107 - j*0.707107) */
+ ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
+ x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
+ (FLOAT32)x1i, 0.707107f);
+ x1r = tmp;
+
+ tmp = x2i; /* x2 *= -j, done as a swap plus negation instead of multiplications */
+ x2i = -x2r;
+ x2r = tmp;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) + /* x3 *= (-0.707107 - j*0.707107) */
+ ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
+ x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
+ ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
+ x3r = tmp;
+
+ x0r = (*data_r);
+ x0i = (*(data_r + 1));
+
+ x0r = x0r + (x2r);
+ x0i = x0i + (x2i);
+ x2r = x0r - (x2r * 2);
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i + x3i;
+ x3r = x1r - (x3r * 2);
+ x3i = x1i - (x3i * 2);
+
+ x0r = x0r + (x1r);
+ x0i = x0i + (x1i);
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + (x3i);
+ x2i = x2i - (x3r);
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
+
+ *data_r = x0r;
+ *(data_r + 1) = x0i;
+ data_r += 8;
+
+ *data_r = x2r;
+ *(data_r + 1) = x2i;
+ data_r += 8;
+
+ *data_r = x1r;
+ *(data_r + 1) = x1i;
+ data_r += 8;
+
+ *data_r = x3i;
+ *(data_r + 1) = x3r;
+ data_r += 8;
+ data_i += 8; /* same pass for the z buffer */
+
+ x1r = *data_i;
+ x1i = *(data_i + 1);
+ data_i += 8;
+
+ x2r = *data_i;
+ x2i = *(data_i + 1);
+ data_i += 8;
+
+ x3r = *data_i;
+ x3i = *(data_i + 1);
+ data_i -= 24;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) -
+ ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f));
+ x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f),
+ (FLOAT32)x1i, 0.707107f);
+ x1r = tmp;
+
+ tmp = x2i;
+ x2i = -x2r;
+ x2r = tmp;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) +
+ ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f));
+ x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) +
+ ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f));
+ x3r = tmp;
+
+ x0r = (*data_i);
+ x0i = (*(data_i + 1));
+
+ x0r = x0r + (x2r);
+ x0i = x0i + (x2i);
+ x2r = x0r - (x2r * 2);
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i + x3i;
+ x3r = x1r - (x3r * 2);
+ x3i = x1i - (x3i * 2);
+
+ x0r = x0r + (x1r);
+ x0i = x0i + (x1i);
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + (x3i);
+ x2i = x2i - (x3r);
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
+
+ *data_i = x0r;
+ *(data_i + 1) = x0i;
+ data_i += 8;
+
+ *data_i = x2r;
+ *(data_i + 1) = x2i;
+ data_i += 8;
+
+ *data_i = x1r;
+ *(data_i + 1) = x1i;
+ data_i += 8;
+
+ *data_i = x3i;
+ *(data_i + 1) = x3r;
+ data_i += 8;
+ }
+ data_r -= 62; /* on to float offset 6 */
+ data_i -= 62;
+ for (k = 2; k != 0; k--) { /* pass at float offset 6 */
+ FLOAT32 tmp;
+ FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ data_r += 8;
+
+ x1r = *data_r;
+ x1i = *(data_r + 1);
+ data_r += 8;
+
+ x2r = *data_r;
+ x2i = *(data_r + 1);
+ data_r += 8;
+
+ x3r = *data_r;
+ x3i = *(data_r + 1);
+ data_r -= 24;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) - /* x1 *= (0.382683 - j*0.923880) */
+ ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
+ x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
+ (FLOAT32)x1i, 0.382683f);
+ x1r = tmp;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) + /* x2 *= (-0.707107 - j*0.707107) */
+ ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
+ x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
+ ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
+ x2r = tmp;
+
+ tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) + /* x3r = -0.923880*x3r - 0.382683*x3i */
+ ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
+ x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f), /* x3i = -0.382683*x3r + 0.923880*x3i */
+ (FLOAT32)x3i, 0.923880f);
+ x3r = tmp;
+
+ x0r = (*data_r);
+ x0i = (*(data_r + 1));
+
+ x0r = x0r + (x2r);
+ x0i = x0i + (x2i);
+ x2r = x0r - (x2r * 2);
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i - x3i; /* differs from the earlier passes: x3i is subtracted here ... */
+ x3r = x1r - (x3r * 2);
+ x3i = x1i + (x3i * 2); /* ... and added back doubled, i.e. this pass consumes x3i with the opposite sign */
+
+ x0r = x0r + (x1r);
+ x0i = x0i + (x1i);
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + (x3i);
+ x2i = x2i - (x3r);
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
+
+ *data_r = x0r;
+ *(data_r + 1) = x0i;
+ data_r += 8;
+
+ *data_r = x2r;
+ *(data_r + 1) = x2i;
+ data_r += 8;
+
+ *data_r = x1r;
+ *(data_r + 1) = x1i;
+ data_r += 8;
+
+ *data_r = x3i;
+ *(data_r + 1) = x3r;
+ data_r += 8;
+ data_i += 8; /* same pass for the z buffer */
+
+ x1r = *data_i;
+ x1i = *(data_i + 1);
+ data_i += 8;
+
+ x2r = *data_i;
+ x2i = *(data_i + 1);
+ data_i += 8;
+
+ x3r = *data_i;
+ x3i = *(data_i + 1);
+ data_i -= 24;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) -
+ ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f));
+ x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f),
+ (FLOAT32)x1i, 0.382683f);
+ x1r = tmp;
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) +
+ ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f));
+ x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) +
+ ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f));
+ x2r = tmp;
+
+ tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) +
+ ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f));
+ x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f),
+ (FLOAT32)x3i, 0.923880f);
+ x3r = tmp;
+
+ x0r = (*data_i);
+ x0i = (*(data_i + 1));
+
+ x0r = x0r + (x2r);
+ x0i = x0i + (x2i);
+ x2r = x0r - (x2r * 2);
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i - x3i;
+ x3r = x1r - (x3r * 2);
+ x3i = x1i + (x3i * 2);
+
+ x0r = x0r + (x1r);
+ x0i = x0i + (x1i);
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + (x3i);
+ x2i = x2i - (x3r);
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
+
+ *data_i = x0r;
+ *(data_i + 1) = x0i;
+ data_i += 8;
+
+ *data_i = x2r;
+ *(data_i + 1) = x2i;
+ data_i += 8;
+
+ *data_i = x1r;
+ *(data_i + 1) = x1i;
+ data_i += 8;
+
+ *data_i = x3i;
+ *(data_i + 1) = x3r;
+ data_i += 8;
+ }
+ data_r -= 62; /* NOTE(review): dead store -- data_r is not read again before this block ends */
+ data_i -= 62; /* NOTE(review): dead store, same as above */
+ }
+ { /* Stage 3: combine scratch pair n with pair n + 32 floats and write sum/difference back into the caller's buffers. */
+ const FLOAT32 *twiddles = ptr_w;
+ FLOAT32 x0r, x0i, x1r, x1i;
+ for (j = 8; j != 0; j--) { /* first 8 pairs (scratch floats 0..15 against 32..47); consumes all 16 table entries */
+ FLOAT32 W1 = *twiddles;
+ twiddles++;
+ FLOAT32 W4 = *twiddles; /* declaration after a statement: needs C99 or later */
+ twiddles++;
+ FLOAT32 tmp;
+
+ x0r = *ptr_y;
+ x0i = *(ptr_y + 1);
+ ptr_y += 32;
+ ptr_xr += 32;
+
+ x1r = *ptr_y;
+ x1i = *(ptr_y + 1);
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) - /* x1 *= (W1 + j*W4) */
+ ixheaacd_mult32X32float((FLOAT32)x1i, W4));
+ x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
+ (FLOAT32)x1i, W1);
+ x1r = tmp;
+
+ *ptr_xr = (x0r) - (x1r); /* difference goes to the upper half (+32 floats) of the output */
+ *(ptr_xr + 1) = (x0i) - (x1i);
+ ptr_y -= 32;
+ ptr_xr -= 32;
+
+ *ptr_xr = (x0r) + (x1r); /* sum goes to the lower half */
+ *(ptr_xr + 1) = (x0i) + (x1i);
+ ptr_y += 2;
+ ptr_xr += 2;
+
+ x0r = *ptr_z; /* same combination for the z scratch, written to ptr_xi */
+ x0i = *(ptr_z + 1);
+ ptr_z += 32;
+ ptr_xi += 32;
+
+ x1r = *ptr_z;
+ x1i = *(ptr_z + 1);
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) -
+ ixheaacd_mult32X32float((FLOAT32)x1i, W4));
+ x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4),
+ (FLOAT32)x1i, W1);
+ x1r = tmp;
+
+ *ptr_xi = (x0r) - (x1r);
+ *(ptr_xi + 1) = (x0i) - (x1i);
+ ptr_z -= 32;
+ ptr_xi -= 32;
+
+ *ptr_xi = (x0r) + (x1r);
+ *(ptr_xi + 1) = (x0i) + (x1i);
+ ptr_z += 2;
+ ptr_xi += 2;
+ }
+ twiddles = ptr_w; /* restart the table: the second half reuses the same 8 (W1, W4) pairs with swapped roles */
+ for (j = 8; j != 0; j--) { /* remaining 8 pairs (scratch floats 16..31 against 48..63) */
+ FLOAT32 W1 = *twiddles;
+ twiddles++;
+ FLOAT32 W4 = *twiddles;
+ twiddles++;
+ FLOAT32 tmp;
+
+ x0r = *ptr_y;
+ x0i = *(ptr_y + 1);
+ ptr_y += 32;
+ ptr_xr += 32;
+
+ x1r = *ptr_y;
+ x1i = *(ptr_y + 1);
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) + /* x1 *= (W4 - j*W1) */
+ ixheaacd_mult32X32float((FLOAT32)x1i, W1));
+ x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
+ ixheaacd_mult32X32float((FLOAT32)x1i, W4));
+ x1r = tmp;
+
+ *ptr_xr = (x0r) - (x1r);
+ *(ptr_xr + 1) = (x0i) - (x1i);
+ ptr_y -= 32;
+ ptr_xr -= 32;
+
+ *ptr_xr = (x0r) + (x1r);
+ *(ptr_xr + 1) = (x0i) + (x1i);
+ ptr_y += 2;
+ ptr_xr += 2;
+
+ x0r = *ptr_z;
+ x0i = *(ptr_z + 1);
+ ptr_z += 32;
+ ptr_xi += 32;
+
+ x1r = *ptr_z;
+ x1i = *(ptr_z + 1);
+
+ tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) +
+ ixheaacd_mult32X32float((FLOAT32)x1i, W1));
+ x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) +
+ ixheaacd_mult32X32float((FLOAT32)x1i, W4));
+ x1r = tmp;
+
+ *ptr_xi = (x0r) - (x1r);
+ *(ptr_xi + 1) = (x0i) - (x1i);
+ ptr_z -= 32;
+ ptr_xi -= 32;
+
+ *ptr_xi = (x0r) + (x1r);
+ *(ptr_xi + 1) = (x0i) + (x1i);
+ ptr_z += 2;
+ ptr_xi += 2;
+ }
+ }
+}
+
+VOID ixheaacd_mps_complex_fft(FLOAT32 *xr, FLOAT32 *xi, WORD32 nlength) {
+ WORD32 i, j, k, n_stages, h2;
+ FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
WORD32 del, nodespacing, in_loop_cnt;
- WORD32 y[128];
+ WORD32 dig_rev_shift;
+ WORD32 not_power_4;
+ FLOAT32 ptr_x[256];
+ FLOAT32 y[256];
WORD32 npoints = nlength;
- WORD32 *ptr_y = y;
- const WORD32 *ptr_w;
+ FLOAT32 *ptr_y = y;
+ const FLOAT32 *ptr_w;
+ dig_rev_shift = ixheaacd_norm32(npoints) + 1 - 16;
n_stages = 30 - ixheaacd_norm32(npoints);
+ not_power_4 = n_stages & 1;
n_stages = n_stages >> 1;
- ptr_w = ixheaacd_twiddle_table_fft_32x32;
- for (i = 0; i < npoints; i += 4) {
- WORD32 *inp = ptr_x;
- h2 = ixheaacd_mps_dig_rev[i >> 2];
+ for (i = 0; i<nlength; i++)
+ {
+ ptr_x[2 * i] = xr[i];
+ ptr_x[2 * i + 1] = xi[i];
+ }
+
+ ptr_w = ixheaacd_twiddle_table_fft;
+
+ for (i = 0; i<npoints; i += 4)
+ {
+ FLOAT32 *inp = ptr_x;
+
+ DIG_REV(i, dig_rev_shift, h2);
+ if (not_power_4)
+ {
+ h2 += 1;
+ h2 &= ~1;
+ }
inp += (h2);
x0r = *inp;
@@ -97,23 +918,23 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
x3r = *inp;
x3i = *(inp + 1);
- x0r = ixheaacd_add32_sat(x0r, x2r);
- x0i = ixheaacd_add32_sat(x0i, x2i);
- x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
- x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
- x1r = ixheaacd_add32_sat(x1r, x3r);
- x1i = ixheaacd_add32_sat(x1i, x3i);
- x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
- x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
-
- x0r = ixheaacd_add32_sat(x0r, x1r);
- x0i = ixheaacd_add32_sat(x0i, x1i);
- x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
- x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
- x2r = ixheaacd_add32_sat(x2r, x3i);
- x2i = ixheaacd_sub32_sat(x2i, x3r);
- x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
- x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
+ x0r = x0r + x2r;
+ x0i = x0i + x2i;
+ x2r = x0r - (x2r * 2);
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i + x3i;
+ x3r = x1r - (x3r * 2);
+ x3i = x1i - (x3i * 2);
+
+ x0r = x0r + x1r;
+ x0i = x0i + x1i;
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + x3i;
+ x2i = x2i - x3r;
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
*ptr_y++ = x0r;
*ptr_y++ = x0i;
@@ -128,13 +949,15 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
del = 4;
nodespacing = 64;
in_loop_cnt = npoints >> 4;
- for (i = n_stages - 1; i > 0; i--) {
- const WORD32 *twiddles = ptr_w;
- WORD32 *data = ptr_y;
- WORD32 w1h, w2h, w3h, w1l, w2l, w3l;
+ for (i = n_stages - 1; i>0; i--)
+ {
+ const FLOAT32 *twiddles = ptr_w;
+ FLOAT32 *data = ptr_y;
+ FLOAT32 w1h, w2h, w3h, w1l, w2l, w3l;
WORD32 sec_loop_cnt;
- for (k = in_loop_cnt; k != 0; k--) {
+ for (k = in_loop_cnt; k != 0; k--)
+ {
x0r = (*data);
x0i = (*(data + 1));
data += (del << 1);
@@ -151,23 +974,23 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
x3i = (*(data + 1));
data -= 3 * (del << 1);
- x0r = ixheaacd_add32_sat(x0r, x2r);
- x0i = ixheaacd_add32_sat(x0i, x2i);
- x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
- x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
- x1r = ixheaacd_add32_sat(x1r, x3r);
- x1i = ixheaacd_add32_sat(x1i, x3i);
- x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
- x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
-
- x0r = ixheaacd_add32_sat(x0r, x1r);
- x0i = ixheaacd_add32_sat(x0i, x1i);
- x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
- x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
- x2r = ixheaacd_add32_sat(x2r, x3i);
- x2i = ixheaacd_sub32_sat(x2i, x3r);
- x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
- x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
+ x0r = x0r + x2r;
+ x0i = x0i + x2i;
+ x2r = x0r - (x2r * 2);
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i + x3i;
+ x3r = x1r - (x3r * 2);
+ x3i = x1i - (x3i * 2);
+
+ x0r = x0r + x1r;
+ x0i = x0i + x1i;
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + x3i;
+ x2i = x2i - x3r;
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
*data = x0r;
*(data + 1) = x0i;
@@ -188,13 +1011,13 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
data = ptr_y + 2;
sec_loop_cnt = (nodespacing * del);
- sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) -
- (sec_loop_cnt / 16) + (sec_loop_cnt / 32) -
- (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
- (sec_loop_cnt / 256);
+ sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) \
+ + (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) \
+ - (sec_loop_cnt / 256);
j = nodespacing;
- for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
+ for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing)
+ {
w1h = *(twiddles + 2 * j);
w1l = *(twiddles + 2 * j + 1);
w2h = *(twiddles + 2 * (j << 1));
@@ -202,9 +1025,10 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
w3h = *(twiddles + 2 * j + 2 * (j << 1));
w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1);
- for (k = in_loop_cnt; k != 0; k--) {
- WORD32 tmp;
- WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+ for (k = in_loop_cnt; k != 0; k--)
+ {
+ FLOAT32 tmp;
+ FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
@@ -220,41 +1044,38 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
x3i = *(data + 1);
data -= 3 * (del << 1);
- tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
- ixheaacd_mult32_sat(x1i, w1h));
- x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
+ tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
+ x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
x1r = tmp;
- tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
- ixheaacd_mult32_sat(x2i, w2h));
- x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
+ tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
+ x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
x2r = tmp;
- tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3l),
- ixheaacd_mult32_sat(x3i, w3h));
- x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
+ tmp = (ixheaacd_mult32X32float(x3r, w3l) - ixheaacd_mult32X32float(x3i, w3h));
+ x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
- x0r = ixheaacd_add32_sat(x0r, x2r);
- x0i = ixheaacd_add32_sat(x0i, x2i);
- x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
- x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
- x1r = ixheaacd_add32_sat(x1r, x3r);
- x1i = ixheaacd_add32_sat(x1i, x3i);
- x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
- x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
-
- x0r = ixheaacd_add32_sat(x0r, x1r);
- x0i = ixheaacd_add32_sat(x0i, x1i);
- x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
- x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
- x2r = ixheaacd_add32_sat(x2r, x3i);
- x2i = ixheaacd_sub32_sat(x2i, x3r);
- x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
- x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
+ x0r = x0r + (x2r);
+ x0i = x0i + (x2i);
+ x2r = x0r - (x2r * 2);
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i + x3i;
+ x3r = x1r - (x3r * 2);
+ x3i = x1i - (x3i * 2);
+
+ x0r = x0r + (x1r);
+ x0i = x0i + (x1i);
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + (x3i);
+ x2i = x2i - (x3r);
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
*data = x0r;
*(data + 1) = x0i;
@@ -275,7 +1096,8 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
data -= 2 * npoints;
data += 2;
}
- for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
+ for (; j <= (nodespacing * del) >> 1; j += nodespacing)
+ {
w1h = *(twiddles + 2 * j);
w2h = *(twiddles + 2 * (j << 1));
w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
@@ -283,9 +1105,10 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
w2l = *(twiddles + 2 * (j << 1) + 1);
w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
- for (k = in_loop_cnt; k != 0; k--) {
- WORD32 tmp;
- WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+ for (k = in_loop_cnt; k != 0; k--)
+ {
+ FLOAT32 tmp;
+ FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
@@ -301,42 +1124,38 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
x3i = *(data + 1);
data -= 3 * (del << 1);
- tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
- ixheaacd_mult32_sat(x1i, w1h));
- x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
+ tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
+ x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
x1r = tmp;
- tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l),
- ixheaacd_mult32_sat(x2i, w2h));
- x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l);
+ tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h));
+ x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l);
x2r = tmp;
- tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
- ixheaacd_mult32_sat(x3i, w3l));
- x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
- ixheaacd_mult32_sat(x3r, w3l));
+ tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
+ x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
- x0r = ixheaacd_add32_sat(x0r, x2r);
- x0i = ixheaacd_add32_sat(x0i, x2i);
- x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
- x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
- x1r = ixheaacd_add32_sat(x1r, x3r);
- x1i = ixheaacd_add32_sat(x1i, x3i);
- x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
- x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
-
- x0r = ixheaacd_add32_sat(x0r, x1r);
- x0i = ixheaacd_add32_sat(x0i, x1i);
- x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
- x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
- x2r = ixheaacd_add32_sat(x2r, x3i);
- x2i = ixheaacd_sub32_sat(x2i, x3r);
- x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
- x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
+ x0r = x0r + (x2r);
+ x0i = x0i + (x2i);
+ x2r = x0r - (x2r * 2);
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i + x3i;
+ x3r = x1r - (x3r * 2);
+ x3i = x1i - (x3i * 2);
+
+ x0r = x0r + (x1r);
+ x0i = x0i + (x1i);
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + (x3i);
+ x2i = x2i - (x3r);
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
*data = x0r;
*(data + 1) = x0i;
@@ -357,7 +1176,8 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
data -= 2 * npoints;
data += 2;
}
- for (; j <= sec_loop_cnt * 2; j += nodespacing) {
+ for (; j <= sec_loop_cnt * 2; j += nodespacing)
+ {
w1h = *(twiddles + 2 * j);
w2h = *(twiddles + 2 * (j << 1) - 512);
w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512);
@@ -365,9 +1185,10 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
w2l = *(twiddles + 2 * (j << 1) - 511);
w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511);
- for (k = in_loop_cnt; k != 0; k--) {
- WORD32 tmp;
- WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+ for (k = in_loop_cnt; k != 0; k--)
+ {
+ FLOAT32 tmp;
+ FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
@@ -383,43 +1204,38 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
x3i = *(data + 1);
data -= 3 * (del << 1);
- tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
- ixheaacd_mult32_sat(x1i, w1h));
- x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
+ tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
+ x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
x1r = tmp;
- tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
- ixheaacd_mult32_sat(x2i, w2l));
- x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
- ixheaacd_mult32_sat(x2r, w2l));
+ tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
+ x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
x2r = tmp;
- tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h),
- ixheaacd_mult32_sat(x3i, w3l));
- x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
- ixheaacd_mult32_sat(x3r, w3l));
+ tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l));
+ x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h);
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
- x0r = ixheaacd_add32_sat(x0r, x2r);
- x0i = ixheaacd_add32_sat(x0i, x2i);
- x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
- x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
- x1r = ixheaacd_add32_sat(x1r, x3r);
- x1i = ixheaacd_add32_sat(x1i, x3i);
- x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
- x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
-
- x0r = ixheaacd_add32_sat(x0r, x1r);
- x0i = ixheaacd_add32_sat(x0i, x1i);
- x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
- x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
- x2r = ixheaacd_add32_sat(x2r, x3i);
- x2i = ixheaacd_sub32_sat(x2i, x3r);
- x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
- x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
+ x0r = x0r + (x2r);
+ x0i = x0i + (x2i);
+ x2r = x0r - (x2r * 2);
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i + x3i;
+ x3r = x1r - (x3r * 2);
+ x3i = x1i - (x3i * 2);
+
+ x0r = x0r + (x1r);
+ x0i = x0i + (x1i);
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + (x3i);
+ x2i = x2i - (x3r);
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
*data = x0r;
*(data + 1) = x0i;
@@ -440,7 +1256,8 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
data -= 2 * npoints;
data += 2;
}
- for (; j < nodespacing * del; j += nodespacing) {
+ for (; j<nodespacing * del; j += nodespacing)
+ {
w1h = *(twiddles + 2 * j);
w2h = *(twiddles + 2 * (j << 1) - 512);
w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024);
@@ -448,9 +1265,10 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
w2l = *(twiddles + 2 * (j << 1) - 511);
w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023);
- for (k = in_loop_cnt; k != 0; k--) {
- WORD32 tmp;
- WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+ for (k = in_loop_cnt; k != 0; k--)
+ {
+ FLOAT32 tmp;
+ FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
data += (del << 1);
@@ -466,42 +1284,38 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
x3i = *(data + 1);
data -= 3 * (del << 1);
- tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l),
- ixheaacd_mult32_sat(x1i, w1h));
- x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l);
+ tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
+ x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
x1r = tmp;
- tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h),
- ixheaacd_mult32_sat(x2i, w2l));
- x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h),
- ixheaacd_mult32_sat(x2r, w2l));
+ tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l));
+ x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h);
x2r = tmp;
- tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h),
- ixheaacd_mult32_sat(x3r, w3l));
- x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l);
+ tmp = (-ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h));
+ x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l);
x3r = tmp;
x0r = (*data);
x0i = (*(data + 1));
- x0r = ixheaacd_add32_sat(x0r, x2r);
- x0i = ixheaacd_add32_sat(x0i, x2i);
- x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1));
- x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1));
- x1r = ixheaacd_add32_sat(x1r, x3r);
- x1i = ixheaacd_sub32_sat(x1i, x3i);
- x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1));
- x3i = ixheaacd_add32_sat(x1i, ixheaacd_shl32_sat(x3i, 1));
-
- x0r = ixheaacd_add32_sat(x0r, x1r);
- x0i = ixheaacd_add32_sat(x0i, x1i);
- x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1));
- x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1));
- x2r = ixheaacd_add32_sat(x2r, x3i);
- x2i = ixheaacd_sub32_sat(x2i, x3r);
- x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1));
- x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1));
+ x0r = x0r + (x2r);
+ x0i = x0i + (x2i);
+ x2r = x0r - (x2r * 2);
+ x2i = x0i - (x2i * 2);
+ x1r = x1r + x3r;
+ x1i = x1i - x3i;
+ x3r = x1r - (x3r * 2);
+ x3i = x1i + (x3i * 2);
+
+ x0r = x0r + (x1r);
+ x0i = x0i + (x1i);
+ x1r = x0r - (x1r * 2);
+ x1i = x0i - (x1i * 2);
+ x2r = x2r + (x3i);
+ x2i = x2i - (x3r);
+ x3i = x2r - (x3i * 2);
+ x3r = x2i + (x3r * 2);
*data = x0r;
*(data + 1) = x0i;
@@ -526,10 +1340,70 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re,
del <<= 2;
in_loop_cnt >>= 2;
}
+ if (not_power_4)
+ {
+ const FLOAT32 *twiddles = ptr_w;
+ nodespacing <<= 1;
+
+ for (j = del / 2; j != 0; j--)
+ {
+ FLOAT32 w1h = *twiddles;
+ FLOAT32 w1l = *(twiddles + 1);
+ FLOAT32 tmp;
+ twiddles += nodespacing * 2;
+
+ x0r = *ptr_y;
+ x0i = *(ptr_y + 1);
+ ptr_y += (del << 1);
+
+ x1r = *ptr_y;
+ x1i = *(ptr_y + 1);
+
+ tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h));
+ x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l);
+ x1r = tmp;
+
+ *ptr_y = (x0r) - (x1r);
+ *(ptr_y + 1) = (x0i) - (x1i);
+ ptr_y -= (del << 1);
+
+ *ptr_y = (x0r) + (x1r);
+ *(ptr_y + 1) = (x0i) + (x1i);
+ ptr_y += 2;
+ }
+ twiddles = ptr_w;
+ for (j = del / 2; j != 0; j--)
+ {
+ FLOAT32 w1h = *twiddles;
+ FLOAT32 w1l = *(twiddles + 1);
+ FLOAT32 tmp;
+ twiddles += nodespacing * 2;
+
+ x0r = *ptr_y;
+ x0i = *(ptr_y + 1);
+ ptr_y += (del << 1);
+
+ x1r = *ptr_y;
+ x1i = *(ptr_y + 1);
+
+ tmp = (ixheaacd_mult32X32float(x1r, w1h) + ixheaacd_mult32X32float(x1i, w1l));
+ x1i = -ixheaacd_mult32X32float(x1r, w1l) + ixheaacd_mult32X32float(x1i, w1h);
+ x1r = tmp;
+
+ *ptr_y = (x0r) - (x1r);
+ *(ptr_y + 1) = (x0i) - (x1i);
+ ptr_y -= (del << 1);
+
+ *ptr_y = (x0r) + (x1r);
+ *(ptr_y + 1) = (x0i) + (x1i);
+ ptr_y += 2;
+ }
+ }
- for (i = 0; i < 2 * nlength; i += 2) {
- fin_re[i] = y[i];
- fin_im[i] = y[i + 1];
+ for (i = 0; i<nlength; i++)
+ {
+ xr[i] = y[2 * i];
+ xi[i] = y[2 * i + 1];
}
return;
@@ -1087,7 +1961,6 @@ VOID ixheaacd_complex_fft_p2_dec(WORD32 *xr, WORD32 *xi, WORD32 nlength,
ptr_y += 2;
}
}
-
}
else {
@@ -1715,13 +2588,6 @@ VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength,
if (fft_mode < 0) {
for (i = 0; i < nlength; i += 3) {
- tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i], (*w1r)),
- ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1i)));
- ptr_x[2 * i + 1] =
- ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i], (*w1i)),
- ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1r)));
- ptr_x[2 * i] = tmp;
-
w1r++;
w1i++;
@@ -1749,13 +2615,6 @@ VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength,
else {
for (i = 0; i < nlength; i += 3) {
- tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i], (*w1r)),
- ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1i)));
- ptr_x[2 * i + 1] =
- ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1r)),
- ixheaacd_mult32_sat(ptr_x[2 * i], (*w1i)));
- ptr_x[2 * i] = tmp;
-
w1r++;
w1i++;
@@ -1789,34 +2648,25 @@ VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength,
ptr_y = ptr_y + 6;
}
+ ptr_y = y;
for (i = 0; i < mpass; i++) {
- xr[i] = y[6 * i];
- xi[i] = y[6 * i + 1];
- }
-
- for (i = 0; i < mpass; i++) {
- xr[mpass + i] = y[6 * i + 2];
- xi[mpass + i] = y[6 * i + 3];
+ xr[i] = *ptr_y++;
+ xi[i] = *ptr_y++;
+ xr[mpass + i] = *ptr_y++;
+ xi[mpass + i] = *ptr_y++;
+ xr[2 * mpass + i] = *ptr_y++;
+ xi[2 * mpass + i] = *ptr_y++;
}
- for (i = 0; i < mpass; i++) {
- xr[2 * mpass + i] = y[6 * i + 4];
- xi[2 * mpass + i] = y[6 * i + 5];
- }
return;
}
-WORD32 ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength,
- WORD32 fft_mode, WORD32 *preshift) {
+VOID ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength, WORD32 fft_mode,
+ WORD32 *preshift) { /* picks the p3 or p2 FFT routine from nlength; all five arguments are forwarded unchanged */
if (nlength & (nlength - 1)) {
- if ((nlength != 24) && (nlength != 48) && (nlength != 96) &&
- (nlength != 192) && (nlength != 384)) {
- printf("%d point FFT not supported", nlength);
- return IA_FATAL_ERROR;
- }
ixheaacd_complex_fft_p3(data_r, data_i, nlength, fft_mode, preshift);
} else
(*ixheaacd_complex_fft_p2)(data_r, data_i, nlength, fft_mode, preshift);
- return 0;
+ return; /* VOID return: no status is reported to the caller; the nlength whitelist check above is removed by this change */
}