diff options
Diffstat (limited to 'decoder/ixheaacd_fft.c')
-rw-r--r-- | decoder/ixheaacd_fft.c | 1282 |
1 files changed, 1066 insertions, 216 deletions
diff --git a/decoder/ixheaacd_fft.c b/decoder/ixheaacd_fft.c index 0932097..ecb88c1 100644 --- a/decoder/ixheaacd_fft.c +++ b/decoder/ixheaacd_fft.c @@ -28,9 +28,11 @@ #include "ixheaacd_function_selector.h" extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514]; +extern const FLOAT32 ixheaacd_twiddle_table_fft[514]; +extern const FLOAT32 ixheaacd_twiddle_table_fft_flt[16]; extern const WORD32 ixheaacd_twiddle_table_3pr[1155]; extern const WORD32 ixheaacd_twiddle_table_3pi[1155]; -extern const WORD8 ixheaacd_mps_dig_rev[16]; +extern const WORD8 ixheaacd_mps_dig_rev[8]; #define PLATFORM_INLINE __inline @@ -61,25 +63,844 @@ static PLATFORM_INLINE WORD32 ixheaacd_mac32_sat(WORD32 a, WORD32 b, WORD32 c) { return (result); } +static PLATFORM_INLINE FLOAT32 ixheaacd_mult32X32float(FLOAT32 a, FLOAT32 b) { + FLOAT32 result; -VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, - WORD32 *fin_im, WORD32 nlength) { - WORD32 i, j, k, n_stages; - WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + result = a * b; + + return result; +} + +static PLATFORM_INLINE FLOAT32 ixheaacd_mac32X32float(FLOAT32 a, FLOAT32 b, FLOAT32 c) { + FLOAT32 result; + + result = a + b * c; + + return result; +} + +VOID ixheaacd_mps_synth_calc_fft(FLOAT32 *ptr_xr, FLOAT32 *ptr_xi, + WORD32 npoints) { + WORD32 i, j, k; + FLOAT32 y[64], z[64]; + FLOAT32 *ptr_y = y, *ptr_z = z; + const FLOAT32 *ptr_w = ixheaacd_twiddle_table_fft_flt; + + for (i = 0; i < npoints; i += 4) { + FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + FLOAT32 *inp = ptr_xr; + FLOAT32 tmk; + + WORD32 h2 = ixheaacd_mps_dig_rev[i >> 2]; + + inp += (h2); + + x0r = *inp; + x0i = *(inp + 1); + inp += 16; + + x1r = *inp; + x1i = *(inp + 1); + inp += 16; + + x2r = *inp; + x2i = *(inp + 1); + inp += 16; + + x3r = *inp; + x3i = *(inp + 1); + + x0r = x0r + x2r; + x0i = x0i + x2i; + + tmk = x0r - x2r; + x2r = tmk - x2r; + tmk = x0i - x2i; + x2i = tmk - x2i; + + x1r = x1r + x3r; + x1i = x1i + x3i; + + tmk = x1r - x3r; + x3r = tmk - x3r; + tmk = x1i - x3i; + x3i = tmk - x3i; + + x0r = x0r + x1r; + x0i = x0i + x1i; + + tmk = x0r - x1r; + x1r = tmk - x1r; + tmk = x0i - x1i; + x1i = tmk - x1i; + + x2r = x2r + x3i; + x2i = x2i - x3r; + + tmk = x2r - x3i; + x3i = tmk - x3i; + tmk = x2i + x3r; + x3r = tmk + x3r; + + *ptr_y++ = x0r; + *ptr_y++ = x0i; + *ptr_y++ = x2r; + *ptr_y++ = x2i; + *ptr_y++ = x1r; + *ptr_y++ = x1i; + *ptr_y++ = x3i; + *ptr_y++ = x3r; + + inp = ptr_xi; + + inp += (h2); + + x0r = *inp; + x0i = *(inp + 1); + inp += 16; + + x1r = *inp; + x1i = *(inp + 1); + inp += 16; + + x2r = *inp; + x2i = *(inp + 1); + inp += 16; + + x3r = *inp; + x3i = *(inp + 1); + + x0r = x0r + x2r; + x0i = x0i + x2i; + + tmk = x0r - x2r; + x2r = tmk - x2r; + tmk = x0i - x2i; + x2i = tmk - x2i; + + x1r = x1r + x3r; + x1i = x1i + x3i; + + tmk = x1r - x3r; + x3r = tmk - x3r; + tmk = x1i - x3i; + x3i = tmk - x3i; + + x0r = x0r + x1r; + x0i = x0i + x1i; + + tmk = x0r - x1r; + x1r = tmk - x1r; + tmk = x0i - x1i; + x1i = tmk - x1i; + + x2r = x2r + x3i; + x2i = x2i - x3r; + + tmk = x2r - x3i; + x3i = tmk - x3i; + tmk = x2i + x3r; + x3r = tmk + x3r; + + *ptr_z++ = x0r; + *ptr_z++ = x0i; + *ptr_z++ = x2r; + *ptr_z++ = x2i; + *ptr_z++ = x1r; + *ptr_z++ = x1i; + *ptr_z++ = x3i; + *ptr_z++ = x3r; + } + ptr_y -= 64; + ptr_z -= 64; + { + FLOAT32 *data_r = ptr_y; + FLOAT32 *data_i = ptr_z; + for (k = 2; k != 0; k--) { + FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + x0r = (*data_r); + x0i = (*(data_r + 1)); + data_r += 8; + + x1r = (*data_r); + x1i = (*(data_r + 1)); + data_r += 8; + + x2r = (*data_r); + x2i = (*(data_r + 1)); + data_r += 8; + + x3r = (*data_r); + x3i = (*(data_r + 1)); + data_r -= 24; + + x0r = x0r + x2r; + x0i = x0i + x2i; + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i + x3i; + x3r = x1r - (x3r * 2); + x3i = x1i - (x3i * 2); + + x0r = x0r + x1r; + x0i = x0i + x1i; + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + x3i; + x2i = x2i - x3r; + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); + + *data_r = x0r; + *(data_r + 1) = x0i; + data_r += 8; + + *data_r = x2r; + *(data_r + 1) = x2i; + data_r += 8; + + *data_r = x1r; + *(data_r + 1) = x1i; + data_r += 8; + + *data_r = x3i; + *(data_r + 1) = x3r; + data_r += 8; + + x0r = (*data_i); + x0i = (*(data_i + 1)); + data_i += 8; + + x1r = (*data_i); + x1i = (*(data_i + 1)); + data_i += 8; + + x2r = (*data_i); + x2i = (*(data_i + 1)); + data_i += 8; + + x3r = (*data_i); + x3i = (*(data_i + 1)); + data_i -= 24; + + x0r = x0r + x2r; + x0i = x0i + x2i; + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i + x3i; + x3r = x1r - (x3r * 2); + x3i = x1i - (x3i * 2); + + x0r = x0r + x1r; + x0i = x0i + x1i; + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + x3i; + x2i = x2i - x3r; + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); + + *data_i = x0r; + *(data_i + 1) = x0i; + data_i += 8; + + *data_i = x2r; + *(data_i + 1) = x2i; + data_i += 8; + + *data_i = x1r; + *(data_i + 1) = x1i; + data_i += 8; + + *data_i = x3i; + *(data_i + 1) = x3r; + data_i += 8; + } + data_r = ptr_y + 2; + data_i = ptr_z + 2; + + for (k = 2; k != 0; k--) { + FLOAT32 tmp; + FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + data_r += 8; + + x1r = *data_r; + x1i = *(data_r + 1); + data_r += 8; + + x2r = *data_r; + x2i = *(data_r + 1); + data_r += 8; + + x3r = *data_r; + x3i = *(data_r + 1); + data_r -= 24; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) - + ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f)); + x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f), + (FLOAT32)x1i, 0.923880f); + x1r = tmp; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) - + ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f)); + x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f), + (FLOAT32)x2i, 0.707107f); + x2r = tmp; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) - + ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f)); + x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f), + (FLOAT32)x3i, 0.382683f); + x3r = tmp; + + x0r = (*data_r); + x0i = (*(data_r + 1)); + + x0r = x0r + (x2r); + x0i = x0i + (x2i); + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i + x3i; + x3r = x1r - (x3r * 2); + x3i = x1i - (x3i * 2); + + x0r = x0r + (x1r); + x0i = x0i + (x1i); + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + (x3i); + x2i = x2i - (x3r); + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); + + *data_r = x0r; + *(data_r + 1) = x0i; + data_r += 8; + + *data_r = x2r; + *(data_r + 1) = x2i; + data_r += 8; + + *data_r = x1r; + *(data_r + 1) = x1i; + data_r += 8; + + *data_r = x3i; + *(data_r + 1) = x3r; + data_r += 8; + data_i += 8; + + x1r = *data_i; + x1i = *(data_i + 1); + data_i += 8; + + x2r = *data_i; + x2i = *(data_i + 1); + data_i += 8; + + x3r = *data_i; + x3i = *(data_i + 1); + data_i -= 24; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.923880f) - + ixheaacd_mult32X32float((FLOAT32)x1i, -0.382683f)); + x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.382683f), + (FLOAT32)x1i, 0.923880f); + x1r = tmp; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) - + ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f)); + x2i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f), + (FLOAT32)x2i, 0.707107f); + x2r = tmp; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, 0.382683f) - + ixheaacd_mult32X32float((FLOAT32)x3i, -0.923880f)); + x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.923880f), + (FLOAT32)x3i, 0.382683f); + x3r = tmp; + + x0r = (*data_i); + x0i = (*(data_i + 1)); + + x0r = x0r + (x2r); + x0i = x0i + (x2i); + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i + x3i; + x3r = x1r - (x3r * 2); + x3i = x1i - (x3i * 2); + + x0r = x0r + (x1r); + x0i = x0i + (x1i); + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + (x3i); + x2i = x2i - (x3r); + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); + + *data_i = x0r; + *(data_i + 1) = x0i; + data_i += 8; + + *data_i = x2r; + *(data_i + 1) = x2i; + data_i += 8; + + *data_i = x1r; + *(data_i + 1) = x1i; + data_i += 8; + + *data_i = x3i; + *(data_i + 1) = x3r; + data_i += 8; + } + data_r -= 62; + data_i -= 62; + for (k = 2; k != 0; k--) { + FLOAT32 tmp; + FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + data_r += 8; + + x1r = *data_r; + x1i = *(data_r + 1); + data_r += 8; + + x2r = *data_r; + x2i = *(data_r + 1); + data_r += 8; + + x3r = *data_r; + x3i = *(data_r + 1); + data_r -= 24; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) - + ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f)); + x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f), + (FLOAT32)x1i, 0.707107f); + x1r = tmp; + + tmp = x2i; + x2i = -x2r; + x2r = tmp; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) + + ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f)); + x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) + + ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f)); + x3r = tmp; + + x0r = (*data_r); + x0i = (*(data_r + 1)); + + x0r = x0r + (x2r); + x0i = x0i + (x2i); + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i + x3i; + x3r = x1r - (x3r * 2); + x3i = x1i - (x3i * 2); + + x0r = x0r + (x1r); + x0i = x0i + (x1i); + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + (x3i); + x2i = x2i - (x3r); + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); + + *data_r = x0r; + *(data_r + 1) = x0i; + data_r += 8; + + *data_r = x2r; + *(data_r + 1) = x2i; + data_r += 8; + + *data_r = x1r; + *(data_r + 1) = x1i; + data_r += 8; + + *data_r = x3i; + *(data_r + 1) = x3r; + data_r += 8; + data_i += 8; + + x1r = *data_i; + x1i = *(data_i + 1); + data_i += 8; + + x2r = *data_i; + x2i = *(data_i + 1); + data_i += 8; + + x3r = *data_i; + x3i = *(data_i + 1); + data_i -= 24; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.707107f) - + ixheaacd_mult32X32float((FLOAT32)x1i, -0.707107f)); + x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.707107f), + (FLOAT32)x1i, 0.707107f); + x1r = tmp; + + tmp = x2i; + x2i = -x2r; + x2r = tmp; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x3r, -0.707107f) + + ixheaacd_mult32X32float((FLOAT32)x3i, 0.707107f)); + x3i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.707107f) + + ixheaacd_mult32X32float((FLOAT32)x3i, -0.707107f)); + x3r = tmp; + + x0r = (*data_i); + x0i = (*(data_i + 1)); + + x0r = x0r + (x2r); + x0i = x0i + (x2i); + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i + x3i; + x3r = x1r - (x3r * 2); + x3i = x1i - (x3i * 2); + + x0r = x0r + (x1r); + x0i = x0i + (x1i); + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + (x3i); + x2i = x2i - (x3r); + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); + + *data_i = x0r; + *(data_i + 1) = x0i; + data_i += 8; + + *data_i = x2r; + *(data_i + 1) = x2i; + data_i += 8; + + *data_i = x1r; + *(data_i + 1) = x1i; + data_i += 8; + + *data_i = x3i; + *(data_i + 1) = x3r; + data_i += 8; + } + data_r -= 62; + data_i -= 62; + for (k = 2; k != 0; k--) { + FLOAT32 tmp; + FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + data_r += 8; + + x1r = *data_r; + x1i = *(data_r + 1); + data_r += 8; + + x2r = *data_r; + x2i = *(data_r + 1); + data_r += 8; + + x3r = *data_r; + x3i = *(data_r + 1); + data_r -= 24; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) - + ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f)); + x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f), + (FLOAT32)x1i, 0.382683f); + x1r = tmp; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) + + ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f)); + x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) + + ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f)); + x2r = tmp; + + tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) + + ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f)); + x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f), + (FLOAT32)x3i, 0.923880f); + x3r = tmp; + + x0r = (*data_r); + x0i = (*(data_r + 1)); + + x0r = x0r + (x2r); + x0i = x0i + (x2i); + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i - x3i; + x3r = x1r - (x3r * 2); + x3i = x1i + (x3i * 2); + + x0r = x0r + (x1r); + x0i = x0i + (x1i); + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + (x3i); + x2i = x2i - (x3r); + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); + + *data_r = x0r; + *(data_r + 1) = x0i; + data_r += 8; + + *data_r = x2r; + *(data_r + 1) = x2i; + data_r += 8; + + *data_r = x1r; + *(data_r + 1) = x1i; + data_r += 8; + + *data_r = x3i; + *(data_r + 1) = x3r; + data_r += 8; + data_i += 8; + + x1r = *data_i; + x1i = *(data_i + 1); + data_i += 8; + + x2r = *data_i; + x2i = *(data_i + 1); + data_i += 8; + + x3r = *data_i; + x3i = *(data_i + 1); + data_i -= 24; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, 0.382683f) - + ixheaacd_mult32X32float((FLOAT32)x1i, -0.923880f)); + x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, -0.923880f), + (FLOAT32)x1i, 0.382683f); + x1r = tmp; + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x2r, -0.707107f) + + ixheaacd_mult32X32float((FLOAT32)x2i, 0.707107f)); + x2i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x2r, 0.707107f) + + ixheaacd_mult32X32float((FLOAT32)x2i, -0.707107f)); + x2r = tmp; + + tmp = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x3r, 0.923880f) + + ixheaacd_mult32X32float((FLOAT32)x3i, -0.382683f)); + x3i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x3r, -0.382683f), + (FLOAT32)x3i, 0.923880f); + x3r = tmp; + + x0r = (*data_i); + x0i = (*(data_i + 1)); + + x0r = x0r + (x2r); + x0i = x0i + (x2i); + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i - x3i; + x3r = x1r - (x3r * 2); + x3i = x1i + (x3i * 2); + + x0r = x0r + (x1r); + x0i = x0i + (x1i); + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + (x3i); + x2i = x2i - (x3r); + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); + + *data_i = x0r; + *(data_i + 1) = x0i; + data_i += 8; + + *data_i = x2r; + *(data_i + 1) = x2i; + data_i += 8; + + *data_i = x1r; + *(data_i + 1) = x1i; + data_i += 8; + + *data_i = x3i; + *(data_i + 1) = x3r; + data_i += 8; + } + data_r -= 62; + data_i -= 62; + } + { + const FLOAT32 *twiddles = ptr_w; + FLOAT32 x0r, x0i, x1r, x1i; + for (j = 8; j != 0; j--) { + FLOAT32 W1 = *twiddles; + twiddles++; + FLOAT32 W4 = *twiddles; + twiddles++; + FLOAT32 tmp; + + x0r = *ptr_y; + x0i = *(ptr_y + 1); + ptr_y += 32; + ptr_xr += 32; + + x1r = *ptr_y; + x1i = *(ptr_y + 1); + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) - + ixheaacd_mult32X32float((FLOAT32)x1i, W4)); + x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4), + (FLOAT32)x1i, W1); + x1r = tmp; + + *ptr_xr = (x0r) - (x1r); + *(ptr_xr + 1) = (x0i) - (x1i); + ptr_y -= 32; + ptr_xr -= 32; + + *ptr_xr = (x0r) + (x1r); + *(ptr_xr + 1) = (x0i) + (x1i); + ptr_y += 2; + ptr_xr += 2; + + x0r = *ptr_z; + x0i = *(ptr_z + 1); + ptr_z += 32; + ptr_xi += 32; + + x1r = *ptr_z; + x1i = *(ptr_z + 1); + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W1) - + ixheaacd_mult32X32float((FLOAT32)x1i, W4)); + x1i = (FLOAT32)ixheaacd_mac32X32float(ixheaacd_mult32X32float((FLOAT32)x1r, W4), + (FLOAT32)x1i, W1); + x1r = tmp; + + *ptr_xi = (x0r) - (x1r); + *(ptr_xi + 1) = (x0i) - (x1i); + ptr_z -= 32; + ptr_xi -= 32; + + *ptr_xi = (x0r) + (x1r); + *(ptr_xi + 1) = (x0i) + (x1i); + ptr_z += 2; + ptr_xi += 2; + } + twiddles = ptr_w; + for (j = 8; j != 0; j--) { + FLOAT32 W1 = *twiddles; + twiddles++; + FLOAT32 W4 = *twiddles; + twiddles++; + FLOAT32 tmp; + + x0r = *ptr_y; + x0i = *(ptr_y + 1); + ptr_y += 32; + ptr_xr += 32; + + x1r = *ptr_y; + x1i = *(ptr_y + 1); + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) + + ixheaacd_mult32X32float((FLOAT32)x1i, W1)); + x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) + + ixheaacd_mult32X32float((FLOAT32)x1i, W4)); + x1r = tmp; + + *ptr_xr = (x0r) - (x1r); + *(ptr_xr + 1) = (x0i) - (x1i); + ptr_y -= 32; + ptr_xr -= 32; + + *ptr_xr = (x0r) + (x1r); + *(ptr_xr + 1) = (x0i) + (x1i); + ptr_y += 2; + ptr_xr += 2; + + x0r = *ptr_z; + x0i = *(ptr_z + 1); + ptr_z += 32; + ptr_xi += 32; + + x1r = *ptr_z; + x1i = *(ptr_z + 1); + + tmp = (FLOAT32)(ixheaacd_mult32X32float((FLOAT32)x1r, W4) + + ixheaacd_mult32X32float((FLOAT32)x1i, W1)); + x1i = (FLOAT32)(-ixheaacd_mult32X32float((FLOAT32)x1r, W1) + + ixheaacd_mult32X32float((FLOAT32)x1i, W4)); + x1r = tmp; + + *ptr_xi = (x0r) - (x1r); + *(ptr_xi + 1) = (x0i) - (x1i); + ptr_z -= 32; + ptr_xi -= 32; + + *ptr_xi = (x0r) + (x1r); + *(ptr_xi + 1) = (x0i) + (x1i); + ptr_z += 2; + ptr_xi += 2; + } + } +} + +VOID ixheaacd_mps_complex_fft(FLOAT32 *xr, FLOAT32 *xi, WORD32 nlength) { + WORD32 i, j, k, n_stages, h2; + FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; WORD32 del, nodespacing, in_loop_cnt; - WORD32 y[128]; + WORD32 dig_rev_shift; + WORD32 not_power_4; + FLOAT32 ptr_x[256]; + FLOAT32 y[256]; WORD32 npoints = nlength; - WORD32 *ptr_y = y; - const WORD32 *ptr_w; + FLOAT32 *ptr_y = y; + const FLOAT32 *ptr_w; + dig_rev_shift = ixheaacd_norm32(npoints) + 1 - 16; n_stages = 30 - ixheaacd_norm32(npoints); + not_power_4 = n_stages & 1; n_stages = n_stages >> 1; - ptr_w = ixheaacd_twiddle_table_fft_32x32; - for (i = 0; i < npoints; i += 4) { - WORD32 *inp = ptr_x; - h2 = ixheaacd_mps_dig_rev[i >> 2]; + for (i = 0; i<nlength; i++) + { + ptr_x[2 * i] = xr[i]; + ptr_x[2 * i + 1] = xi[i]; + } + + ptr_w = ixheaacd_twiddle_table_fft; + + for (i = 0; i<npoints; i += 4) + { + FLOAT32 *inp = ptr_x; + + DIG_REV(i, dig_rev_shift, h2); + if (not_power_4) + { + h2 += 1; + h2 &= ~1; + } inp += (h2); x0r = *inp; @@ -97,23 +918,23 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, x3r = *inp; x3i = *(inp + 1); - x0r = ixheaacd_add32_sat(x0r, x2r); - x0i = ixheaacd_add32_sat(x0i, x2i); - x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); - x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); - x1r = ixheaacd_add32_sat(x1r, x3r); - x1i = ixheaacd_add32_sat(x1i, x3i); - x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); - x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); - - x0r = ixheaacd_add32_sat(x0r, x1r); - x0i = ixheaacd_add32_sat(x0i, x1i); - x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); - x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); - x2r = ixheaacd_add32_sat(x2r, x3i); - x2i = ixheaacd_sub32_sat(x2i, x3r); - x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); - x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); + x0r = x0r + x2r; + x0i = x0i + x2i; + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i + x3i; + x3r = x1r - (x3r * 2); + x3i = x1i - (x3i * 2); + + x0r = x0r + x1r; + x0i = x0i + x1i; + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + x3i; + x2i = x2i - x3r; + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); *ptr_y++ = x0r; *ptr_y++ = x0i; @@ -128,13 +949,15 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, del = 4; nodespacing = 64; in_loop_cnt = npoints >> 4; - for (i = n_stages - 1; i > 0; i--) { - const WORD32 *twiddles = ptr_w; - WORD32 *data = ptr_y; - WORD32 w1h, w2h, w3h, w1l, w2l, w3l; + for (i = n_stages - 1; i>0; i--) + { + const FLOAT32 *twiddles = ptr_w; + FLOAT32 *data = ptr_y; + FLOAT32 w1h, w2h, w3h, w1l, w2l, w3l; WORD32 sec_loop_cnt; - for (k = in_loop_cnt; k != 0; k--) { + for (k = in_loop_cnt; k != 0; k--) + { x0r = (*data); x0i = (*(data + 1)); data += (del << 1); @@ -151,23 +974,23 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, x3i = (*(data + 1)); data -= 3 * (del << 1); - x0r = ixheaacd_add32_sat(x0r, x2r); - x0i = ixheaacd_add32_sat(x0i, x2i); - x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); - x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); - x1r = ixheaacd_add32_sat(x1r, x3r); - x1i = ixheaacd_add32_sat(x1i, x3i); - x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); - x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); - - x0r = ixheaacd_add32_sat(x0r, x1r); - x0i = ixheaacd_add32_sat(x0i, x1i); - x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); - x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); - x2r = ixheaacd_add32_sat(x2r, x3i); - x2i = ixheaacd_sub32_sat(x2i, x3r); - x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); - x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); + x0r = x0r + x2r; + x0i = x0i + x2i; + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i + x3i; + x3r = x1r - (x3r * 2); + x3i = x1i - (x3i * 2); + + x0r = x0r + x1r; + x0i = x0i + x1i; + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + x3i; + x2i = x2i - x3r; + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); *data = x0r; *(data + 1) = x0i; @@ -188,13 +1011,13 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, data = ptr_y + 2; sec_loop_cnt = (nodespacing * del); - sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - - (sec_loop_cnt / 16) + (sec_loop_cnt / 32) - - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - - (sec_loop_cnt / 256); + sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) \ + + (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) \ + - (sec_loop_cnt / 256); j = nodespacing; - for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { + for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) + { w1h = *(twiddles + 2 * j); w1l = *(twiddles + 2 * j + 1); w2h = *(twiddles + 2 * (j << 1)); @@ -202,9 +1025,10 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, w3h = *(twiddles + 2 * j + 2 * (j << 1)); w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1); - for (k = in_loop_cnt; k != 0; k--) { - WORD32 tmp; - WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + for (k = in_loop_cnt; k != 0; k--) + { + FLOAT32 tmp; + FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; data += (del << 1); @@ -220,41 +1044,38 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, x3i = *(data + 1); data -= 3 * (del << 1); - tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l), - ixheaacd_mult32_sat(x1i, w1h)); - x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); + tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h)); + x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l); x1r = tmp; - tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l), - ixheaacd_mult32_sat(x2i, w2h)); - x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l); + tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h)); + x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l); x2r = tmp; - tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3l), - ixheaacd_mult32_sat(x3i, w3h)); - x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l); + tmp = (ixheaacd_mult32X32float(x3r, w3l) - ixheaacd_mult32X32float(x3i, w3h)); + x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l); x3r = tmp; x0r = (*data); x0i = (*(data + 1)); - x0r = ixheaacd_add32_sat(x0r, x2r); - x0i = ixheaacd_add32_sat(x0i, x2i); - x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); - x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); - x1r = ixheaacd_add32_sat(x1r, x3r); - x1i = ixheaacd_add32_sat(x1i, x3i); - x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); - x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); - - x0r = ixheaacd_add32_sat(x0r, x1r); - x0i = ixheaacd_add32_sat(x0i, x1i); - x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); - x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); - x2r = ixheaacd_add32_sat(x2r, x3i); - x2i = ixheaacd_sub32_sat(x2i, x3r); - x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); - x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); + x0r = x0r + (x2r); + x0i = x0i + (x2i); + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i + x3i; + x3r = x1r - (x3r * 2); + x3i = x1i - (x3i * 2); + + x0r = x0r + (x1r); + x0i = x0i + (x1i); + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + (x3i); + x2i = x2i - (x3r); + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); *data = x0r; *(data + 1) = x0i; @@ -275,7 +1096,8 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, data -= 2 * npoints; data += 2; } - for (; j <= (nodespacing * del) >> 1; j += nodespacing) { + for (; j <= (nodespacing * del) >> 1; j += nodespacing) + { w1h = *(twiddles + 2 * j); w2h = *(twiddles + 2 * (j << 1)); w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); @@ -283,9 +1105,10 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, w2l = *(twiddles + 2 * (j << 1) + 1); w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); - for (k = in_loop_cnt; k != 0; k--) { - WORD32 tmp; - WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + for (k = in_loop_cnt; k != 0; k--) + { + FLOAT32 tmp; + FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; data += (del << 1); @@ -301,42 +1124,38 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, x3i = *(data + 1); data -= 3 * (del << 1); - tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l), - ixheaacd_mult32_sat(x1i, w1h)); - x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); + tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h)); + x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l); x1r = tmp; - tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l), - ixheaacd_mult32_sat(x2i, w2h)); - x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l); + tmp = (ixheaacd_mult32X32float(x2r, w2l) - ixheaacd_mult32X32float(x2i, w2h)); + x2i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x2r, w2h), x2i, w2l); x2r = tmp; - tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h), - ixheaacd_mult32_sat(x3i, w3l)); - x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h), - ixheaacd_mult32_sat(x3r, w3l)); + tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l)); + x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h); x3r = tmp; x0r = (*data); x0i = (*(data + 1)); - x0r = ixheaacd_add32_sat(x0r, x2r); - x0i = ixheaacd_add32_sat(x0i, x2i); - x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); - x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); - x1r = ixheaacd_add32_sat(x1r, x3r); - x1i = ixheaacd_add32_sat(x1i, x3i); - x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); - x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); - - x0r = ixheaacd_add32_sat(x0r, x1r); - x0i = ixheaacd_add32_sat(x0i, x1i); - x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); - x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); - x2r = ixheaacd_add32_sat(x2r, x3i); - x2i = ixheaacd_sub32_sat(x2i, x3r); - x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); - x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); + x0r = x0r + (x2r); + x0i = x0i + (x2i); + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i + x3i; + x3r = x1r - (x3r * 2); + x3i = x1i - (x3i * 2); + + x0r = x0r + (x1r); + x0i = x0i + (x1i); + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + (x3i); + x2i = x2i - (x3r); + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); *data = x0r; *(data + 1) = x0i; @@ -357,7 +1176,8 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, data -= 2 * npoints; data += 2; } - for (; j <= sec_loop_cnt * 2; j += nodespacing) { + for (; j <= sec_loop_cnt * 2; j += nodespacing) + { w1h = *(twiddles + 2 * j); w2h = *(twiddles + 2 * (j << 1) - 512); w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); @@ -365,9 +1185,10 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, w2l = *(twiddles + 2 * (j << 1) - 511); w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); - for (k = in_loop_cnt; k != 0; k--) { - WORD32 tmp; - WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + for (k = in_loop_cnt; k != 0; k--) + { + FLOAT32 tmp; + FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; data += (del << 1); @@ -383,43 +1204,38 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, x3i = *(data + 1); data -= 3 * (del << 1); - tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l), - ixheaacd_mult32_sat(x1i, w1h)); - x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); + tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h)); + x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l); x1r = tmp; - tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h), - ixheaacd_mult32_sat(x2i, w2l)); - x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h), - ixheaacd_mult32_sat(x2r, w2l)); + tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l)); + x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h); x2r = tmp; - tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h), - ixheaacd_mult32_sat(x3i, w3l)); - x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h), - ixheaacd_mult32_sat(x3r, w3l)); + tmp = (ixheaacd_mult32X32float(x3r, w3h) + ixheaacd_mult32X32float(x3i, w3l)); + x3i = -ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h); x3r = tmp; x0r = (*data); x0i = (*(data + 1)); - x0r = ixheaacd_add32_sat(x0r, x2r); - x0i = ixheaacd_add32_sat(x0i, x2i); - x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); - x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); - x1r = ixheaacd_add32_sat(x1r, x3r); - x1i = ixheaacd_add32_sat(x1i, x3i); - x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); - x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); - - x0r = ixheaacd_add32_sat(x0r, x1r); - x0i = ixheaacd_add32_sat(x0i, x1i); - x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); - x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); - x2r = ixheaacd_add32_sat(x2r, x3i); - x2i = ixheaacd_sub32_sat(x2i, x3r); - x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); - x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); + x0r = x0r + (x2r); + x0i = x0i + (x2i); + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i + x3i; + x3r = x1r - (x3r * 2); + x3i = x1i - (x3i * 2); + + x0r = x0r + (x1r); + x0i = x0i + (x1i); + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + (x3i); + x2i = x2i - (x3r); + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); *data = x0r; *(data + 1) = x0i; @@ -440,7 +1256,8 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, data -= 2 * npoints; data += 2; } - for (; j < nodespacing * del; j += nodespacing) { + for (; j<nodespacing * del; j += nodespacing) + { w1h = *(twiddles + 2 * j); w2h = *(twiddles + 2 * (j << 1) - 512); w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024); @@ -448,9 +1265,10 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, w2l = *(twiddles + 2 * (j << 1) - 511); w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023); - for (k = in_loop_cnt; k != 0; k--) { - WORD32 tmp; - WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + for (k = in_loop_cnt; k != 0; k--) + { + FLOAT32 tmp; + FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; data += (del << 1); @@ -466,42 +1284,38 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, x3i = *(data + 1); data -= 3 * (del << 1); - tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l), - ixheaacd_mult32_sat(x1i, w1h)); - x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); + tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h)); + x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l); x1r = tmp; - tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h), - ixheaacd_mult32_sat(x2i, w2l)); - x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h), - ixheaacd_mult32_sat(x2r, w2l)); + tmp = (ixheaacd_mult32X32float(x2r, w2h) + ixheaacd_mult32X32float(x2i, w2l)); + x2i = -ixheaacd_mult32X32float(x2r, w2l) + ixheaacd_mult32X32float(x2i, w2h); x2r = tmp; - tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h), - ixheaacd_mult32_sat(x3r, w3l)); - x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l); + tmp = (-ixheaacd_mult32X32float(x3r, w3l) + ixheaacd_mult32X32float(x3i, w3h)); + x3i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x3r, w3h), x3i, w3l); x3r = tmp; x0r = (*data); x0i = (*(data + 1)); - x0r = ixheaacd_add32_sat(x0r, x2r); - x0i = ixheaacd_add32_sat(x0i, x2i); - x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); - x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); - x1r = ixheaacd_add32_sat(x1r, x3r); - x1i = ixheaacd_sub32_sat(x1i, x3i); - x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); - x3i = ixheaacd_add32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); - - x0r = ixheaacd_add32_sat(x0r, x1r); - x0i = ixheaacd_add32_sat(x0i, x1i); - x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); - x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); - x2r = ixheaacd_add32_sat(x2r, x3i); - x2i = ixheaacd_sub32_sat(x2i, x3r); - x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); - x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); + x0r = x0r + (x2r); + x0i = x0i + (x2i); + x2r = x0r - (x2r * 2); + x2i = x0i - (x2i * 2); + x1r = x1r + x3r; + x1i = x1i - x3i; + x3r = x1r - (x3r * 2); + x3i = x1i + (x3i * 2); + + x0r = x0r + (x1r); + x0i = x0i + (x1i); + x1r = x0r - (x1r * 2); + x1i = x0i - (x1i * 2); + x2r = x2r + (x3i); + x2i = x2i - (x3r); + x3i = x2r - (x3i * 2); + x3r = x2i + (x3r * 2); *data = x0r; *(data + 1) = x0i; @@ -526,10 +1340,70 @@ VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, del <<= 2; in_loop_cnt >>= 2; } + if (not_power_4) + { + const FLOAT32 *twiddles = ptr_w; + nodespacing <<= 1; + + for (j = del / 2; j != 0; j--) + { + FLOAT32 w1h = *twiddles; + FLOAT32 w1l = *(twiddles + 1); + FLOAT32 tmp; + twiddles += nodespacing * 2; + + x0r = *ptr_y; + x0i = *(ptr_y + 1); + ptr_y += (del << 1); + + x1r = *ptr_y; + x1i = *(ptr_y + 1); + + tmp = (ixheaacd_mult32X32float(x1r, w1l) - ixheaacd_mult32X32float(x1i, w1h)); + x1i = ixheaacd_mac32X32float(ixheaacd_mult32X32float(x1r, w1h), x1i, w1l); + x1r = tmp; + + *ptr_y = (x0r) - (x1r); + *(ptr_y + 1) = (x0i) - (x1i); + ptr_y -= (del << 1); + + *ptr_y = (x0r) + (x1r); + *(ptr_y + 1) = (x0i) + (x1i); + ptr_y += 2; + } + twiddles = ptr_w; + for (j = del / 2; j != 0; j--) + { + FLOAT32 w1h = *twiddles; + FLOAT32 w1l = *(twiddles + 1); + FLOAT32 tmp; + twiddles += nodespacing * 2; + + x0r = *ptr_y; + x0i = *(ptr_y + 1); + ptr_y += (del << 1); + + x1r = *ptr_y; + x1i = *(ptr_y + 1); + + tmp = (ixheaacd_mult32X32float(x1r, w1h) + ixheaacd_mult32X32float(x1i, w1l)); + x1i = -ixheaacd_mult32X32float(x1r, w1l) + ixheaacd_mult32X32float(x1i, w1h); + x1r = tmp; + + *ptr_y = (x0r) - (x1r); + *(ptr_y + 1) = (x0i) - (x1i); + ptr_y -= (del << 1); + + *ptr_y = (x0r) + (x1r); + *(ptr_y + 1) = (x0i) + (x1i); + ptr_y += 2; + } + } - for (i = 0; i < 2 * nlength; i += 2) { - fin_re[i] = y[i]; - fin_im[i] = y[i + 1]; + for (i = 0; i<nlength; i++) + { + xr[i] = y[2 * i]; + xi[i] = y[2 * i + 1]; } return; @@ -1087,7 +1961,6 @@ VOID ixheaacd_complex_fft_p2_dec(WORD32 *xr, WORD32 *xi, WORD32 nlength, ptr_y += 2; } } - } else { @@ -1715,13 +2588,6 @@ VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength, if (fft_mode < 0) { for (i = 0; i < nlength; i += 3) { - tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i], (*w1r)), - ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1i))); - ptr_x[2 * i + 1] = - ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i], (*w1i)), - ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1r))); - ptr_x[2 * i] = tmp; - w1r++; w1i++; @@ -1749,13 +2615,6 @@ VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength, else { for (i = 0; i < nlength; i += 3) { - tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i], (*w1r)), - ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1i))); - ptr_x[2 * i + 1] = - ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1r)), - ixheaacd_mult32_sat(ptr_x[2 * i], (*w1i))); - ptr_x[2 * i] = tmp; - w1r++; w1i++; @@ -1789,34 +2648,25 @@ VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength, ptr_y = ptr_y + 6; } + ptr_y = y; for (i = 0; i < mpass; i++) { - xr[i] = y[6 * i]; - xi[i] = y[6 * i + 1]; - } - - for (i = 0; i < mpass; i++) { - xr[mpass + i] = y[6 * i + 2]; - xi[mpass + i] = y[6 * i + 3]; + xr[i] = *ptr_y++; + xi[i] = *ptr_y++; + xr[mpass + i] = *ptr_y++; + xi[mpass + i] = *ptr_y++; + xr[2 * mpass + i] = *ptr_y++; + xi[2 * mpass + i] = *ptr_y++; } - for (i = 0; i < mpass; i++) { - xr[2 * mpass + i] = y[6 * i + 4]; - xi[2 * mpass + i] = y[6 * i + 5]; - } return; } -WORD32 ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength, - WORD32 fft_mode, WORD32 *preshift) { +VOID ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength, WORD32 fft_mode, + WORD32 *preshift) { if (nlength & (nlength - 1)) { - if ((nlength != 24) && (nlength != 48) && (nlength != 96) && - (nlength != 192) && (nlength != 384)) { - printf("%d point FFT not supported", nlength); - return IA_FATAL_ERROR; - } ixheaacd_complex_fft_p3(data_r, data_i, nlength, fft_mode, preshift); } else (*ixheaacd_complex_fft_p2)(data_r, data_i, nlength, fft_mode, preshift); - return 0; + return; } |