diff options
Diffstat (limited to 'modules/audio_coding')
16 files changed, 2198 insertions, 217 deletions
diff --git a/modules/audio_coding/codecs/isac/fix/source/codec.h b/modules/audio_coding/codecs/isac/fix/source/codec.h index 2f649324..a38c6e56 100644 --- a/modules/audio_coding/codecs/isac/fix/source/codec.h +++ b/modules/audio_coding/codecs/isac/fix/source/codec.h @@ -101,6 +101,16 @@ void WebRtcIsacfix_Spec2TimeNeon(int16_t* inreQ7, int32_t* outre2Q16); #endif +#if defined(MIPS32_LE) +void WebRtcIsacfix_Time2SpecMIPS(int16_t* inre1Q9, + int16_t* inre2Q9, + int16_t* outre, + int16_t* outim); +void WebRtcIsacfix_Spec2TimeMIPS(int16_t* inreQ7, + int16_t* inimQ7, + int32_t* outre1Q16, + int32_t* outre2Q16); +#endif /* filterbank functions */ diff --git a/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h b/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h index 3fefc1a5..7a5f7462 100644 --- a/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h +++ b/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h @@ -23,10 +23,23 @@ extern "C" { * coefficient: Input. * state: Input/output, filter state, in Q4. 
*/ -void WebRtcIsacfix_HighpassFilterFixDec32(int16_t *io, - int16_t len, - const int16_t *coefficient, - int32_t *state); +typedef void (*HighpassFilterFixDec32)(int16_t* io, + int16_t len, + const int16_t* coefficient, + int32_t* state); +extern HighpassFilterFixDec32 WebRtcIsacfix_HighpassFilterFixDec32; + +void WebRtcIsacfix_HighpassFilterFixDec32C(int16_t* io, + int16_t len, + const int16_t* coefficient, + int32_t* state); + +#if defined(MIPS_DSP_R1_LE) +void WebRtcIsacfix_HighpassFilterFixDec32MIPS(int16_t* io, + int16_t len, + const int16_t* coefficient, + int32_t* state); +#endif typedef void (*AllpassFilter2FixDec16)( int16_t *data_ch1, // Input and output in channel 1, in Q0 diff --git a/modules/audio_coding/codecs/isac/fix/source/filterbanks.c b/modules/audio_coding/codecs/isac/fix/source/filterbanks.c index 64557e13..1928a7cb 100644 --- a/modules/audio_coding/codecs/isac/fix/source/filterbanks.c +++ b/modules/audio_coding/codecs/isac/fix/source/filterbanks.c @@ -86,10 +86,13 @@ void WebRtcIsacfix_AllpassFilter2FixDec16C( filter_state_ch2[1] = state1_ch2; } -void WebRtcIsacfix_HighpassFilterFixDec32(int16_t *io, - int16_t len, - const int16_t *coefficient, - int32_t *state) +// Declare a function pointer. 
+HighpassFilterFixDec32 WebRtcIsacfix_HighpassFilterFixDec32; + +void WebRtcIsacfix_HighpassFilterFixDec32C(int16_t *io, + int16_t len, + const int16_t *coefficient, + int32_t *state) { int k; int32_t a1 = 0, b1 = 0, c = 0, in = 0; diff --git a/modules/audio_coding/codecs/isac/fix/source/filterbanks_mips.c b/modules/audio_coding/codecs/isac/fix/source/filterbanks_mips.c index 1887745b..4dd70cf6 100644 --- a/modules/audio_coding/codecs/isac/fix/source/filterbanks_mips.c +++ b/modules/audio_coding/codecs/isac/fix/source/filterbanks_mips.c @@ -10,26 +10,26 @@ #include "webrtc/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h" -// WebRtcIsacfix_AllpassFilter2FixDec16 function optimized for MIPSDSP platform -// Bit-exact with WebRtcIsacfix_AllpassFilter2FixDec16C from filterbanks.c +// WebRtcIsacfix_AllpassFilter2FixDec16 function optimized for MIPSDSP platform. +// Bit-exact with WebRtcIsacfix_AllpassFilter2FixDec16C from filterbanks.c. void WebRtcIsacfix_AllpassFilter2FixDec16MIPS( - int16_t *data_ch1, // Input and output in channel 1, in Q0 - int16_t *data_ch2, // Input and output in channel 2, in Q0 - const int16_t *factor_ch1, // Scaling factor for channel 1, in Q15 - const int16_t *factor_ch2, // Scaling factor for channel 2, in Q15 - const int length, // Length of the data buffers - int32_t *filter_state_ch1, // Filter state for channel 1, in Q16 - int32_t *filter_state_ch2) { // Filter state for channel 2, in Q16 + int16_t* data_ch1, // Input and output in channel 1, in Q0. + int16_t* data_ch2, // Input and output in channel 2, in Q0. + const int16_t* factor_ch1, // Scaling factor for channel 1, in Q15. + const int16_t* factor_ch2, // Scaling factor for channel 2, in Q15. + const int length, // Length of the data buffers. + int32_t* filter_state_ch1, // Filter state for channel 1, in Q16. + int32_t* filter_state_ch2) { // Filter state for channel 2, in Q16. 
- int32_t st0_ch1, st1_ch1; // channel1 state variables - int32_t st0_ch2, st1_ch2; // channel2 state variables - int32_t f_ch10, f_ch11, f_ch20, f_ch21; // factor variables - int32_t r0, r1, r2, r3, r4, r5; // temporary ragister variables + int32_t st0_ch1, st1_ch1; // channel1 state variables. + int32_t st0_ch2, st1_ch2; // channel2 state variables. + int32_t f_ch10, f_ch11, f_ch20, f_ch21; // factor variables. + int32_t r0, r1, r2, r3, r4, r5; // temporary register variables. __asm __volatile ( ".set push \n\t" ".set noreorder \n\t" - // Load all the state and factor variables + // Load all the state and factor variables. "lh %[f_ch10], 0(%[factor_ch1]) \n\t" "lh %[f_ch20], 0(%[factor_ch2]) \n\t" "lh %[f_ch11], 2(%[factor_ch1]) \n\t" @@ -38,7 +38,7 @@ void WebRtcIsacfix_AllpassFilter2FixDec16MIPS( "lw %[st1_ch1], 4(%[filter_state_ch1]) \n\t" "lw %[st0_ch2], 0(%[filter_state_ch2]) \n\t" "lw %[st1_ch2], 4(%[filter_state_ch2]) \n\t" - // Allpass filtering loop + // Allpass filtering loop. "1: \n\t" "lh %[r0], 0(%[data_ch1]) \n\t" "lh %[r1], 0(%[data_ch2]) \n\t" @@ -80,7 +80,7 @@ void WebRtcIsacfix_AllpassFilter2FixDec16MIPS( "subq_s.w %[st1_ch2], %[r3], %[r1] \n\t" "bgtz %[length], 1b \n\t" " addiu %[data_ch2], %[data_ch2], 2 \n\t" - // Store channel states + // Store channel states. "sw %[st0_ch1], 0(%[filter_state_ch1]) \n\t" "sw %[st1_ch1], 4(%[filter_state_ch1]) \n\t" "sw %[st0_ch2], 0(%[filter_state_ch2]) \n\t" @@ -100,3 +100,143 @@ void WebRtcIsacfix_AllpassFilter2FixDec16MIPS( : "memory", "hi", "lo" ); } + +// WebRtcIsacfix_HighpassFilterFixDec32 function optimized for MIPSDSP platform. +// Bit-exact with WebRtcIsacfix_HighpassFilterFixDec32C from filterbanks.c. 
+void WebRtcIsacfix_HighpassFilterFixDec32MIPS(int16_t* io, + int16_t len, + const int16_t* coefficient, + int32_t* state) { + int k; + int32_t a1, a2, b1, b2, in; + int32_t state0 = state[0]; + int32_t state1 = state[1]; + + int32_t c0, c1, c2, c3; + int32_t c4, c5, c6, c7; + int32_t state0_lo, state0_hi; + int32_t state1_lo, state1_hi; + int32_t t0, t1, t2, t3, t4, t5; + + __asm __volatile ( + "lh %[c0], 0(%[coeff_ptr]) \n\t" + "lh %[c1], 2(%[coeff_ptr]) \n\t" + "lh %[c2], 4(%[coeff_ptr]) \n\t" + "lh %[c3], 6(%[coeff_ptr]) \n\t" + "sra %[state0_hi], %[state0], 16 \n\t" + "sra %[state1_hi], %[state1], 16 \n\t" + "andi %[state0_lo], %[state0], 0xFFFF \n\t" + "andi %[state1_lo], %[state1], 0xFFFF \n\t" + "lh %[c4], 8(%[coeff_ptr]) \n\t" + "lh %[c5], 10(%[coeff_ptr]) \n\t" + "lh %[c6], 12(%[coeff_ptr]) \n\t" + "lh %[c7], 14(%[coeff_ptr]) \n\t" + "sra %[state0_lo], %[state0_lo], 1 \n\t" + "sra %[state1_lo], %[state1_lo], 1 \n\t" + : [c0] "=&r" (c0), [c1] "=&r" (c1), [c2] "=&r" (c2), [c3] "=&r" (c3), + [c4] "=&r" (c4), [c5] "=&r" (c5), [c6] "=&r" (c6), [c7] "=&r" (c7), + [state0_hi] "=&r" (state0_hi), [state0_lo] "=&r" (state0_lo), + [state1_hi] "=&r" (state1_hi), [state1_lo] "=&r" (state1_lo) + : [coeff_ptr] "r" (coefficient), [state0] "r" (state0), + [state1] "r" (state1) + : "memory" + ); + + for (k = 0; k < len; k++) { + in = (int32_t)io[k]; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "mul %[t2], %[c4], %[state0_lo] \n\t" + "mul %[t0], %[c5], %[state0_lo] \n\t" + "mul %[t1], %[c4], %[state0_hi] \n\t" + "mul %[a1], %[c5], %[state0_hi] \n\t" + "mul %[t5], %[c6], %[state1_lo] \n\t" + "mul %[t3], %[c7], %[state1_lo] \n\t" + "mul %[t4], %[c6], %[state1_hi] \n\t" + "mul %[b1], %[c7], %[state1_hi] \n\t" + "shra_r.w %[t2], %[t2], 15 \n\t" + "shra_r.w %[t0], %[t0], 15 \n\t" + "addu %[t1], %[t1], %[t2] \n\t" + "addu %[a1], %[a1], %[t0] \n\t" + "sra %[t1], %[t1], 16 \n\t" + "addu %[a1], %[a1], %[t1] \n\t" + "shra_r.w %[t5], %[t5], 15 \n\t" + "shra_r.w 
%[t3], %[t3], 15 \n\t" + "addu %[t4], %[t4], %[t5] \n\t" + "addu %[b1], %[b1], %[t3] \n\t" + "sra %[t4], %[t4], 16 \n\t" + "addu %[b1], %[b1], %[t4] \n\t" + "mul %[t2], %[c0], %[state0_lo] \n\t" + "mul %[t0], %[c1], %[state0_lo] \n\t" + "mul %[t1], %[c0], %[state0_hi] \n\t" + "mul %[a2], %[c1], %[state0_hi] \n\t" + "mul %[t5], %[c2], %[state1_lo] \n\t" + "mul %[t3], %[c3], %[state1_lo] \n\t" + "mul %[t4], %[c2], %[state1_hi] \n\t" + "mul %[b2], %[c3], %[state1_hi] \n\t" + "shra_r.w %[t2], %[t2], 15 \n\t" + "shra_r.w %[t0], %[t0], 15 \n\t" + "addu %[t1], %[t1], %[t2] \n\t" + "addu %[a2], %[a2], %[t0] \n\t" + "sra %[t1], %[t1], 16 \n\t" + "addu %[a2], %[a2], %[t1] \n\t" + "shra_r.w %[t5], %[t5], 15 \n\t" + "shra_r.w %[t3], %[t3], 15 \n\t" + "addu %[t4], %[t4], %[t5] \n\t" + "addu %[b2], %[b2], %[t3] \n\t" + "sra %[t4], %[t4], 16 \n\t" + "addu %[b2], %[b2], %[t4] \n\t" + "addu %[a1], %[a1], %[b1] \n\t" + "sra %[a1], %[a1], 7 \n\t" + "addu %[a1], %[a1], %[in] \n\t" + "sll %[t0], %[in], 2 \n\t" + "addu %[a2], %[a2], %[b2] \n\t" + "subu %[t0], %[t0], %[a2] \n\t" + "shll_s.w %[a1], %[a1], 16 \n\t" + "shll_s.w %[t0], %[t0], 2 \n\t" + "sra %[a1], %[a1], 16 \n\t" + "addu %[state1_hi], %[state0_hi], $0 \n\t" + "addu %[state1_lo], %[state0_lo], $0 \n\t" + "sra %[state0_hi], %[t0], 16 \n\t" + "andi %[state0_lo], %[t0], 0xFFFF \n\t" + "sra %[state0_lo], %[state0_lo], 1 \n\t" + ".set pop \n\t" + : [a1] "=&r" (a1), [b1] "=&r" (b1), [a2] "=&r" (a2), [b2] "=&r" (b2), + [state0_hi] "+r" (state0_hi), [state0_lo] "+r" (state0_lo), + [state1_hi] "+r" (state1_hi), [state1_lo] "+r" (state1_lo), + [t0] "=&r" (t0), [t1] "=&r" (t1), [t2] "=&r" (t2), + [t3] "=&r" (t3), [t4] "=&r" (t4), [t5] "=&r" (t5) + : [c0] "r" (c0), [c1] "r" (c1), [c2] "r" (c2), [c3] "r" (c3), + [c4] "r" (c4), [c5] "r" (c5), [c6] "r" (c6), [c7] "r" (c7), + [in] "r" (in) + : "hi", "lo" + ); + io[k] = (int16_t)a1; + } + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" +#if !defined(MIPS_DSP_R2_LE) + "sll 
%[state0_hi], %[state0_hi], 16 \n\t" + "sll %[state0_lo], %[state0_lo], 1 \n\t" + "sll %[state1_hi], %[state1_hi], 16 \n\t" + "sll %[state1_lo], %[state1_lo], 1 \n\t" + "or %[state0_hi], %[state0_hi], %[state0_lo] \n\t" + "or %[state1_hi], %[state1_hi], %[state1_lo] \n\t" +#else + "sll %[state0_lo], %[state0_lo], 1 \n\t" + "sll %[state1_lo], %[state1_lo], 1 \n\t" + "precr_sra.ph.w %[state0_hi], %[state0_lo], 0 \n\t" + "precr_sra.ph.w %[state1_hi], %[state1_lo], 0 \n\t" +#endif + "sw %[state0_hi], 0(%[state]) \n\t" + "sw %[state1_hi], 4(%[state]) \n\t" + ".set pop \n\t" + : [state0_hi] "+r" (state0_hi), [state0_lo] "+r" (state0_lo), + [state1_hi] "+r" (state1_hi), [state1_lo] "+r" (state1_lo) + : [state] "r" (state) + : "memory" + ); +} diff --git a/modules/audio_coding/codecs/isac/fix/source/filterbanks_unittest.cc b/modules/audio_coding/codecs/isac/fix/source/filterbanks_unittest.cc index d7484277..d15318a7 100644 --- a/modules/audio_coding/codecs/isac/fix/source/filterbanks_unittest.cc +++ b/modules/audio_coding/codecs/isac/fix/source/filterbanks_unittest.cc @@ -86,6 +86,13 @@ TEST_F(FilterBanksTest, HighpassFilterFixDec32Test) { -1280, -8554, -14496, -7561, -23541, -27263, -30560, -32768, -3441, -32768, 25203, -27550, 22419}; #endif + HighpassFilterFixDec32 WebRtcIsacfix_HighpassFilterFixDec32; +#if defined(MIPS_DSP_R1_LE) + WebRtcIsacfix_HighpassFilterFixDec32 = + WebRtcIsacfix_HighpassFilterFixDec32MIPS; +#else + WebRtcIsacfix_HighpassFilterFixDec32 = WebRtcIsacfix_HighpassFilterFixDec32C; +#endif for (int i = 0; i < kSamples; i++) { in[i] = WEBRTC_SPL_WORD32_MAX / (i + 1); diff --git a/modules/audio_coding/codecs/isac/fix/source/isacfix.c b/modules/audio_coding/codecs/isac/fix/source/isacfix.c index 76359080..887a7ba2 100644 --- a/modules/audio_coding/codecs/isac/fix/source/isacfix.c +++ b/modules/audio_coding/codecs/isac/fix/source/isacfix.c @@ -209,9 +209,17 @@ static void WebRtcIsacfix_InitNeon(void) { static void WebRtcIsacfix_InitMIPS(void) { 
WebRtcIsacfix_AutocorrFix = WebRtcIsacfix_AutocorrMIPS; WebRtcIsacfix_FilterMaLoopFix = WebRtcIsacfix_FilterMaLoopMIPS; + WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeMIPS; + WebRtcIsacfix_Time2Spec = WebRtcIsacfix_Time2SpecMIPS; #if defined(MIPS_DSP_R1_LE) WebRtcIsacfix_AllpassFilter2FixDec16 = WebRtcIsacfix_AllpassFilter2FixDec16MIPS; + WebRtcIsacfix_HighpassFilterFixDec32 = + WebRtcIsacfix_HighpassFilterFixDec32MIPS; +#endif +#if defined(MIPS_DSP_R2_LE) + WebRtcIsacfix_CalculateResidualEnergy = + WebRtcIsacfix_CalculateResidualEnergyMIPS; #endif } #endif @@ -300,10 +308,11 @@ int16_t WebRtcIsacfix_EncoderInit(ISACFIX_MainStruct *ISAC_main_inst, WebRtcIsacfix_CalculateResidualEnergy = WebRtcIsacfix_CalculateResidualEnergyC; WebRtcIsacfix_AllpassFilter2FixDec16 = WebRtcIsacfix_AllpassFilter2FixDec16C; + WebRtcIsacfix_HighpassFilterFixDec32 = WebRtcIsacfix_HighpassFilterFixDec32C; WebRtcIsacfix_Time2Spec = WebRtcIsacfix_Time2SpecC; WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeC; WebRtcIsacfix_MatrixProduct1 = WebRtcIsacfix_MatrixProduct1C; - WebRtcIsacfix_MatrixProduct2 = WebRtcIsacfix_MatrixProduct2C ; + WebRtcIsacfix_MatrixProduct2 = WebRtcIsacfix_MatrixProduct2C; #ifdef WEBRTC_DETECT_ARM_NEON if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { diff --git a/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi b/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi index a18a803d..e5aade65 100644 --- a/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi +++ b/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi @@ -47,12 +47,14 @@ 'lpc_masking_model.c', 'lpc_tables.c', 'pitch_estimator.c', + 'pitch_estimator_c.c', 'pitch_filter.c', 'pitch_filter_c.c', 'pitch_gain_tables.c', 'pitch_lag_tables.c', 'spectrum_ar_model_tables.c', 'transform.c', + 'transform_tables.c', 'arith_routins.h', 'bandwidth_estimator.h', 'codec.h', @@ -89,9 +91,12 @@ 'sources': [ 'filters_mips.c', 'lattice_mips.c', + 'pitch_estimator_mips.c', + 'transform_mips.c', 
], 'sources!': [ 'lattice_c.c', + 'pitch_estimator_c.c', ], 'conditions': [ ['mips_dsp_rev>0', { @@ -101,6 +106,7 @@ }], ['mips_dsp_rev>1', { 'sources': [ + 'lpc_masking_model_mips.c', 'pitch_filter_mips.c', ], 'sources!': [ diff --git a/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h b/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h index 72e0cfc4..1270c142 100644 --- a/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h +++ b/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h @@ -62,6 +62,15 @@ int32_t WebRtcIsacfix_CalculateResidualEnergyNeon(int lpc_order, int* q_val_residual_energy); #endif +#if defined(MIPS_DSP_R2_LE) +int32_t WebRtcIsacfix_CalculateResidualEnergyMIPS(int lpc_order, + int32_t q_val_corr, + int q_val_polynomial, + int16_t* a_polynomial, + int32_t* corr_coeffs, + int* q_val_residual_energy); +#endif + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model_mips.c b/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model_mips.c new file mode 100644 index 00000000..55602b97 --- /dev/null +++ b/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model_mips.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h" + +// MIPS DSPR2 optimization for function WebRtcIsacfix_CalculateResidualEnergy +// Bit-exact with WebRtcIsacfix_CalculateResidualEnergyC from file +// lpc_masking_model.c +int32_t WebRtcIsacfix_CalculateResidualEnergyMIPS(int lpc_order, + int32_t q_val_corr, + int q_val_polynomial, + int16_t* a_polynomial, + int32_t* corr_coeffs, + int* q_val_residual_energy) { + + int i = 0, j = 0; + int shift_internal = 0, shift_norm = 0; + int32_t tmp32 = 0, word32_high = 0, word32_low = 0, residual_energy = 0; + int32_t tmp_corr_c = corr_coeffs[0]; + int16_t* tmp_a_poly = &a_polynomial[0]; + int32_t sum64_hi = 0; + int32_t sum64_lo = 0; + + for (j = 0; j <= lpc_order; j++) { + // For the case of i == 0: + // residual_energy += + // a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i]; + + int32_t tmp2, tmp3; + int16_t sign_1; + int16_t sign_2; + int16_t sign_3; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[tmp2], 0(%[tmp_a_poly]) \n\t" + "mul %[tmp32], %[tmp2], %[tmp2] \n\t" + "addiu %[tmp_a_poly], %[tmp_a_poly], 2 \n\t" + "sra %[sign_2], %[sum64_hi], 31 \n\t" + "mult $ac0, %[tmp32], %[tmp_corr_c] \n\t" + "shilov $ac0, %[shift_internal] \n\t" + "mfhi %[tmp2], $ac0 \n\t" + "mflo %[tmp3], $ac0 \n\t" + "sra %[sign_1], %[tmp2], 31 \n\t" + "xor %[sign_3], %[sign_1], %[sign_2] \n\t" + ".set pop \n\t" + : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32), + [tmp_a_poly] "+r" (tmp_a_poly), [sign_1] "=&r" (sign_1), + [sign_3] "=&r" (sign_3), [sign_2] "=&r" (sign_2), + [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo) + : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal) + : "hi", "lo", "memory" + ); + + if (sign_3 != 0) { + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t" + "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t" + ".set pop \n\t" + : [sum64_hi] "+r" (sum64_hi), 
[sum64_lo] "+r" (sum64_lo) + : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3) + : "hi", "lo", "memory" + ); + } else { + if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) || + ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) { + // Shift right for overflow. + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[shift_internal], %[shift_internal], 1 \n\t" + "prepend %[sum64_lo], %[sum64_hi], 1 \n\t" + "sra %[sum64_hi], %[sum64_hi], 1 \n\t" + "prepend %[tmp3], %[tmp2], 1 \n\t" + "sra %[tmp2], %[tmp2], 1 \n\t" + "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t" + "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t" + ".set pop \n\t" + : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), + [shift_internal] "+r" (shift_internal), + [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo) + : + : "hi", "lo", "memory" + ); + } else { + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t" + "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t" + ".set pop \n\t" + : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo) + : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3) + : "hi", "lo", "memory" + ); + } + } + } + + for (i = 1; i <= lpc_order; i++) { + tmp_corr_c = corr_coeffs[i]; + int16_t* tmp_a_poly_j = &a_polynomial[i]; + int16_t* tmp_a_poly_j_i = &a_polynomial[0]; + for (j = i; j <= lpc_order; j++) { + // For the case of i = 1 .. 
lpc_order: + // residual_energy += + // a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i] * 2; + + int32_t tmp2, tmp3; + int16_t sign_1; + int16_t sign_2; + int16_t sign_3; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[tmp3], 0(%[tmp_a_poly_j]) \n\t" + "lh %[tmp2], 0(%[tmp_a_poly_j_i]) \n\t" + "addiu %[tmp_a_poly_j], %[tmp_a_poly_j], 2 \n\t" + "addiu %[tmp_a_poly_j_i], %[tmp_a_poly_j_i], 2 \n\t" + "mul %[tmp32], %[tmp3], %[tmp2] \n\t" + "sll %[tmp32], %[tmp32], 1 \n\t" + "mult $ac0, %[tmp32], %[tmp_corr_c] \n\t" + "shilov $ac0, %[shift_internal] \n\t" + "mfhi %[tmp2], $ac0 \n\t" + "mflo %[tmp3], $ac0 \n\t" + "sra %[sign_1], %[tmp2], 31 \n\t" + "sra %[sign_2], %[sum64_hi], 31 \n\t" + "xor %[sign_3], %[sign_1], %[sign_2] \n\t" + ".set pop \n\t" + : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32), + [tmp_a_poly_j] "+r" (tmp_a_poly_j), [sign_1] "=&r" (sign_1), + [tmp_a_poly_j_i] "+r" (tmp_a_poly_j_i), [sign_2] "=&r" (sign_2), + [sign_3] "=&r" (sign_3), [sum64_hi] "+r" (sum64_hi), + [sum64_lo] "+r" (sum64_lo) + : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal) + : "hi", "lo", "memory" + ); + if (sign_3 != 0) { + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t" + "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t" + ".set pop \n\t" + : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), [sum64_hi] "+r" (sum64_hi), + [sum64_lo] "+r" (sum64_lo) + : + :"memory" + ); + } else { + // Test overflow and sum the result. + if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) || + ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) { + // Shift right for overflow. 
+ __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[shift_internal], %[shift_internal], 1 \n\t" + "prepend %[sum64_lo], %[sum64_hi], 1 \n\t" + "sra %[sum64_hi], %[sum64_hi], 1 \n\t" + "prepend %[tmp3], %[tmp2], 1 \n\t" + "sra %[tmp2], %[tmp2], 1 \n\t" + "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t" + "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t" + ".set pop \n\t" + : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), + [shift_internal] "+r" (shift_internal), + [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo) + : + : "hi", "lo", "memory" + ); + } else { + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t" + "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t" + ".set pop \n\t" + : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), + [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo) + : + : "hi", "lo", "memory" + ); + } + } + } + } + word32_high = sum64_hi; + word32_low = sum64_lo; + + // Calculate the value of shifting (shift_norm) for the 64-bit sum. 
+ if (word32_high != 0) { + shift_norm = 32 - WebRtcSpl_NormW32(word32_high); + int tmp1; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "srl %[residual_energy], %[sum64_lo], %[shift_norm] \n\t" + "li %[tmp1], 32 \n\t" + "subu %[tmp1], %[tmp1], %[shift_norm] \n\t" + "sll %[tmp1], %[sum64_hi], %[tmp1] \n\t" + "or %[residual_energy], %[residual_energy], %[tmp1] \n\t" + ".set pop \n\t" + : [residual_energy] "=&r" (residual_energy), [tmp1]"=&r"(tmp1), + [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo) + : [shift_norm] "r" (shift_norm) + : "memory" + ); + } else { + if ((word32_low & 0x80000000) != 0) { + shift_norm = 1; + residual_energy = (uint32_t)word32_low >> 1; + } else { + shift_norm = WebRtcSpl_NormW32(word32_low); + residual_energy = word32_low << shift_norm; + shift_norm = -shift_norm; + } + } + + // Q(q_val_polynomial * 2) * Q(q_val_corr) >> shift_internal >> shift_norm + // = Q(q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2) + *q_val_residual_energy = + q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2; + + return residual_energy; +} diff --git a/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.c b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.c index 9c4e5875..426b2cf4 100644 --- a/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.c +++ b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.c @@ -29,7 +29,7 @@ static const int16_t kACoefQ12[3] = { -static __inline int32_t Log2Q8( uint32_t x ) { +__inline int32_t WebRtcIsacfix_Log2Q8( uint32_t x ) { int32_t zeros, lg2; int16_t frac; @@ -153,109 +153,7 @@ static void FindFour32(int32_t *in, int16_t length, int16_t *bestind) -static void PCorr2Q32(const int16_t *in, int32_t *logcorQ8) -{ - int16_t scaling,n,k; - int32_t ysum32,csum32, lys, lcs; - int32_t oneQ8; - - - const int16_t *x, *inptr; - - oneQ8 = WEBRTC_SPL_LSHIFT_W32((int32_t)1, 8); // 1.00 in Q8 - - x = in + PITCH_MAX_LAG/2 + 2; - scaling = 
WebRtcSpl_GetScalingSquare ((int16_t *) in, PITCH_CORR_LEN2, PITCH_CORR_LEN2); - ysum32 = 1; - csum32 = 0; - x = in + PITCH_MAX_LAG/2 + 2; - for (n = 0; n < PITCH_CORR_LEN2; n++) { - ysum32 += WEBRTC_SPL_MUL_16_16_RSFT( (int16_t) in[n],(int16_t) in[n], scaling); // Q0 - csum32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t) x[n],(int16_t) in[n], scaling); // Q0 - } - - logcorQ8 += PITCH_LAG_SPAN2 - 1; - - lys=Log2Q8((uint32_t) ysum32); // Q8 - lys=WEBRTC_SPL_RSHIFT_W32(lys, 1); //sqrt(ysum); - - if (csum32>0) { - - lcs=Log2Q8((uint32_t) csum32); // 2log(csum) in Q8 - - if (lcs>(lys + oneQ8) ){ // csum/sqrt(ysum) > 2 in Q8 - *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum)) - } else { - *logcorQ8 = oneQ8; // 1.00 - } - - } else { - *logcorQ8 = 0; - } - - - for (k = 1; k < PITCH_LAG_SPAN2; k++) { - inptr = &in[k]; - ysum32 -= WEBRTC_SPL_MUL_16_16_RSFT( (int16_t) in[k-1],(int16_t) in[k-1], scaling); - ysum32 += WEBRTC_SPL_MUL_16_16_RSFT( (int16_t) in[PITCH_CORR_LEN2 + k - 1],(int16_t) in[PITCH_CORR_LEN2 + k - 1], scaling); - -#ifdef WEBRTC_ARCH_ARM_NEON - { - int32_t vbuff[4]; - int32x4_t int_32x4_sum = vmovq_n_s32(0); - // Can't shift a Neon register to right with a non-constant shift value. - int32x4_t int_32x4_scale = vdupq_n_s32(-scaling); - // Assert a codition used in loop unrolling at compile-time. - COMPILE_ASSERT(PITCH_CORR_LEN2 %4 == 0); - - for (n = 0; n < PITCH_CORR_LEN2; n += 4) { - int16x4_t int_16x4_x = vld1_s16(&x[n]); - int16x4_t int_16x4_in = vld1_s16(&inptr[n]); - int32x4_t int_32x4 = vmull_s16(int_16x4_x, int_16x4_in); - int_32x4 = vshlq_s32(int_32x4, int_32x4_scale); - int_32x4_sum = vaddq_s32(int_32x4_sum, int_32x4); - } - - // Use vector store to avoid long stall from data trasferring - // from vector to general register. 
- vst1q_s32(vbuff, int_32x4_sum); - csum32 = vbuff[0] + vbuff[1]; - csum32 += vbuff[2]; - csum32 += vbuff[3]; - } -#else - csum32 = 0; - if(scaling == 0) { - for (n = 0; n < PITCH_CORR_LEN2; n++) { - csum32 += x[n] * inptr[n]; - } - } else { - for (n = 0; n < PITCH_CORR_LEN2; n++) { - csum32 += (x[n] * inptr[n]) >> scaling; - } - } -#endif - - logcorQ8--; - - lys=Log2Q8((uint32_t)ysum32); // Q8 - lys=WEBRTC_SPL_RSHIFT_W32(lys, 1); //sqrt(ysum); - - if (csum32>0) { - - lcs=Log2Q8((uint32_t) csum32); // 2log(csum) in Q8 - - if (lcs>(lys + oneQ8) ){ // csum/sqrt(ysum) > 2 - *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum)) - } else { - *logcorQ8 = oneQ8; // 1.00 - } - - } else { - *logcorQ8 = 0; - } - } -} +extern void WebRtcIsacfix_PCorr2Q32(const int16_t *in, int32_t *logcorQ8); @@ -311,12 +209,13 @@ void WebRtcIsacfix_InitialPitch(const int16_t *in, /* Q0 */ /* compute correlation for first and second half of the frame */ - PCorr2Q32(buf_dec16, crrvecQ8_1); - PCorr2Q32(buf_dec16 + PITCH_CORR_STEP2, crrvecQ8_2); + WebRtcIsacfix_PCorr2Q32(buf_dec16, crrvecQ8_1); + WebRtcIsacfix_PCorr2Q32(buf_dec16 + PITCH_CORR_STEP2, crrvecQ8_2); /* bias towards pitch lag of previous frame */ - tmp32a = Log2Q8((uint32_t) old_lagQ8) - 2304; // log2(0.5*oldlag) in Q8 + tmp32a = WebRtcIsacfix_Log2Q8((uint32_t) old_lagQ8) - 2304; + // log2(0.5*oldlag) in Q8 tmp32b = WEBRTC_SPL_MUL_16_16_RSFT(oldgQ12,oldgQ12, 10); //Q12 & * 4.0; gain_bias16 = (int16_t) tmp32b; //Q12 if (gain_bias16 > 3276) gain_bias16 = 3276; // 0.8 in Q12 @@ -325,7 +224,7 @@ void WebRtcIsacfix_InitialPitch(const int16_t *in, /* Q0 */ for (k = 0; k < PITCH_LAG_SPAN2; k++) { if (crrvecQ8_1[k]>0) { - tmp32b = Log2Q8((uint32_t) (k + (PITCH_MIN_LAG/2-2))); + tmp32b = WebRtcIsacfix_Log2Q8((uint32_t) (k + (PITCH_MIN_LAG/2-2))); tmp16a = (int16_t) (tmp32b - tmp32a); // Q8 & fabs(ratio)<4 tmp32c = WEBRTC_SPL_MUL_16_16_RSFT(tmp16a,tmp16a, 6); //Q10 tmp16b = (int16_t) tmp32c; // Q10 & <8 @@ -334,7 +233,8 @@ void 
WebRtcIsacfix_InitialPitch(const int16_t *in, /* Q0 */ tmp16d = Exp2Q10((int16_t) -tmp16c); //Q10 tmp32c = WEBRTC_SPL_MUL_16_16_RSFT(gain_bias16,tmp16d,13); // Q10 & * 0.5 bias16 = (int16_t) (1024 + tmp32c); // Q10 - tmp32b = Log2Q8((uint32_t) bias16) - 2560; // Q10 in -> Q8 out with 10*2^8 offset + tmp32b = WebRtcIsacfix_Log2Q8((uint32_t)bias16) - 2560; + // Q10 in -> Q8 out with 10*2^8 offset crrvecQ8_1[k] += tmp32b ; // -10*2^8 offset } } @@ -407,7 +307,7 @@ void WebRtcIsacfix_InitialPitch(const int16_t *in, /* Q0 */ xq[0] = WEBRTC_SPL_LSHIFT_W32(xq[0], 8); Intrp1DQ8(xq, fxq, yq, fyq); - tmp32a= Log2Q8((uint32_t) *yq) - 2048; // offset 8*2^8 + tmp32a= WebRtcIsacfix_Log2Q8((uint32_t) *yq) - 2048; // offset 8*2^8 /* Bias towards short lags */ /* log(pow(0.8, log(2.0 * *y )))/log(2.0) */ tmp32b= WEBRTC_SPL_MUL_16_16_RSFT((int16_t) tmp32a, -42, 8); @@ -437,10 +337,13 @@ void WebRtcIsacfix_InitialPitch(const int16_t *in, /* Q0 */ tmp32b = (int32_t) (WEBRTC_SPL_LSHIFT_W32(tmp32a, 1)) - ratq; // Q8 tmp32c = WEBRTC_SPL_MUL_16_16_RSFT((int16_t) tmp32b, (int16_t) tmp32b, 8); // Q8 - tmp32b = (int32_t) tmp32c + (int32_t) WEBRTC_SPL_RSHIFT_W32(ratq, 1); // (k-r)^2 + 0.5 * r Q8 - tmp32c = Log2Q8((uint32_t) tmp32a) - 2048; // offset 8*2^8 , log2(0.5*k) Q8 - tmp32d = Log2Q8((uint32_t) tmp32b) - 2048; // offset 8*2^8 , log2(0.5*k) Q8 - tmp32e = tmp32c -tmp32d; + tmp32b = (int32_t)tmp32c + (int32_t)WEBRTC_SPL_RSHIFT_W32(ratq, 1); + // (k-r)^2 + 0.5 * r Q8 + tmp32c = WebRtcIsacfix_Log2Q8((uint32_t)tmp32a) - 2048; + // offset 8*2^8 , log2(0.5*k) Q8 + tmp32d = WebRtcIsacfix_Log2Q8((uint32_t)tmp32b) - 2048; + // offset 8*2^8 , log2(0.5*k) Q8 + tmp32e = tmp32c - tmp32d; cv2q[k] += WEBRTC_SPL_RSHIFT_W32(tmp32e, 1); @@ -481,7 +384,7 @@ void WebRtcIsacfix_InitialPitch(const int16_t *in, /* Q0 */ /* Bias towards short lags */ /* log(pow(0.8, log(2.0f * *y )))/log(2.0f) */ - tmp32a= Log2Q8((uint32_t) *yq) - 2048; // offset 8*2^8 + tmp32a= WebRtcIsacfix_Log2Q8((uint32_t) *yq) - 2048; // 
offset 8*2^8 tmp32b= WEBRTC_SPL_MUL_16_16_RSFT((int16_t) tmp32a, -82, 8); tmp32c= tmp32b + 256; *fyq += tmp32c; diff --git a/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h index 93c81c8e..da401e5f 100644 --- a/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h +++ b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h @@ -58,4 +58,8 @@ void WebRtcIsacfix_DecimateAllpass32(const int16_t *in, int16_t N, /* number of input samples */ int16_t *out); /* array of size N/2 */ +int32_t WebRtcIsacfix_Log2Q8( uint32_t x ); + +void WebRtcIsacfix_PCorr2Q32(const int16_t* in, int32_t* logcorQ8); + #endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_PITCH_ESTIMATOR_H_ */ diff --git a/modules/audio_coding/codecs/isac/fix/source/pitch_estimator_c.c b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator_c.c new file mode 100644 index 00000000..82155d27 --- /dev/null +++ b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator_c.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h" + +#ifdef WEBRTC_ARCH_ARM_NEON +#include <arm_neon.h> +#endif + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/system_wrappers/interface/compile_assert_c.h" + +extern int32_t WebRtcIsacfix_Log2Q8(uint32_t x); + +void WebRtcIsacfix_PCorr2Q32(const int16_t* in, int32_t* logcorQ8) { + int16_t scaling,n,k; + int32_t ysum32,csum32, lys, lcs; + int32_t oneQ8; + const int16_t* x; + const int16_t* inptr; + + oneQ8 = WEBRTC_SPL_LSHIFT_W32((int32_t)1, 8); // 1.00 in Q8 + + x = in + PITCH_MAX_LAG / 2 + 2; + scaling = WebRtcSpl_GetScalingSquare((int16_t*)in, + PITCH_CORR_LEN2, + PITCH_CORR_LEN2); + ysum32 = 1; + csum32 = 0; + x = in + PITCH_MAX_LAG / 2 + 2; + for (n = 0; n < PITCH_CORR_LEN2; n++) { + ysum32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)in[n], + (int16_t)in[n], + scaling); // Q0 + csum32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)x[n], + (int16_t)in[n], + scaling); // Q0 + } + logcorQ8 += PITCH_LAG_SPAN2 - 1; + lys = WebRtcIsacfix_Log2Q8((uint32_t)ysum32); // Q8 + lys = WEBRTC_SPL_RSHIFT_W32(lys, 1); //sqrt(ysum); + if (csum32 > 0) { + lcs = WebRtcIsacfix_Log2Q8((uint32_t)csum32); // 2log(csum) in Q8 + if (lcs > (lys + oneQ8)) { // csum/sqrt(ysum) > 2 in Q8 + *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum)) + } else { + *logcorQ8 = oneQ8; // 1.00 + } + } else { + *logcorQ8 = 0; + } + + + for (k = 1; k < PITCH_LAG_SPAN2; k++) { + inptr = &in[k]; + ysum32 -= WEBRTC_SPL_MUL_16_16_RSFT((int16_t)in[k - 1], + (int16_t)in[k - 1], + scaling); + ysum32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)in[PITCH_CORR_LEN2 + k - 1], + (int16_t)in[PITCH_CORR_LEN2 + k - 1], + scaling); +#ifdef WEBRTC_ARCH_ARM_NEON + { + int32_t vbuff[4]; + int32x4_t int_32x4_sum = vmovq_n_s32(0); + // Can't shift a Neon register to right with a non-constant shift value. 
+ int32x4_t int_32x4_scale = vdupq_n_s32(-scaling); + // Assert a codition used in loop unrolling at compile-time. + COMPILE_ASSERT(PITCH_CORR_LEN2 %4 == 0); + + for (n = 0; n < PITCH_CORR_LEN2; n += 4) { + int16x4_t int_16x4_x = vld1_s16(&x[n]); + int16x4_t int_16x4_in = vld1_s16(&inptr[n]); + int32x4_t int_32x4 = vmull_s16(int_16x4_x, int_16x4_in); + int_32x4 = vshlq_s32(int_32x4, int_32x4_scale); + int_32x4_sum = vaddq_s32(int_32x4_sum, int_32x4); + } + + // Use vector store to avoid long stall from data trasferring + // from vector to general register. + vst1q_s32(vbuff, int_32x4_sum); + csum32 = vbuff[0] + vbuff[1]; + csum32 += vbuff[2]; + csum32 += vbuff[3]; + } +#else + csum32 = 0; + if(scaling == 0) { + for (n = 0; n < PITCH_CORR_LEN2; n++) { + csum32 += x[n] * inptr[n]; + } + } else { + for (n = 0; n < PITCH_CORR_LEN2; n++) { + csum32 += (x[n] * inptr[n]) >> scaling; + } + } +#endif + + logcorQ8--; + + lys = WebRtcIsacfix_Log2Q8((uint32_t)ysum32); // Q8 + lys = WEBRTC_SPL_RSHIFT_W32(lys, 1); //sqrt(ysum); + + if (csum32 > 0) { + lcs = WebRtcIsacfix_Log2Q8((uint32_t)csum32); // 2log(csum) in Q8 + if (lcs > (lys + oneQ8)) { // csum/sqrt(ysum) > 2 + *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum)) + } else { + *logcorQ8 = oneQ8; // 1.00 + } + } else { + *logcorQ8 = 0; + } + } +} diff --git a/modules/audio_coding/codecs/isac/fix/source/pitch_estimator_mips.c b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator_mips.c new file mode 100644 index 00000000..fa426e98 --- /dev/null +++ b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator_mips.c @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+
+extern int32_t WebRtcIsacfix_Log2Q8(uint32_t x);
+
+// MIPS inline-assembly implementation of WebRtcIsacfix_PCorr2Q32().
+//
+// in:       input samples. The reference segment |x| starts at
+//           in + PITCH_MAX_LAG / 2 + 2; the lagged segment for lag index k
+//           starts at in + k. Both are PITCH_CORR_LEN2 samples long.
+// logcorQ8: output vector with PITCH_LAG_SPAN2 entries, written from index
+//           PITCH_LAG_SPAN2 - 1 (lag index 0) down to index 0.
+//
+// For every lag the stored value is 0 when the cross-correlation sum is not
+// positive; otherwise it is log2(csum) - log2(ysum) / 2 in Q8, limited from
+// below to 1.00 in Q8.
+void WebRtcIsacfix_PCorr2Q32(const int16_t* in, int32_t* logcorQ8) {
+  int16_t scaling,n,k;
+  int32_t ysum32,csum32, lys, lcs;
+  int32_t oneQ8;
+  const int16_t* x;
+  const int16_t* inptr;
+
+  oneQ8 = WEBRTC_SPL_LSHIFT_W32((int32_t)1, 8); // 1.00 in Q8
+  x = in + PITCH_MAX_LAG / 2 + 2;
+  scaling = WebRtcSpl_GetScalingSquare((int16_t*)in,
+                                       PITCH_CORR_LEN2,
+                                       PITCH_CORR_LEN2);
+  ysum32 = 1;
+  csum32 = 0;
+  x = in + PITCH_MAX_LAG / 2 + 2;
+  {
+    // Lag index 0: one pass over PITCH_CORR_LEN2 samples, four per
+    // iteration, accumulating
+    //   ysum32 += (in[n] * in[n]) >> scaling   (energy)
+    //   csum32 += (in[n] * x[n]) >> scaling    (cross-correlation)
+    const int16_t* tmp_x = x;
+    const int16_t* tmp_in = in;
+    int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+    n = PITCH_CORR_LEN2;
+    // The loop below consumes four samples per iteration.
+    COMPILE_ASSERT(PITCH_CORR_LEN2 % 4 == 0);
+    __asm __volatile (
+      ".set push \n\t"
+      ".set noreorder \n\t"
+     "1: \n\t"
+      // tmp1..tmp4 = in[n..n+3], tmp5..tmp8 = x[n..n+3].
+      "lh %[tmp1], 0(%[tmp_in]) \n\t"
+      "lh %[tmp2], 2(%[tmp_in]) \n\t"
+      "lh %[tmp3], 4(%[tmp_in]) \n\t"
+      "lh %[tmp4], 6(%[tmp_in]) \n\t"
+      "lh %[tmp5], 0(%[tmp_x]) \n\t"
+      "lh %[tmp6], 2(%[tmp_x]) \n\t"
+      "lh %[tmp7], 4(%[tmp_x]) \n\t"
+      "lh %[tmp8], 6(%[tmp_x]) \n\t"
+      // Cross products go to tmp5..tmp8, squares to tmp1..tmp4.
+      "mul %[tmp5], %[tmp1], %[tmp5] \n\t"
+      "mul %[tmp1], %[tmp1], %[tmp1] \n\t"
+      "mul %[tmp6], %[tmp2], %[tmp6] \n\t"
+      "mul %[tmp2], %[tmp2], %[tmp2] \n\t"
+      "mul %[tmp7], %[tmp3], %[tmp7] \n\t"
+      "mul %[tmp3], %[tmp3], %[tmp3] \n\t"
+      "mul %[tmp8], %[tmp4], %[tmp8] \n\t"
+      "mul %[tmp4], %[tmp4], %[tmp4] \n\t"
+      "addiu %[n], %[n], -4 \n\t"
+      "srav %[tmp5], %[tmp5], %[scaling] \n\t"
+      "srav %[tmp1], %[tmp1], %[scaling] \n\t"
+      "srav %[tmp6], %[tmp6], %[scaling] \n\t"
+      "srav %[tmp2], %[tmp2], %[scaling] \n\t"
+      "srav %[tmp7], %[tmp7], %[scaling] \n\t"
+      "srav %[tmp3], %[tmp3], %[scaling] \n\t"
+      "srav %[tmp8], %[tmp8], %[scaling] \n\t"
+      "srav %[tmp4], %[tmp4], %[scaling] \n\t"
+      "addu %[ysum32], %[ysum32], %[tmp1] \n\t"
+      "addu %[csum32], %[csum32], %[tmp5] \n\t"
+      "addu %[ysum32], %[ysum32], %[tmp2] \n\t"
+      "addu %[csum32], %[csum32], %[tmp6] \n\t"
+      "addu %[ysum32], %[ysum32], %[tmp3] \n\t"
+      "addu %[csum32], %[csum32], %[tmp7] \n\t"
+      "addu %[ysum32], %[ysum32], %[tmp4] \n\t"
+      "addu %[csum32], %[csum32], %[tmp8] \n\t"
+      "addiu %[tmp_in], %[tmp_in], 8 \n\t"
+      "bgtz %[n], 1b \n\t"
+      // Branch delay slot (noreorder): executed on every iteration.
+      " addiu %[tmp_x], %[tmp_x], 8 \n\t"
+      ".set pop \n\t"
+      : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+        [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+        [tmp7] "=&r" (tmp7), [tmp8] "=&r" (tmp8), [tmp_in] "+r" (tmp_in),
+        [ysum32] "+r" (ysum32), [tmp_x] "+r" (tmp_x), [csum32] "+r" (csum32),
+        [n] "+r" (n)
+      : [scaling] "r" (scaling)
+      : "memory", "hi", "lo"
+    );
+  }
+  // The output vector is filled from its last element towards its first.
+  logcorQ8 += PITCH_LAG_SPAN2 - 1;
+  lys = WebRtcIsacfix_Log2Q8((uint32_t)ysum32); // Q8
+  lys = WEBRTC_SPL_RSHIFT_W32(lys, 1); //sqrt(ysum);
+  if (csum32 > 0) {
+    lcs = WebRtcIsacfix_Log2Q8((uint32_t)csum32); // 2log(csum) in Q8
+    if (lcs > (lys + oneQ8)) { // csum/sqrt(ysum) > 2 in Q8
+      *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum))
+    } else {
+      *logcorQ8 = oneQ8; // 1.00
+    }
+  } else {
+    *logcorQ8 = 0;
+  }
+
+  for (k = 1; k < PITCH_LAG_SPAN2; k++) {
+    inptr = &in[k];
+    // tmp_in1 is the sample leaving the energy window, tmp_in2 the sample
+    // entering it.
+    const int16_t* tmp_in1 = &in[k - 1];
+    const int16_t* tmp_in2 = &in[PITCH_CORR_LEN2 + k - 1];
+    const int16_t* tmp_x = x;
+    int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+    n = PITCH_CORR_LEN2;
+    csum32 = 0;
+    __asm __volatile (
+      ".set push \n\t"
+      ".set noreorder \n\t"
+      // Slide the energy window:
+      //   ysum32 += ((*tmp_in2)^2 >> scaling) - ((*tmp_in1)^2 >> scaling)
+      "lh %[tmp1], 0(%[tmp_in1]) \n\t"
+      "lh %[tmp2], 0(%[tmp_in2]) \n\t"
+      "mul %[tmp1], %[tmp1], %[tmp1] \n\t"
+      "mul %[tmp2], %[tmp2], %[tmp2] \n\t"
+      "srav %[tmp1], %[tmp1], %[scaling] \n\t"
+      "srav %[tmp2], %[tmp2], %[scaling] \n\t"
+      "subu %[ysum32], %[ysum32], %[tmp1] \n\t"
+      // scaling != 0 selects the loop at label 2, which shifts each product.
+      "bnez %[scaling], 2f \n\t"
+      // Branch delay slot: executed whichever loop is taken.
+      " addu %[ysum32], %[ysum32], %[tmp2] \n\t"
+      // Loop 1 (scaling == 0): csum32 += inptr[n] * x[n], four per iteration.
+     "1: \n\t"
+      "lh %[tmp1], 0(%[inptr]) \n\t"
+      "lh %[tmp2], 0(%[tmp_x]) \n\t"
+      "lh %[tmp3], 2(%[inptr]) \n\t"
+      "lh %[tmp4], 2(%[tmp_x]) \n\t"
+      "lh %[tmp5], 4(%[inptr]) \n\t"
+      "lh %[tmp6], 4(%[tmp_x]) \n\t"
+      "lh %[tmp7], 6(%[inptr]) \n\t"
+      "lh %[tmp8], 6(%[tmp_x]) \n\t"
+      "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
+      "mul %[tmp2], %[tmp3], %[tmp4] \n\t"
+      "mul %[tmp3], %[tmp5], %[tmp6] \n\t"
+      "mul %[tmp4], %[tmp7], %[tmp8] \n\t"
+      "addiu %[n], %[n], -4 \n\t"
+      "addiu %[inptr], %[inptr], 8 \n\t"
+      "addiu %[tmp_x], %[tmp_x], 8 \n\t"
+      "addu %[csum32], %[csum32], %[tmp1] \n\t"
+      "addu %[csum32], %[csum32], %[tmp2] \n\t"
+      "addu %[csum32], %[csum32], %[tmp3] \n\t"
+      "bgtz %[n], 1b \n\t"
+      " addu %[csum32], %[csum32], %[tmp4] \n\t"
+      "b 3f \n\t"
+      " nop \n\t"
+      // Loop 2 (scaling != 0): csum32 += (inptr[n] * x[n]) >> scaling.
+     "2: \n\t"
+      "lh %[tmp1], 0(%[inptr]) \n\t"
+      "lh %[tmp2], 0(%[tmp_x]) \n\t"
+      "lh %[tmp3], 2(%[inptr]) \n\t"
+      "lh %[tmp4], 2(%[tmp_x]) \n\t"
+      "lh %[tmp5], 4(%[inptr]) \n\t"
+      "lh %[tmp6], 4(%[tmp_x]) \n\t"
+      "lh %[tmp7], 6(%[inptr]) \n\t"
+      "lh %[tmp8], 6(%[tmp_x]) \n\t"
+      "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
+      "mul %[tmp2], %[tmp3], %[tmp4] \n\t"
+      "mul %[tmp3], %[tmp5], %[tmp6] \n\t"
+      "mul %[tmp4], %[tmp7], %[tmp8] \n\t"
+      "addiu %[n], %[n], -4 \n\t"
+      "addiu %[inptr], %[inptr], 8 \n\t"
+      "addiu %[tmp_x], %[tmp_x], 8 \n\t"
+      "srav %[tmp1], %[tmp1], %[scaling] \n\t"
+      "srav %[tmp2], %[tmp2], %[scaling] \n\t"
+      "srav %[tmp3], %[tmp3], %[scaling] \n\t"
+      "srav %[tmp4], %[tmp4], %[scaling] \n\t"
+      "addu %[csum32], %[csum32], %[tmp1] \n\t"
+      "addu %[csum32], %[csum32], %[tmp2] \n\t"
+      "addu %[csum32], %[csum32], %[tmp3] \n\t"
+      "bgtz %[n], 2b \n\t"
+      " addu %[csum32], %[csum32], %[tmp4] \n\t"
+     "3: \n\t"
+      ".set pop \n\t"
+      : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+        [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+        [tmp7] "=&r" (tmp7), [tmp8] "=&r" (tmp8), [inptr] "+r" (inptr),
+        [csum32] "+r" (csum32), [tmp_x] "+r" (tmp_x), [ysum32] "+r" (ysum32),
+        [n] "+r" (n)
+      : [tmp_in1] "r" (tmp_in1), [tmp_in2] "r" (tmp_in2),
+        [scaling] "r" (scaling)
+      : "memory", "hi", "lo"
+    );
+
+    // Step to the output slot for this lag and store its log-correlation.
+    logcorQ8--;
+    lys = WebRtcIsacfix_Log2Q8((uint32_t)ysum32); // Q8
+    lys = WEBRTC_SPL_RSHIFT_W32(lys, 1); //sqrt(ysum);
+    if (csum32 > 0) {
+      lcs = WebRtcIsacfix_Log2Q8((uint32_t)csum32); // 2log(csum) in Q8
+      if (lcs > (lys + oneQ8)) { // csum/sqrt(ysum) > 2
+        *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum))
+      } else {
+        *logcorQ8 = oneQ8; // 1.00
+      }
+    } else {
+      *logcorQ8 = 0;
+    }
+  }
+}
diff --git a/modules/audio_coding/codecs/isac/fix/source/transform.c b/modules/audio_coding/codecs/isac/fix/source/transform.c
index 67e513c7..24ccc821 100644
--- a/modules/audio_coding/codecs/isac/fix/source/transform.c
+++ b/modules/audio_coding/codecs/isac/fix/source/transform.c
@@ -19,89 +19,13 @@
 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/fft.h"
 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
-#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
-/* Tables are defined in ARM assembly files. */
+/* Tables are defined in transform_tables.c file or ARM assembly files.
*/ /* Cosine table 1 in Q14 */ extern const int16_t WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2]; /* Sine table 1 in Q14 */ extern const int16_t WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2]; /* Sine table 2 in Q14 */ extern const int16_t WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4]; -#else -/* Cosine table 1 in Q14 */ -static const int16_t WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2] = { - 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270, - 16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880, - 15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218, - 15137, 15053, 14968, 14879, 14788, 14694, 14598, 14500, 14399, 14295, - 14189, 14081, 13970, 13856, 13741, 13623, 13502, 13380, 13255, 13128, - 12998, 12867, 12733, 12597, 12458, 12318, 12176, 12031, 11885, 11736, - 11585, 11433, 11278, 11121, 10963, 10803, 10641, 10477, 10311, 10143, - 9974, 9803, 9630, 9456, 9280, 9102, 8923, 8743, 8561, 8377, - 8192, 8006, 7818, 7629, 7438, 7246, 7053, 6859, 6664, 6467, - 6270, 6071, 5872, 5671, 5469, 5266, 5063, 4859, 4653, 4447, - 4240, 4033, 3825, 3616, 3406, 3196, 2986, 2775, 2563, 2351, - 2139, 1926, 1713, 1499, 1285, 1072, 857, 643, 429, 214, - 0, -214, -429, -643, -857, -1072, -1285, -1499, -1713, -1926, - -2139, -2351, -2563, -2775, -2986, -3196, -3406, -3616, -3825, -4033, - -4240, -4447, -4653, -4859, -5063, -5266, -5469, -5671, -5872, -6071, - -6270, -6467, -6664, -6859, -7053, -7246, -7438, -7629, -7818, -8006, - -8192, -8377, -8561, -8743, -8923, -9102, -9280, -9456, -9630, -9803, - -9974, -10143, -10311, -10477, -10641, -10803, -10963, -11121, -11278, -11433, - -11585, -11736, -11885, -12031, -12176, -12318, -12458, -12597, -12733, - -12867, -12998, -13128, -13255, -13380, -13502, -13623, -13741, -13856, - -13970, -14081, -14189, -14295, -14399, -14500, -14598, -14694, -14788, - -14879, -14968, -15053, -15137, -15218, -15296, -15371, -15444, -15515, - -15582, -15647, -15709, -15769, -15826, -15880, -15931, -15980, -16026, - -16069, -16110, 
-16147, -16182, -16214, -16244, -16270, -16294, -16315, - -16333, -16349, -16362, -16371, -16378, -16383 -}; - -/* Sine table 1 in Q14 */ -static const int16_t WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2] = { - 0, 214, 429, 643, 857, 1072, 1285, 1499, 1713, 1926, - 2139, 2351, 2563, 2775, 2986, 3196, 3406, 3616, 3825, 4033, - 4240, 4447, 4653, 4859, 5063, 5266, 5469, 5671, 5872, 6071, - 6270, 6467, 6664, 6859, 7053, 7246, 7438, 7629, 7818, 8006, - 8192, 8377, 8561, 8743, 8923, 9102, 9280, 9456, 9630, 9803, - 9974, 10143, 10311, 10477, 10641, 10803, 10963, 11121, 11278, 11433, - 11585, 11736, 11885, 12031, 12176, 12318, 12458, 12597, 12733, 12867, - 12998, 13128, 13255, 13380, 13502, 13623, 13741, 13856, 13970, 14081, - 14189, 14295, 14399, 14500, 14598, 14694, 14788, 14879, 14968, 15053, - 15137, 15218, 15296, 15371, 15444, 15515, 15582, 15647, 15709, 15769, - 15826, 15880, 15931, 15980, 16026, 16069, 16110, 16147, 16182, 16214, - 16244, 16270, 16294, 16315, 16333, 16349, 16362, 16371, 16378, 16383, - 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270, - 16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880, - 15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218, - 15137, 15053, 14968, 14879, 14788, 14694, 14598, 14500, 14399, 14295, - 14189, 14081, 13970, 13856, 13741, 13623, 13502, 13380, 13255, 13128, - 12998, 12867, 12733, 12597, 12458, 12318, 12176, 12031, 11885, 11736, - 11585, 11433, 11278, 11121, 10963, 10803, 10641, 10477, 10311, 10143, - 9974, 9803, 9630, 9456, 9280, 9102, 8923, 8743, 8561, 8377, - 8192, 8006, 7818, 7629, 7438, 7246, 7053, 6859, 6664, 6467, - 6270, 6071, 5872, 5671, 5469, 5266, 5063, 4859, 4653, 4447, - 4240, 4033, 3825, 3616, 3406, 3196, 2986, 2775, 2563, 2351, - 2139, 1926, 1713, 1499, 1285, 1072, 857, 643, 429, 214 -}; - - -/* Sine table 2 in Q14 */ -static const int16_t WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4] = { - 16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305, 16283, -16257, - 
16229, -16199, 16165, -16129, 16090, -16048, 16003, -15956, 15906, -15853, - 15798, -15739, 15679, -15615, 15549, -15480, 15408, -15334, 15257, -15178, - 15095, -15011, 14924, -14834, 14741, -14647, 14549, -14449, 14347, -14242, - 14135, -14025, 13913, -13799, 13682, -13563, 13441, -13318, 13192, -13063, - 12933, -12800, 12665, -12528, 12389, -12247, 12104, -11958, 11810, -11661, - 11509, -11356, 11200, -11042, 10883, -10722, 10559, -10394, 10227, -10059, - 9889, -9717, 9543, -9368, 9191, -9013, 8833, -8652, 8469, -8285, - 8099, -7912, 7723, -7534, 7342, -7150, 6957, -6762, 6566, -6369, - 6171, -5971, 5771, -5570, 5368, -5165, 4961, -4756, 4550, -4344, - 4137, -3929, 3720, -3511, 3301, -3091, 2880, -2669, 2457, -2245, - 2032, -1819, 1606, -1392, 1179, -965, 750, -536, 322, -107 -}; -#endif // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON void WebRtcIsacfix_Time2SpecC(int16_t *inre1Q9, int16_t *inre2Q9, diff --git a/modules/audio_coding/codecs/isac/fix/source/transform_mips.c b/modules/audio_coding/codecs/isac/fix/source/transform_mips.c new file mode 100644 index 00000000..bf95ee57 --- /dev/null +++ b/modules/audio_coding/codecs/isac/fix/source/transform_mips.c @@ -0,0 +1,1287 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h" +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/fft.h" +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h" + +// The tables are defined in transform_tables.c file. 
+extern const int16_t WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2];
+extern const int16_t WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2];
+extern const int16_t WebRtcIsacfix_kCosTab2[FRAMESAMPLES/4];
+extern const int16_t WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4];
+
+// MIPS DSPr2 version of the WebRtcIsacfix_Time2Spec function
+// is not bit-exact with the C version.
+// The accuracy of the MIPS DSPr2 version is same or better.
+//
+// inre1Q9, inre2Q9: the two input vectors. They are also used as work
+//                   buffers: they are overwritten with the FFT input and then
+//                   transformed in place by WebRtcIsacfix_FftRadix16Fastest().
+// outreQ7, outimQ7: real and imaginary parts of the output spectrum.
+//
+// Processing steps:
+//  1. Multiply the inputs by kCosTab1/kSinTab1, combine them into one complex
+//     vector (tmpreQ16/tmpimQ16) and track the maximum absolute value.
+//  2. Derive the normalization shift |max| and copy the shifted values back
+//     into inre1Q9/inre2Q9.
+//  3. Run the in-place FFT.
+//  4. Undo the normalization shift, separate the result into two complex
+//     vectors using symmetry, apply kCosTab2/kSinTab2 and store the outputs
+//     after a right shift by 9.
+void WebRtcIsacfix_Time2SpecMIPS(int16_t* inre1Q9,
+                                 int16_t* inre2Q9,
+                                 int16_t* outreQ7,
+                                 int16_t* outimQ7) {
+  int k = FRAMESAMPLES / 2;
+  int32_t tmpreQ16[FRAMESAMPLES / 2], tmpimQ16[FRAMESAMPLES / 2];
+  int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
+  int32_t inre1, inre2, tmpre, tmpim, factor, max, max1;
+  int16_t* cosptr;
+  int16_t* sinptr;
+
+  cosptr = (int16_t*)WebRtcIsacfix_kCosTab1;
+  sinptr = (int16_t*)WebRtcIsacfix_kSinTab1;
+
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    "addiu %[inre1], %[inre1Q9], 0 \n\t"
+    "addiu %[inre2], %[inre2Q9], 0 \n\t"
+    "addiu %[tmpre], %[tmpreQ16], 0 \n\t"
+    "addiu %[tmpim], %[tmpimQ16], 0 \n\t"
+    "addiu %[factor], $zero, 16921 \n\t"
+    "mul %[max], $zero, $zero \n\t"
+    // Multiply with complex exponentials and combine into one complex vector.
+    // Also, calculate the maximal absolute value in the same loop.
+   "1: \n\t"
+#if defined(MIPS_DSP_R2_LE)
+    // NOTE(review): both halves of each lwl/lwr pair use offset 0. The usual
+    // little-endian unaligned word load pairs lwl 3(p) with lwr 0(p) --
+    // confirm that these buffers are always word-aligned.
+    "lwl %[r0], 0(%[inre1]) \n\t"
+    "lwl %[r2], 0(%[cosptr]) \n\t"
+    "lwl %[r3], 0(%[sinptr]) \n\t"
+    "lwl %[r1], 0(%[inre2]) \n\t"
+    "lwr %[r0], 0(%[inre1]) \n\t"
+    "lwr %[r2], 0(%[cosptr]) \n\t"
+    "lwr %[r3], 0(%[sinptr]) \n\t"
+    "lwr %[r1], 0(%[inre2]) \n\t"
+    "muleq_s.w.phr %[r4], %[r2], %[r0] \n\t"
+    "muleq_s.w.phr %[r5], %[r3], %[r0] \n\t"
+    "muleq_s.w.phr %[r6], %[r3], %[r1] \n\t"
+    "muleq_s.w.phr %[r7], %[r2], %[r1] \n\t"
+    "muleq_s.w.phl %[r8], %[r2], %[r0] \n\t"
+    "muleq_s.w.phl %[r0], %[r3], %[r0] \n\t"
+    "muleq_s.w.phl %[r3], %[r3], %[r1] \n\t"
+    "muleq_s.w.phl %[r1], %[r2], %[r1] \n\t"
+    "addiu %[k], %[k], -2 \n\t"
+    "addu %[r4], %[r4], %[r6] \n\t"
+    "subu %[r5], %[r7], %[r5] \n\t"
+    "sra %[r4], %[r4], 8 \n\t"
+    "sra %[r5], %[r5], 8 \n\t"
+    "mult $ac0, %[factor], %[r4] \n\t"
+    "mult $ac1, %[factor], %[r5] \n\t"
+    "addu %[r3], %[r8], %[r3] \n\t"
+    "subu %[r0], %[r1], %[r0] \n\t"
+    "sra %[r3], %[r3], 8 \n\t"
+    "sra %[r0], %[r0], 8 \n\t"
+    "mult $ac2, %[factor], %[r3] \n\t"
+    "mult $ac3, %[factor], %[r0] \n\t"
+    "extr_r.w %[r4], $ac0, 16 \n\t"
+    "extr_r.w %[r5], $ac1, 16 \n\t"
+    "addiu %[inre1], %[inre1], 4 \n\t"
+    "addiu %[inre2], %[inre2], 4 \n\t"
+    "extr_r.w %[r6], $ac2, 16 \n\t"
+    "extr_r.w %[r7], $ac3, 16 \n\t"
+    "addiu %[cosptr], %[cosptr], 4 \n\t"
+    "addiu %[sinptr], %[sinptr], 4 \n\t"
+    "shra_r.w %[r4], %[r4], 3 \n\t"
+    "shra_r.w %[r5], %[r5], 3 \n\t"
+    "sw %[r4], 0(%[tmpre]) \n\t"
+    "absq_s.w %[r4], %[r4] \n\t"
+    "sw %[r5], 0(%[tmpim]) \n\t"
+    "absq_s.w %[r5], %[r5] \n\t"
+    "shra_r.w %[r6], %[r6], 3 \n\t"
+    "shra_r.w %[r7], %[r7], 3 \n\t"
+    "sw %[r6], 4(%[tmpre]) \n\t"
+    "absq_s.w %[r6], %[r6] \n\t"
+    "sw %[r7], 4(%[tmpim]) \n\t"
+    "absq_s.w %[r7], %[r7] \n\t"
+    "slt %[r0], %[r4], %[r5] \n\t"
+    "movn %[r4], %[r5], %[r0] \n\t"
+    "slt %[r1], %[r6], %[r7] \n\t"
+    "movn %[r6], %[r7], %[r1] \n\t"
+    "slt %[r0], %[max], %[r4] \n\t"
+    "movn %[max], %[r4], %[r0] \n\t"
+    "slt %[r1], %[max], %[r6] \n\t"
+    "movn %[max], %[r6], %[r1] \n\t"
+    "addiu %[tmpre], %[tmpre], 8 \n\t"
+    "bgtz %[k], 1b \n\t"
+    " addiu %[tmpim], %[tmpim], 8 \n\t"
+#else  // #if defined(MIPS_DSP_R2_LE)
+    "lh %[r0], 0(%[inre1]) \n\t"
+    "lh %[r1], 0(%[inre2]) \n\t"
+    "lh %[r2], 0(%[cosptr]) \n\t"
+    "lh %[r3], 0(%[sinptr]) \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+    "mul %[r4], %[r0], %[r2] \n\t"
+    "mul %[r5], %[r1], %[r3] \n\t"
+    "mul %[r0], %[r0], %[r3] \n\t"
+    "mul %[r2], %[r1], %[r2] \n\t"
+    "addiu %[inre1], %[inre1], 2 \n\t"
+    "addiu %[inre2], %[inre2], 2 \n\t"
+    "addiu %[cosptr], %[cosptr], 2 \n\t"
+    "addiu %[sinptr], %[sinptr], 2 \n\t"
+    "addu %[r1], %[r4], %[r5] \n\t"
+    "sra %[r1], %[r1], 7 \n\t"
+    "sra %[r3], %[r1], 16 \n\t"
+    "andi %[r1], %[r1], 0xFFFF \n\t"
+    "sra %[r1], %[r1], 1 \n\t"
+    "mul %[r1], %[factor], %[r1] \n\t"
+    "mul %[r3], %[factor], %[r3] \n\t"
+    "subu %[r0], %[r2], %[r0] \n\t"
+    "sra %[r0], %[r0], 7 \n\t"
+    "sra %[r2], %[r0], 16 \n\t"
+    "andi %[r0], %[r0], 0xFFFF \n\t"
+    "sra %[r0], %[r0], 1 \n\t"
+    "mul %[r0], %[factor], %[r0] \n\t"
+    "mul %[r2], %[factor], %[r2] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r1], %[r1], 15 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r1], %[r1], 0x4000 \n\t"
+    "sra %[r1], %[r1], 15 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu %[r1], %[r3], %[r1] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r1], %[r1], 3 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r1], %[r1], 4 \n\t"
+    "sra %[r1], %[r1], 3 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sw %[r1], 0(%[tmpre]) \n\t"
+    "addiu %[tmpre], %[tmpre], 4 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "absq_s.w %[r1], %[r1] \n\t"
+    "shra_r.w %[r0], %[r0], 15 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "negu %[r4], %[r1] \n\t"
+    "slt %[r3], %[r1], $zero \n\t"
+    "movn %[r1], %[r4], %[r3] \n\t"
+    "addiu %[r0], %[r0], 0x4000 \n\t"
+    "sra %[r0], %[r0], 15 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu %[r0], %[r0], %[r2] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r0], %[r0], 3 \n\t"
+    "sw %[r0], 0(%[tmpim]) \n\t"
+    "absq_s.w %[r0], %[r0] \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r0], %[r0], 4 \n\t"
+    "sra %[r0], %[r0], 3 \n\t"
+    "sw %[r0], 0(%[tmpim]) \n\t"
+    "negu %[r2], %[r0] \n\t"
+    "slt %[r3], %[r0], $zero \n\t"
+    "movn %[r0], %[r2], %[r3] \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "slt %[r2], %[max], %[r1] \n\t"
+    "movn %[max], %[r1], %[r2] \n\t"
+    "slt %[r2], %[max], %[r0] \n\t"
+    "movn %[max], %[r0], %[r2] \n\t"
+    "bgtz %[k], 1b \n\t"
+    " addiu %[tmpim], %[tmpim], 4 \n\t"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+    // Calculate WebRtcSpl_NormW32(max).
+    // If max gets value >=0, we should shift max steps to the left, and the
+    // domain will be Q(16+shift). If max gets value <0, we should shift -max
+    // steps to the right, and the domain will be Q(16+max)
+    "clz %[max], %[max] \n\t"
+    "addiu %[max], %[max], -25 \n\t"
+    ".set pop \n\t"
+    // cosptr and sinptr are advanced inside the loop, so they are declared as
+    // read-write operands rather than plain inputs.
+    : [k] "+r" (k), [inre1] "=&r" (inre1), [inre2] "=&r" (inre2),
+      [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+      [r3] "=&r" (r3), [r4] "=&r" (r4), [tmpre] "=&r" (tmpre),
+      [tmpim] "=&r" (tmpim), [max] "=&r" (max), [factor] "=&r" (factor),
+      [cosptr] "+r" (cosptr), [sinptr] "+r" (sinptr),
+#if defined(MIPS_DSP_R2_LE)
+      [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8),
+#endif  // #if defined(MIPS_DSP_R2_LE)
+      [r5] "=&r" (r5)
+    : [inre1Q9] "r" (inre1Q9), [inre2Q9] "r" (inre2Q9),
+      [tmpreQ16] "r" (tmpreQ16), [tmpimQ16] "r" (tmpimQ16)
+    : "hi", "lo", "memory"
+#if defined(MIPS_DSP_R2_LE)
+      // The DSP R2 path also writes the accumulators $ac1..$ac3.
+      , "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+  );
+
+  // "Fastest" vectors
+  // Shift left by max when max > 0, otherwise shift right by -max with
+  // rounding, and store the low 16 bits into inre1Q9/inre2Q9.
+  k = FRAMESAMPLES / 4;
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    "addiu %[tmpre], %[tmpreQ16], 0 \n\t"
+    "addiu %[tmpim], %[tmpimQ16], 0 \n\t"
+    "addiu %[inre1], %[inre1Q9], 0 \n\t"
+    "addiu %[inre2], %[inre2Q9], 0 \n\t"
+    "blez %[max], 2f \n\t"
+    " subu %[max1], $zero, %[max] \n\t"
+   "1: \n\t"
+    "lw %[r0], 0(%[tmpre]) \n\t"
+    "lw %[r1], 0(%[tmpim]) \n\t"
+    "lw %[r2], 4(%[tmpre]) \n\t"
+    "lw %[r3], 4(%[tmpim]) \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+    "sllv %[r0], %[r0], %[max] \n\t"
+    "sllv %[r1], %[r1], %[max] \n\t"
+    "sllv %[r2], %[r2], %[max] \n\t"
+    "sllv %[r3], %[r3], %[max] \n\t"
+    "addiu %[tmpre], %[tmpre], 8 \n\t"
+    "addiu %[tmpim], %[tmpim], 8 \n\t"
+    "sh %[r0], 0(%[inre1]) \n\t"
+    "sh %[r1], 0(%[inre2]) \n\t"
+    "sh %[r2], 2(%[inre1]) \n\t"
+    "sh %[r3], 2(%[inre2]) \n\t"
+    "addiu %[inre1], %[inre1], 4 \n\t"
+    "bgtz %[k], 1b \n\t"
+    " addiu %[inre2], %[inre2], 4 \n\t"
+    "b 4f \n\t"
+    " nop \n\t"
+   "2: \n\t"
+#if !defined(MIPS_DSP_R1_LE)
+    // r4 = 1 << (max1 - 1): rounding term for the right shift below.
+    "addiu %[r4], %[max1], -1 \n\t"
+    "addiu %[r5], $zero, 1 \n\t"
+    "sllv %[r4], %[r5], %[r4] \n\t"
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+   "3: \n\t"
+    "lw %[r0], 0(%[tmpre]) \n\t"
+    "lw %[r1], 0(%[tmpim]) \n\t"
+    "lw %[r2], 4(%[tmpre]) \n\t"
+    "lw %[r3], 4(%[tmpim]) \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shrav_r.w %[r0], %[r0], %[max1] \n\t"
+    "shrav_r.w %[r1], %[r1], %[max1] \n\t"
+    "shrav_r.w %[r2], %[r2], %[max1] \n\t"
+    "shrav_r.w %[r3], %[r3], %[max1] \n\t"
+#else  // #if !defined(MIPS_DSP_R1_LE)
+    "addu %[r0], %[r0], %[r4] \n\t"
+    "addu %[r1], %[r1], %[r4] \n\t"
+    "addu %[r2], %[r2], %[r4] \n\t"
+    "addu %[r3], %[r3], %[r4] \n\t"
+    "srav %[r0], %[r0], %[max1] \n\t"
+    "srav %[r1], %[r1], %[max1] \n\t"
+    "srav %[r2], %[r2], %[max1] \n\t"
+    "srav %[r3], %[r3], %[max1] \n\t"
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+    "addiu %[tmpre], %[tmpre], 8 \n\t"
+    "addiu %[tmpim], %[tmpim], 8 \n\t"
+    "sh %[r0], 0(%[inre1]) \n\t"
+    "sh %[r1], 0(%[inre2]) \n\t"
+    "sh %[r2], 2(%[inre1]) \n\t"
+    "sh %[r3], 2(%[inre2]) \n\t"
+    "addiu %[inre1], %[inre1], 4 \n\t"
+    "bgtz %[k], 3b \n\t"
+    " addiu %[inre2], %[inre2], 4 \n\t"
+   "4: \n\t"
+    ".set pop \n\t"
+    : [tmpre] "=&r" (tmpre), [tmpim] "=&r" (tmpim), [inre1] "=&r" (inre1),
+      [inre2] "=&r" (inre2), [k] "+r" (k), [max1] "=&r" (max1),
+#if !defined(MIPS_DSP_R1_LE)
+      [r4] "=&r" (r4), [r5] "=&r" (r5),
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+      [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3)
+    : [tmpreQ16] "r" (tmpreQ16), [tmpimQ16] "r" (tmpimQ16),
+      [inre1Q9] "r" (inre1Q9), [inre2Q9] "r" (inre2Q9), [max] "r" (max)
+    : "memory"
+  );
+
+  // Get DFT
+  WebRtcIsacfix_FftRadix16Fastest(inre1Q9, inre2Q9, -1); // real call
+
+  // "Fastest" vectors and
+  // Use symmetry to separate into two complex vectors
+  // and center frames in time around zero
+  // merged into one loop
+  cosptr = (int16_t*)WebRtcIsacfix_kCosTab2;
+  sinptr = (int16_t*)WebRtcIsacfix_kSinTab2;
+  k = FRAMESAMPLES / 4;
+  factor = FRAMESAMPLES - 2; // offset for FRAMESAMPLES / 2 - 1 array member
+
+  // Label 1 handles max >= 0 (undo the normalization with a right shift),
+  // label 2 handles max < 0 (undo it with a left shift by -max). Apart from
+  // that shift the two loops must perform the same computation.
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    "addiu %[inre1], %[inre1Q9], 0 \n\t"
+    "addiu %[inre2], %[inre2Q9], 0 \n\t"
+    "addiu %[tmpre], %[outreQ7], 0 \n\t"
+    "addiu %[tmpim], %[outimQ7], 0 \n\t"
+    "bltz %[max], 2f \n\t"
+    " subu %[max1], $zero, %[max] \n\t"
+   "1: \n\t"
+#if !defined(MIPS_DSP_R1_LE)
+    "addu %[r4], %[inre1], %[offset] \n\t"
+    "addu %[r5], %[inre2], %[offset] \n\t"
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+    "lh %[r0], 0(%[inre1]) \n\t"
+    "lh %[r1], 0(%[inre2]) \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "lhx %[r2], %[offset](%[inre1]) \n\t"
+    "lhx %[r3], %[offset](%[inre2]) \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "lh %[r2], 0(%[r4]) \n\t"
+    "lh %[r3], 0(%[r5]) \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "srav %[r0], %[r0], %[max] \n\t"
+    "srav %[r1], %[r1], %[max] \n\t"
+    "srav %[r2], %[r2], %[max] \n\t"
+    "srav %[r3], %[r3], %[max] \n\t"
+    "addu %[r4], %[r0], %[r2] \n\t"
+    "subu %[r0], %[r2], %[r0] \n\t"
+    "subu %[r2], %[r1], %[r3] \n\t"
+    "addu %[r1], %[r1], %[r3] \n\t"
+    "lh %[r3], 0(%[cosptr]) \n\t"
+    "lh %[r5], 0(%[sinptr]) \n\t"
+    "andi %[r6], %[r4], 0xFFFF \n\t"
+    "sra %[r4], %[r4], 16 \n\t"
+    "mul %[r7], %[r3], %[r6] \n\t"
+    "mul %[r8], %[r3], %[r4] \n\t"
+    "mul %[r6], %[r5], %[r6] \n\t"
+    "mul %[r4], %[r5], %[r4] \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+    "addiu %[inre1], %[inre1], 2 \n\t"
+    "addiu %[inre2], %[inre2], 2 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r7], %[r7], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r7], %[r7], 0x2000 \n\t"
+    "sra %[r7], %[r7], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r8], %[r8], 2 \n\t"
+    "addu %[r8], %[r8], %[r7] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r6], %[r6], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r6], %[r6], 0x2000 \n\t"
+    "sra %[r6], %[r6], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r4], %[r4], 2 \n\t"
+    "addu %[r4], %[r4], %[r6] \n\t"
+    "andi %[r6], %[r2], 0xFFFF \n\t"
+    "sra %[r2], %[r2], 16 \n\t"
+    "mul %[r7], %[r5], %[r6] \n\t"
+    "mul %[r9], %[r5], %[r2] \n\t"
+    "mul %[r6], %[r3], %[r6] \n\t"
+    "mul %[r2], %[r3], %[r2] \n\t"
+    "addiu %[cosptr], %[cosptr], 2 \n\t"
+    "addiu %[sinptr], %[sinptr], 2 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r7], %[r7], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r7], %[r7], 0x2000 \n\t"
+    "sra %[r7], %[r7], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r9], %[r9], 2 \n\t"
+    "addu %[r9], %[r7], %[r9] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r6], %[r6], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r6], %[r6], 0x2000 \n\t"
+    "sra %[r6], %[r6], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r2], %[r2], 2 \n\t"
+    "addu %[r2], %[r6], %[r2] \n\t"
+    "subu %[r8], %[r8], %[r9] \n\t"
+    "sra %[r8], %[r8], 9 \n\t"
+    "addu %[r2], %[r4], %[r2] \n\t"
+    "sra %[r2], %[r2], 9 \n\t"
+    "sh %[r8], 0(%[tmpre]) \n\t"
+    "sh %[r2], 0(%[tmpim]) \n\t"
+
+    "andi %[r4], %[r1], 0xFFFF \n\t"
+    "sra %[r1], %[r1], 16 \n\t"
+    "andi %[r6], %[r0], 0xFFFF \n\t"
+    "sra %[r0], %[r0], 16 \n\t"
+    "mul %[r7], %[r5], %[r4] \n\t"
+    "mul %[r9], %[r5], %[r1] \n\t"
+    "mul %[r4], %[r3], %[r4] \n\t"
+    "mul %[r1], %[r3], %[r1] \n\t"
+    "mul %[r8], %[r3], %[r0] \n\t"
+    "mul %[r3], %[r3], %[r6] \n\t"
+    "mul %[r6], %[r5], %[r6] \n\t"
+    "mul %[r0], %[r5], %[r0] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r7], %[r7], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r7], %[r7], 0x2000 \n\t"
+    "sra %[r7], %[r7], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r9], %[r9], 2 \n\t"
+    "addu %[r9], %[r9], %[r7] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r4], %[r4], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r4], %[r4], 0x2000 \n\t"
+    "sra %[r4], %[r4], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r1], %[r1], 2 \n\t"
+    "addu %[r1], %[r1], %[r4] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r3], %[r3], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r3], %[r3], 0x2000 \n\t"
+    "sra %[r3], %[r3], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r8], %[r8], 2 \n\t"
+    "addu %[r8], %[r8], %[r3] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r6], %[r6], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r6], %[r6], 0x2000 \n\t"
+    "sra %[r6], %[r6], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r0], %[r0], 2 \n\t"
+    "addu %[r0], %[r0], %[r6] \n\t"
+    "addu %[r3], %[tmpre], %[offset] \n\t"
+    "addu %[r2], %[tmpim], %[offset] \n\t"
+    "addu %[r9], %[r9], %[r8] \n\t"
+    "negu %[r9], %[r9] \n\t"
+    "sra %[r9], %[r9], 9 \n\t"
+    "subu %[r0], %[r0], %[r1] \n\t"
+    // Scale the mirrored imaginary output like the other three outputs and
+    // like the loop at label 2; without this shift the unscaled low 16 bits
+    // were stored.
+    "sra %[r0], %[r0], 9 \n\t"
+    "addiu %[offset], %[offset], -4 \n\t"
+    "sh %[r9], 0(%[r3]) \n\t"
+    "sh %[r0], 0(%[r2]) \n\t"
+    "addiu %[tmpre], %[tmpre], 2 \n\t"
+    "bgtz %[k], 1b \n\t"
+    " addiu %[tmpim], %[tmpim], 2 \n\t"
+    "b 3f \n\t"
+    " nop \n\t"
+   "2: \n\t"
+#if !defined(MIPS_DSP_R1_LE)
+    "addu %[r4], %[inre1], %[offset] \n\t"
+    "addu %[r5], %[inre2], %[offset] \n\t"
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+    "lh %[r0], 0(%[inre1]) \n\t"
+    "lh %[r1], 0(%[inre2]) \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "lhx %[r2], %[offset](%[inre1]) \n\t"
+    "lhx %[r3], %[offset](%[inre2]) \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "lh %[r2], 0(%[r4]) \n\t"
+    "lh %[r3], 0(%[r5]) \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sllv %[r0], %[r0], %[max1] \n\t"
+    "sllv %[r1], %[r1], %[max1] \n\t"
+    "sllv %[r2], %[r2], %[max1] \n\t"
+    "sllv %[r3], %[r3], %[max1] \n\t"
+    "addu %[r4], %[r0], %[r2] \n\t"
+    "subu %[r0], %[r2], %[r0] \n\t"
+    "subu %[r2], %[r1], %[r3] \n\t"
+    "addu %[r1], %[r1], %[r3] \n\t"
+    "lh %[r3], 0(%[cosptr]) \n\t"
+    "lh %[r5], 0(%[sinptr]) \n\t"
+    "andi %[r6], %[r4], 0xFFFF \n\t"
+    "sra %[r4], %[r4], 16 \n\t"
+    "mul %[r7], %[r3], %[r6] \n\t"
+    "mul %[r8], %[r3], %[r4] \n\t"
+    "mul %[r6], %[r5], %[r6] \n\t"
+    "mul %[r4], %[r5], %[r4] \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+    "addiu %[inre1], %[inre1], 2 \n\t"
+    "addiu %[inre2], %[inre2], 2 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r7], %[r7], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r7], %[r7], 0x2000 \n\t"
+    "sra %[r7], %[r7], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r8], %[r8], 2 \n\t"
+    "addu %[r8], %[r8], %[r7] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r6], %[r6], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r6], %[r6], 0x2000 \n\t"
+    "sra %[r6], %[r6], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r4], %[r4], 2 \n\t"
+    "addu %[r4], %[r4], %[r6] \n\t"
+    "andi %[r6], %[r2], 0xFFFF \n\t"
+    "sra %[r2], %[r2], 16 \n\t"
+    "mul %[r7], %[r5], %[r6] \n\t"
+    "mul %[r9], %[r5], %[r2] \n\t"
+    "mul %[r6], %[r3], %[r6] \n\t"
+    "mul %[r2], %[r3], %[r2] \n\t"
+    "addiu %[cosptr], %[cosptr], 2 \n\t"
+    "addiu %[sinptr], %[sinptr], 2 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r7], %[r7], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r7], %[r7], 0x2000 \n\t"
+    "sra %[r7], %[r7], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r9], %[r9], 2 \n\t"
+    "addu %[r9], %[r7], %[r9] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r6], %[r6], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r6], %[r6], 0x2000 \n\t"
+    "sra %[r6], %[r6], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r2], %[r2], 2 \n\t"
+    "addu %[r2], %[r6], %[r2] \n\t"
+    "subu %[r8], %[r8], %[r9] \n\t"
+    "sra %[r8], %[r8], 9 \n\t"
+    "addu %[r2], %[r4], %[r2] \n\t"
+    "sra %[r2], %[r2], 9 \n\t"
+    "sh %[r8], 0(%[tmpre]) \n\t"
+    "sh %[r2], 0(%[tmpim]) \n\t"
+    "andi %[r4], %[r1], 0xFFFF \n\t"
+    "sra %[r1], %[r1], 16 \n\t"
+    "andi %[r6], %[r0], 0xFFFF \n\t"
+    "sra %[r0], %[r0], 16 \n\t"
+    "mul %[r7], %[r5], %[r4] \n\t"
+    "mul %[r9], %[r5], %[r1] \n\t"
+    "mul %[r4], %[r3], %[r4] \n\t"
+    "mul %[r1], %[r3], %[r1] \n\t"
+    "mul %[r8], %[r3], %[r0] \n\t"
+    "mul %[r3], %[r3], %[r6] \n\t"
+    "mul %[r6], %[r5], %[r6] \n\t"
+    "mul %[r0], %[r5], %[r0] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r7], %[r7], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r7], %[r7], 0x2000 \n\t"
+    "sra %[r7], %[r7], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r9], %[r9], 2 \n\t"
+    "addu %[r9], %[r9], %[r7] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r4], %[r4], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r4], %[r4], 0x2000 \n\t"
+    "sra %[r4], %[r4], 14 \n\t"
+#endif
+    "sll %[r1], %[r1], 2 \n\t"
+    "addu %[r1], %[r1], %[r4] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r3], %[r3], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r3], %[r3], 0x2000 \n\t"
+    "sra %[r3], %[r3], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r8], %[r8], 2 \n\t"
+    "addu %[r8], %[r8], %[r3] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r6], %[r6], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r6], %[r6], 0x2000 \n\t"
+    "sra %[r6], %[r6], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sll %[r0], %[r0], 2 \n\t"
+    "addu %[r0], %[r0], %[r6] \n\t"
+    "addu %[r3], %[tmpre], %[offset] \n\t"
+    "addu %[r2], %[tmpim], %[offset] \n\t"
+    "addu %[r9], %[r9], %[r8] \n\t"
+    "negu %[r9], %[r9] \n\t"
+    "sra %[r9], %[r9], 9 \n\t"
+    "subu %[r0], %[r0], %[r1] \n\t"
+    "sra %[r0], %[r0], 9 \n\t"
+    "addiu %[offset], %[offset], -4 \n\t"
+    "sh %[r9], 0(%[r3]) \n\t"
+    "sh %[r0], 0(%[r2]) \n\t"
+    "addiu %[tmpre], %[tmpre], 2 \n\t"
+    "bgtz %[k], 2b \n\t"
+    " addiu %[tmpim], %[tmpim], 2 \n\t"
+   "3: \n\t"
+    ".set pop \n\t"
+    // cosptr and sinptr are advanced inside both loops, so they are declared
+    // as read-write operands rather than plain inputs.
+    : [inre1] "=&r" (inre1), [inre2] "=&r" (inre2), [tmpre] "=&r" (tmpre),
+      [tmpim] "=&r" (tmpim), [offset] "+r" (factor), [k] "+r" (k),
+      [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
+      [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
+      [r8] "=&r" (r8), [r9] "=&r" (r9), [max1] "=&r" (max1),
+      [cosptr] "+r" (cosptr), [sinptr] "+r" (sinptr)
+    : [inre1Q9] "r" (inre1Q9), [inre2Q9] "r" (inre2Q9),
+      [outreQ7] "r" (outreQ7), [outimQ7] "r" (outimQ7),
+      [max] "r" (max)
+    : "hi", "lo", "memory"
+  );
+}
+
+void WebRtcIsacfix_Spec2TimeMIPS(int16_t *inreQ7,
+                                 int16_t *inimQ7,
+                                 int32_t *outre1Q16,
+                                 int32_t *outre2Q16) {
+  int k = FRAMESAMPLES / 4;
+  int16_t* inre;
+  int16_t* inim;
+  int32_t* outre1;
+  int32_t* outre2;
+  int16_t* cosptr = (int16_t*)WebRtcIsacfix_kCosTab2;
+  int16_t* sinptr = (int16_t*)WebRtcIsacfix_kSinTab2;
+  int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, max, max1;
+#if defined(MIPS_DSP_R1_LE)
+  int32_t offset = FRAMESAMPLES - 4;
+#else  // #if defined(MIPS_DSP_R1_LE)
+  int32_t offset = FRAMESAMPLES - 2;
+#endif  // #if defined(MIPS_DSP_R1_LE)
+
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    "addiu %[inre], %[inreQ7], 0 \n\t"
+    "addiu %[inim] , %[inimQ7], 0 \n\t"
+    "addiu %[outre1], %[outre1Q16], 0 \n\t"
+    "addiu %[outre2], %[outre2Q16], 0 \n\t"
+    "mul %[max], $zero, $zero \n\t"
+   "1: \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    // Process two samples in one iteration avoiding left shift before
+    // multiplication. MaxAbsValueW32 function inlined into the loop.
+ "addu %[r8], %[inre], %[offset] \n\t" + "addu %[r9], %[inim], %[offset] \n\t" + "lwl %[r4], 0(%[r8]) \n\t" + "lwl %[r5], 0(%[r9]) \n\t" + "lwl %[r0], 0(%[inre]) \n\t" + "lwl %[r1], 0(%[inim]) \n\t" + "lwl %[r2], 0(%[cosptr]) \n\t" + "lwl %[r3], 0(%[sinptr]) \n\t" + "lwr %[r4], 0(%[r8]) \n\t" + "lwr %[r5], 0(%[r9]) \n\t" + "lwr %[r0], 0(%[inre]) \n\t" + "lwr %[r1], 0(%[inim]) \n\t" + "lwr %[r2], 0(%[cosptr]) \n\t" + "lwr %[r3], 0(%[sinptr]) \n\t" + "packrl.ph %[r4], %[r4], %[r4] \n\t" + "packrl.ph %[r5], %[r5], %[r5] \n\t" + "muleq_s.w.phr %[r6], %[r0], %[r2] \n\t" + "muleq_s.w.phr %[r7], %[r1], %[r3] \n\t" + "muleq_s.w.phr %[r8], %[r4], %[r2] \n\t" + "muleq_s.w.phr %[r9], %[r5], %[r3] \n\t" + "addiu %[k], %[k], -2 \n\t" + "addiu %[cosptr], %[cosptr], 4 \n\t" + "addiu %[sinptr], %[sinptr], 4 \n\t" + "addiu %[inre], %[inre], 4 \n\t" + "addiu %[inim], %[inim], 4 \n\t" + "shra_r.w %[r6], %[r6], 6 \n\t" + "shra_r.w %[r7], %[r7], 6 \n\t" + "shra_r.w %[r8], %[r8], 6 \n\t" + "shra_r.w %[r9], %[r9], 6 \n\t" + "addu %[r6], %[r6], %[r7] \n\t" + "subu %[r9], %[r9], %[r8] \n\t" + "subu %[r7], %[r6], %[r9] \n\t" + "addu %[r6], %[r6], %[r9] \n\t" + "sll %[r10], %[offset], 1 \n\t" + "addu %[r10], %[outre1], %[r10] \n\t" + "sw %[r7], 0(%[outre1]) \n\t" + "absq_s.w %[r7], %[r7] \n\t" + "sw %[r6], 4(%[r10]) \n\t" + "absq_s.w %[r6], %[r6] \n\t" + "slt %[r8], %[max], %[r7] \n\t" + "movn %[max], %[r7], %[r8] \n\t" + "slt %[r8], %[max], %[r6] \n\t" + "movn %[max], %[r6], %[r8] \n\t" + "muleq_s.w.phl %[r6], %[r0], %[r2] \n\t" + "muleq_s.w.phl %[r7], %[r1], %[r3] \n\t" + "muleq_s.w.phl %[r8], %[r4], %[r2] \n\t" + "muleq_s.w.phl %[r9], %[r5], %[r3] \n\t" + "shra_r.w %[r6], %[r6], 6 \n\t" + "shra_r.w %[r7], %[r7], 6 \n\t" + "shra_r.w %[r8], %[r8], 6 \n\t" + "shra_r.w %[r9], %[r9], 6 \n\t" + "addu %[r6], %[r6], %[r7] \n\t" + "subu %[r9], %[r9], %[r8] \n\t" + "subu %[r7], %[r6], %[r9] \n\t" + "addu %[r6], %[r6], %[r9] \n\t" + "sw %[r7], 4(%[outre1]) \n\t" + "absq_s.w %[r7], %[r7] \n\t" + "sw 
%[r6], 0(%[r10]) \n\t" + "absq_s.w %[r6], %[r6] \n\t" + "slt %[r8], %[max], %[r7] \n\t" + "movn %[max], %[r7], %[r8] \n\t" + "slt %[r8], %[max], %[r6] \n\t" + "movn %[max], %[r6], %[r8] \n\t" + "muleq_s.w.phr %[r6], %[r1], %[r2] \n\t" + "muleq_s.w.phr %[r7], %[r0], %[r3] \n\t" + "muleq_s.w.phr %[r8], %[r5], %[r2] \n\t" + "muleq_s.w.phr %[r9], %[r4], %[r3] \n\t" + "addiu %[outre1], %[outre1], 8 \n\t" + "shra_r.w %[r6], %[r6], 6 \n\t" + "shra_r.w %[r7], %[r7], 6 \n\t" + "shra_r.w %[r8], %[r8], 6 \n\t" + "shra_r.w %[r9], %[r9], 6 \n\t" + "subu %[r6], %[r6], %[r7] \n\t" + "addu %[r9], %[r9], %[r8] \n\t" + "subu %[r7], %[r6], %[r9] \n\t" + "addu %[r6], %[r9], %[r6] \n\t" + "negu %[r6], %[r6] \n\t" + "sll %[r10], %[offset], 1 \n\t" + "addu %[r10], %[outre2], %[r10] \n\t" + "sw %[r7], 0(%[outre2]) \n\t" + "absq_s.w %[r7], %[r7] \n\t" + "sw %[r6], 4(%[r10]) \n\t" + "absq_s.w %[r6], %[r6] \n\t" + "slt %[r8], %[max], %[r7] \n\t" + "movn %[max], %[r7], %[r8] \n\t" + "slt %[r8], %[max], %[r6] \n\t" + "movn %[max], %[r6], %[r8] \n\t" + "muleq_s.w.phl %[r6], %[r1], %[r2] \n\t" + "muleq_s.w.phl %[r7], %[r0], %[r3] \n\t" + "muleq_s.w.phl %[r8], %[r5], %[r2] \n\t" + "muleq_s.w.phl %[r9], %[r4], %[r3] \n\t" + "addiu %[offset], %[offset], -8 \n\t" + "shra_r.w %[r6], %[r6], 6 \n\t" + "shra_r.w %[r7], %[r7], 6 \n\t" + "shra_r.w %[r8], %[r8], 6 \n\t" + "shra_r.w %[r9], %[r9], 6 \n\t" + "subu %[r6], %[r6], %[r7] \n\t" + "addu %[r9], %[r9], %[r8] \n\t" + "subu %[r7], %[r6], %[r9] \n\t" + "addu %[r6], %[r9], %[r6] \n\t" + "negu %[r6], %[r6] \n\t" + "sw %[r7], 4(%[outre2]) \n\t" + "absq_s.w %[r7], %[r7] \n\t" + "sw %[r6], 0(%[r10]) \n\t" + "absq_s.w %[r6], %[r6] \n\t" + "slt %[r8], %[max], %[r7] \n\t" + "movn %[max], %[r7], %[r8] \n\t" + "slt %[r8], %[max], %[r6] \n\t" + "movn %[max], %[r6], %[r8] \n\t" + "bgtz %[k], 1b \n\t" + " addiu %[outre2], %[outre2], 8 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "lh %[r0], 0(%[inre]) \n\t" + "lh %[r1], 0(%[inim]) \n\t" + "lh %[r4], 0(%[cosptr]) 
\n\t" + "lh %[r5], 0(%[sinptr]) \n\t" + "addiu %[k], %[k], -1 \n\t" + "mul %[r2], %[r0], %[r4] \n\t" + "mul %[r0], %[r0], %[r5] \n\t" + "mul %[r3], %[r1], %[r5] \n\t" + "mul %[r1], %[r1], %[r4] \n\t" + "addiu %[cosptr], %[cosptr], 2 \n\t" + "addiu %[sinptr], %[sinptr], 2 \n\t" + "addu %[r8], %[inre], %[offset] \n\t" + "addu %[r9], %[inim], %[offset] \n\t" + "addiu %[r2], %[r2], 16 \n\t" + "sra %[r2], %[r2], 5 \n\t" + "addiu %[r0], %[r0], 16 \n\t" + "sra %[r0], %[r0], 5 \n\t" + "addiu %[r3], %[r3], 16 \n\t" + "sra %[r3], %[r3], 5 \n\t" + "lh %[r6], 0(%[r8]) \n\t" + "lh %[r7], 0(%[r9]) \n\t" + "addiu %[r1], %[r1], 16 \n\t" + "sra %[r1], %[r1], 5 \n\t" + "mul %[r8], %[r7], %[r4] \n\t" + "mul %[r7], %[r7], %[r5] \n\t" + "mul %[r9], %[r6], %[r4] \n\t" + "mul %[r6], %[r6], %[r5] \n\t" + "addu %[r2], %[r2], %[r3] \n\t" + "subu %[r1], %[r1], %[r0] \n\t" + "sll %[r0], %[offset], 1 \n\t" + "addu %[r4], %[outre1], %[r0] \n\t" + "addu %[r5], %[outre2], %[r0] \n\t" + "addiu %[r8], %[r8], 16 \n\t" + "sra %[r8], %[r8], 5 \n\t" + "addiu %[r7], %[r7], 16 \n\t" + "sra %[r7], %[r7], 5 \n\t" + "addiu %[r6], %[r6], 16 \n\t" + "sra %[r6], %[r6], 5 \n\t" + "addiu %[r9], %[r9], 16 \n\t" + "sra %[r9], %[r9], 5 \n\t" + "addu %[r8], %[r8], %[r6] \n\t" + "negu %[r8], %[r8] \n\t" + "subu %[r7], %[r7], %[r9] \n\t" + "subu %[r6], %[r2], %[r7] \n\t" + "addu %[r0], %[r2], %[r7] \n\t" + "addu %[r3], %[r1], %[r8] \n\t" + "subu %[r1], %[r8], %[r1] \n\t" + "sw %[r6], 0(%[outre1]) \n\t" + "sw %[r0], 0(%[r4]) \n\t" + "sw %[r3], 0(%[outre2]) \n\t" + "sw %[r1], 0(%[r5]) \n\t" + "addiu %[outre1], %[outre1], 4 \n\t" + "addiu %[offset], %[offset], -4 \n\t" + "addiu %[inre], %[inre], 2 \n\t" + "addiu %[inim], %[inim], 2 \n\t" + // Inlined WebRtcSpl_MaxAbsValueW32 + "negu %[r5], %[r6] \n\t" + "slt %[r2], %[r6], $zero \n\t" + "movn %[r6], %[r5], %[r2] \n\t" + "negu %[r5], %[r0] \n\t" + "slt %[r2], %[r0], $zero \n\t" + "movn %[r0], %[r5], %[r2] \n\t" + "negu %[r5], %[r3] \n\t" + "slt %[r2], %[r3], $zero \n\t" + 
"movn %[r3], %[r5], %[r2] \n\t" + "negu %[r5], %[r1] \n\t" + "slt %[r2], %[r1], $zero \n\t" + "movn %[r1], %[r5], %[r2] \n\t" + "slt %[r2], %[r6], %[r0] \n\t" + "slt %[r5], %[r3], %[r1] \n\t" + "movn %[r6], %[r0], %[r2] \n\t" + "movn %[r3], %[r1], %[r5] \n\t" + "slt %[r2], %[r6], %[r3] \n\t" + "movn %[r6], %[r3], %[r2] \n\t" + "slt %[r2], %[max], %[r6] \n\t" + "movn %[max], %[r6], %[r2] \n\t" + "bgtz %[k], 1b \n\t" + " addiu %[outre2], %[outre2], 4 \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "clz %[max], %[max] \n\t" + "addiu %[max], %[max], -25 \n\t" + ".set pop \n\t" + : [inre] "=&r" (inre), [inim] "=&r" (inim), + [outre1] "=&r" (outre1), [outre2] "=&r" (outre2), + [offset] "+r" (offset), [k] "+r" (k), [r0] "=&r" (r0), + [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3), + [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), + [r7] "=&r" (r7), [r10] "=&r" (r10), + [r8] "=&r" (r8), [r9] "=&r" (r9), [max] "=&r" (max) + : [inreQ7] "r" (inreQ7), [inimQ7] "r" (inimQ7), + [cosptr] "r" (cosptr), [sinptr] "r" (sinptr), + [outre1Q16] "r" (outre1Q16), [outre2Q16] "r" (outre2Q16) + : "hi", "lo", "memory" + ); + + // "Fastest" vectors + k = FRAMESAMPLES / 4; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[inre], %[inreQ7], 0 \n\t" + "addiu %[inim], %[inimQ7], 0 \n\t" + "addiu %[outre1], %[outre1Q16], 0 \n\t" + "addiu %[outre2], %[outre2Q16], 0 \n\t" + "bltz %[max], 2f \n\t" + " subu %[max1], $zero, %[max] \n\t" + "1: \n\t" + "lw %[r0], 0(%[outre1]) \n\t" + "lw %[r1], 0(%[outre2]) \n\t" + "lw %[r2], 4(%[outre1]) \n\t" + "lw %[r3], 4(%[outre2]) \n\t" + "sllv %[r0], %[r0], %[max] \n\t" + "sllv %[r1], %[r1], %[max] \n\t" + "sllv %[r2], %[r2], %[max] \n\t" + "sllv %[r3], %[r3], %[max] \n\t" + "addiu %[k], %[k], -1 \n\t" + "addiu %[outre1], %[outre1], 8 \n\t" + "addiu %[outre2], %[outre2], 8 \n\t" + "sh %[r0], 0(%[inre]) \n\t" + "sh %[r1], 0(%[inim]) \n\t" + "sh %[r2], 2(%[inre]) \n\t" + "sh %[r3], 2(%[inim]) \n\t" + "addiu %[inre], %[inre], 4 \n\t" + 
"bgtz %[k], 1b \n\t" + " addiu %[inim], %[inim], 4 \n\t" + "b 4f \n\t" + " nop \n\t" + "2: \n\t" +#if !defined(MIPS_DSP_R1_LE) + "addiu %[r4], $zero, 1 \n\t" + "addiu %[r5], %[max1], -1 \n\t" + "sllv %[r4], %[r4], %[r5] \n\t" +#endif // #if !defined(MIPS_DSP_R1_LE) + "3: \n\t" + "lw %[r0], 0(%[outre1]) \n\t" + "lw %[r1], 0(%[outre2]) \n\t" + "lw %[r2], 4(%[outre1]) \n\t" + "lw %[r3], 4(%[outre2]) \n\t" +#if defined(MIPS_DSP_R1_LE) + "shrav_r.w %[r0], %[r0], %[max1] \n\t" + "shrav_r.w %[r1], %[r1], %[max1] \n\t" + "shrav_r.w %[r2], %[r2], %[max1] \n\t" + "shrav_r.w %[r3], %[r3], %[max1] \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addu %[r0], %[r0], %[r4] \n\t" + "addu %[r1], %[r1], %[r4] \n\t" + "addu %[r2], %[r2], %[r4] \n\t" + "addu %[r3], %[r3], %[r4] \n\t" + "srav %[r0], %[r0], %[max1] \n\t" + "srav %[r1], %[r1], %[max1] \n\t" + "srav %[r2], %[r2], %[max1] \n\t" + "srav %[r3], %[r3], %[max1] \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "addiu %[outre1], %[outre1], 8 \n\t" + "addiu %[outre2], %[outre2], 8 \n\t" + "sh %[r0], 0(%[inre]) \n\t" + "sh %[r1], 0(%[inim]) \n\t" + "sh %[r2], 2(%[inre]) \n\t" + "sh %[r3], 2(%[inim]) \n\t" + "addiu %[k], %[k], -1 \n\t" + "addiu %[inre], %[inre], 4 \n\t" + "bgtz %[k], 3b \n\t" + " addiu %[inim], %[inim], 4 \n\t" + "4: \n\t" + ".set pop \n\t" + : [k] "+r" (k), [max1] "=&r" (max1), [r0] "=&r" (r0), + [inre] "=&r" (inre), [inim] "=&r" (inim), + [outre1] "=&r" (outre1), [outre2] "=&r" (outre2), +#if !defined(MIPS_DSP_R1_LE) + [r4] "=&r" (r4), [r5] "=&r" (r5), +#endif // #if !defined(MIPS_DSP_R1_LE) + [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3) + : [max] "r" (max), [inreQ7] "r" (inreQ7), + [inimQ7] "r" (inimQ7), [outre1Q16] "r" (outre1Q16), + [outre2Q16] "r" (outre2Q16) + : "memory" + ); + + WebRtcIsacfix_FftRadix16Fastest(inreQ7, inimQ7, 1); // real call + + // All the remaining processing is done inside a single loop to avoid + // unnecessary memory accesses. MIPS DSPr2 version processes two samples + // at a time. 
+ cosptr = (int16_t*)WebRtcIsacfix_kCosTab1; + sinptr = (int16_t*)WebRtcIsacfix_kSinTab1; + k = FRAMESAMPLES / 2; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[inre], %[inreQ7], 0 \n\t" + "addiu %[inim], %[inimQ7], 0 \n\t" + "addiu %[outre1], %[outre1Q16], 0 \n\t" + "addiu %[outre2], %[outre2Q16], 0 \n\t" + "addiu %[r4], $zero, 273 \n\t" + "addiu %[r5], $zero, 31727 \n\t" +#if defined(MIPS_DSP_R2_LE) + "addiu %[max], %[max], 16 \n\t" + "replv.ph %[r4], %[r4] \n\t" +#endif // #if defined(MIPS_DSP_R2_LE) + "bltz %[max], 2f \n\t" + " subu %[max1], $zero, %[max] \n\t" +#if defined(MIPS_DSP_R2_LE) + "addiu %[max], %[max], 1 \n\t" +#endif // #if defined(MIPS_DSP_R2_LE) + "1: \n\t" +#if defined(MIPS_DSP_R2_LE) + "lwl %[r0], 0(%[inre]) \n\t" + "lwl %[r1], 0(%[inim]) \n\t" + "lh %[r2], 0(%[cosptr]) \n\t" + "lwr %[r0], 0(%[inre]) \n\t" + "lwr %[r1], 0(%[inim]) \n\t" + "lh %[r3], 0(%[sinptr]) \n\t" + "muleq_s.w.phr %[r6], %[r0], %[r4] \n\t" + "muleq_s.w.phr %[r7], %[r1], %[r4] \n\t" + "muleq_s.w.phl %[r0], %[r0], %[r4] \n\t" + "muleq_s.w.phl %[r1], %[r1], %[r4] \n\t" + "addiu %[k], %[k], -2 \n\t" + "addiu %[inre], %[inre], 4 \n\t" + "addiu %[inim], %[inim], 4 \n\t" + "shrav_r.w %[r6], %[r6], %[max] \n\t" + "shrav_r.w %[r7], %[r7], %[max] \n\t" + "mult $ac0, %[r2], %[r6] \n\t" + "mult $ac1, %[r3], %[r7] \n\t" + "mult $ac2, %[r2], %[r7] \n\t" + "mult $ac3, %[r3], %[r6] \n\t" + "lh %[r2], 2(%[cosptr]) \n\t" + "lh %[r3], 2(%[sinptr]) \n\t" + "extr_r.w %[r6], $ac0, 14 \n\t" + "extr_r.w %[r7], $ac1, 14 \n\t" + "extr_r.w %[r8], $ac2, 14 \n\t" + "extr_r.w %[r9], $ac3, 14 \n\t" + "shrav_r.w %[r0], %[r0], %[max] \n\t" + "shrav_r.w %[r1], %[r1], %[max] \n\t" + "mult $ac0, %[r2], %[r0] \n\t" + "mult $ac1, %[r3], %[r1] \n\t" + "mult $ac2, %[r2], %[r1] \n\t" + "mult $ac3, %[r3], %[r0] \n\t" + "addiu %[cosptr], %[cosptr], 4 \n\t" + "extr_r.w %[r0], $ac0, 14 \n\t" + "extr_r.w %[r1], $ac1, 14 \n\t" + "extr_r.w %[r2], $ac2, 14 \n\t" + "extr_r.w %[r3], $ac3, 14 
\n\t" + "subu %[r6], %[r6], %[r7] \n\t" + "addu %[r8], %[r8], %[r9] \n\t" + "mult $ac0, %[r5], %[r6] \n\t" + "mult $ac1, %[r5], %[r8] \n\t" + "addiu %[sinptr], %[sinptr], 4 \n\t" + "subu %[r0], %[r0], %[r1] \n\t" + "addu %[r2], %[r2], %[r3] \n\t" + "extr_r.w %[r1], $ac0, 11 \n\t" + "extr_r.w %[r3], $ac1, 11 \n\t" + "mult $ac2, %[r5], %[r0] \n\t" + "mult $ac3, %[r5], %[r2] \n\t" + "sw %[r1], 0(%[outre1]) \n\t" + "sw %[r3], 0(%[outre2]) \n\t" + "addiu %[outre1], %[outre1], 8 \n\t" + "extr_r.w %[r0], $ac2, 11 \n\t" + "extr_r.w %[r2], $ac3, 11 \n\t" + "sw %[r0], -4(%[outre1]) \n\t" + "sw %[r2], 4(%[outre2]) \n\t" + "bgtz %[k], 1b \n\t" + " addiu %[outre2], %[outre2], 8 \n\t" + "b 3f \n\t" + /* Explicit delay-slot nop, as at the other branches to label 3. */ + " nop \n\t" +#else // #if defined(MIPS_DSP_R2_LE) + "lh %[r0], 0(%[inre]) \n\t" + "lh %[r1], 0(%[inim]) \n\t" + "addiu %[k], %[k], -1 \n\t" + "srav %[r0], %[r0], %[max] \n\t" + "srav %[r1], %[r1], %[max] \n\t" + "sra %[r2], %[r0], 16 \n\t" + "andi %[r0], %[r0], 0xFFFF \n\t" + "sra %[r0], %[r0], 1 \n\t" + "sra %[r3], %[r1], 16 \n\t" + "andi %[r1], %[r1], 0xFFFF \n\t" + "sra %[r1], %[r1], 1 \n\t" + "mul %[r2], %[r2], %[r4] \n\t" + "mul %[r0], %[r0], %[r4] \n\t" + "mul %[r3], %[r3], %[r4] \n\t" + "mul %[r1], %[r1], %[r4] \n\t" + "addiu %[inre], %[inre], 2 \n\t" + "addiu %[inim], %[inim], 2 \n\t" + "lh %[r6], 0(%[cosptr]) \n\t" + "lh %[r7], 0(%[sinptr]) \n\t" +#if defined(MIPS_DSP_R1_LE) + "shra_r.w %[r0], %[r0], 15 \n\t" + "shra_r.w %[r1], %[r1], 15 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r0], %[r0], 0x4000 \n\t" + "addiu %[r1], %[r1], 0x4000 \n\t" + "sra %[r0], %[r0], 15 \n\t" + "sra %[r1], %[r1], 15 \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "addu %[r0], %[r2], %[r0] \n\t" + "addu %[r1], %[r3], %[r1] \n\t" + "sra %[r2], %[r0], 16 \n\t" + "andi %[r0], %[r0], 0xFFFF \n\t" + "mul %[r9], %[r2], %[r6] \n\t" + "mul %[r2], %[r2], %[r7] \n\t" + "mul %[r8], %[r0], %[r6] \n\t" + "mul %[r0], %[r0], %[r7] \n\t" + /* r3 = upper half of r1 (same step as in the loop at label 2). */ + "sra %[r3], %[r1], 16 \n\t" + "andi %[r1], %[r1], 0xFFFF \n\t" + "sll %[r9],
%[r9], 2 \n\t" + "sll %[r2], %[r2], 2 \n\t" +#if defined(MIPS_DSP_R1_LE) + "shra_r.w %[r8], %[r8], 14 \n\t" + "shra_r.w %[r0], %[r0], 14 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r8], %[r8], 0x2000 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r8], %[r8], 14 \n\t" + "sra %[r0], %[r0], 14 \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "addu %[r9], %[r9], %[r8] \n\t" + "addu %[r2], %[r2], %[r0] \n\t" + "mul %[r0], %[r3], %[r6] \n\t" + "mul %[r3], %[r3], %[r7] \n\t" + "mul %[r8], %[r1], %[r6] \n\t" + /* Scale by the sine coefficient in r7; r8 was just overwritten above. */ + "mul %[r1], %[r1], %[r7] \n\t" + "addiu %[cosptr], %[cosptr], 2 \n\t" + "addiu %[sinptr], %[sinptr], 2 \n\t" + "sll %[r0], %[r0], 2 \n\t" + "sll %[r3], %[r3], 2 \n\t" +#if defined(MIPS_DSP_R1_LE) + "shra_r.w %[r8], %[r8], 14 \n\t" + "shra_r.w %[r1], %[r1], 14 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r8], %[r8], 0x2000 \n\t" + "addiu %[r1], %[r1], 0x2000 \n\t" + "sra %[r8], %[r8], 14 \n\t" + "sra %[r1], %[r1], 14 \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "addu %[r0], %[r0], %[r8] \n\t" + "addu %[r3], %[r3], %[r1] \n\t" + "subu %[r9], %[r9], %[r3] \n\t" + "addu %[r0], %[r0], %[r2] \n\t" + "sra %[r1], %[r9], 16 \n\t" + "andi %[r9], %[r9], 0xFFFF \n\t" + "mul %[r1], %[r1], %[r5] \n\t" + "mul %[r9], %[r9], %[r5] \n\t" + "sra %[r2], %[r0], 16 \n\t" + "andi %[r0], %[r0], 0xFFFF \n\t" + "mul %[r2], %[r2], %[r5] \n\t" + "mul %[r0], %[r0], %[r5] \n\t" + "sll %[r1], %[r1], 5 \n\t" +#if defined(MIPS_DSP_R1_LE) + "shra_r.w %[r9], %[r9], 11 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r9], %[r9], 0x400 \n\t" + "sra %[r9], %[r9], 11 \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "addu %[r1], %[r1], %[r9] \n\t" + "sll %[r2], %[r2], 5 \n\t" +#if defined(MIPS_DSP_R1_LE) + "shra_r.w %[r0], %[r0], 11 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r0], %[r0], 0x400 \n\t" + "sra %[r0], %[r0], 11 \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "addu %[r0], %[r0], %[r2] \n\t" + "sw %[r1], 0(%[outre1]) \n\t" + "addiu %[outre1], %[outre1], 4
\n\t" + "sw %[r0], 0(%[outre2]) \n\t" + "bgtz %[k], 1b \n\t" + " addiu %[outre2], %[outre2], 4 \n\t" + "b 3f \n\t" + " nop \n\t" +#endif // #if defined(MIPS_DSP_R2_LE) + "2: \n\t" +#if defined(MIPS_DSP_R2_LE) + "addiu %[max1], %[max1], -1 \n\t" + "21: \n\t" + "lwl %[r0], 0(%[inre]) \n\t" + "lwl %[r1], 0(%[inim]) \n\t" + "lh %[r2], 0(%[cosptr]) \n\t" + "lwr %[r0], 0(%[inre]) \n\t" + "lwr %[r1], 0(%[inim]) \n\t" + "lh %[r3], 0(%[sinptr]) \n\t" + "muleq_s.w.phr %[r6], %[r0], %[r4] \n\t" + "muleq_s.w.phr %[r7], %[r1], %[r4] \n\t" + "muleq_s.w.phl %[r0], %[r0], %[r4] \n\t" + "muleq_s.w.phl %[r1], %[r1], %[r4] \n\t" + "addiu %[k], %[k], -2 \n\t" + "addiu %[inre], %[inre], 4 \n\t" + "addiu %[inim], %[inim], 4 \n\t" + "sllv %[r6], %[r6], %[max1] \n\t" + "sllv %[r7], %[r7], %[max1] \n\t" + "mult $ac0, %[r2], %[r6] \n\t" + "mult $ac1, %[r3], %[r7] \n\t" + "mult $ac2, %[r2], %[r7] \n\t" + "mult $ac3, %[r3], %[r6] \n\t" + "lh %[r2], 2(%[cosptr]) \n\t" + "lh %[r3], 2(%[sinptr]) \n\t" + "extr_r.w %[r6], $ac0, 14 \n\t" + "extr_r.w %[r7], $ac1, 14 \n\t" + "extr_r.w %[r8], $ac2, 14 \n\t" + "extr_r.w %[r9], $ac3, 14 \n\t" + "sllv %[r0], %[r0], %[max1] \n\t" + "sllv %[r1], %[r1], %[max1] \n\t" + "mult $ac0, %[r2], %[r0] \n\t" + "mult $ac1, %[r3], %[r1] \n\t" + "mult $ac2, %[r2], %[r1] \n\t" + "mult $ac3, %[r3], %[r0] \n\t" + "addiu %[cosptr], %[cosptr], 4 \n\t" + "extr_r.w %[r0], $ac0, 14 \n\t" + "extr_r.w %[r1], $ac1, 14 \n\t" + "extr_r.w %[r2], $ac2, 14 \n\t" + "extr_r.w %[r3], $ac3, 14 \n\t" + "subu %[r6], %[r6], %[r7] \n\t" + "addu %[r8], %[r8], %[r9] \n\t" + "mult $ac0, %[r5], %[r6] \n\t" + "mult $ac1, %[r5], %[r8] \n\t" + "addiu %[sinptr], %[sinptr], 4 \n\t" + "subu %[r0], %[r0], %[r1] \n\t" + "addu %[r2], %[r2], %[r3] \n\t" + "extr_r.w %[r1], $ac0, 11 \n\t" + "extr_r.w %[r3], $ac1, 11 \n\t" + "mult $ac2, %[r5], %[r0] \n\t" + "mult $ac3, %[r5], %[r2] \n\t" + "sw %[r1], 0(%[outre1]) \n\t" + "sw %[r3], 0(%[outre2]) \n\t" + "addiu %[outre1], %[outre1], 8 \n\t" + "extr_r.w %[r0], 
$ac2, 11 \n\t" + "extr_r.w %[r2], $ac3, 11 \n\t" + "sw %[r0], -4(%[outre1]) \n\t" + "sw %[r2], 4(%[outre2]) \n\t" + "bgtz %[k], 21b \n\t" + " addiu %[outre2], %[outre2], 8 \n\t" + "b 3f \n\t" + " nop \n\t" +#else // #if defined(MIPS_DSP_R2_LE) + "lh %[r0], 0(%[inre]) \n\t" + "lh %[r1], 0(%[inim]) \n\t" + "addiu %[k], %[k], -1 \n\t" + "sllv %[r0], %[r0], %[max1] \n\t" + "sllv %[r1], %[r1], %[max1] \n\t" + "sra %[r2], %[r0], 16 \n\t" + "andi %[r0], %[r0], 0xFFFF \n\t" + "sra %[r0], %[r0], 1 \n\t" + "sra %[r3], %[r1], 16 \n\t" + "andi %[r1], %[r1], 0xFFFF \n\t" + "sra %[r1], %[r1], 1 \n\t" + "mul %[r2], %[r2], %[r4] \n\t" + "mul %[r0], %[r0], %[r4] \n\t" + "mul %[r3], %[r3], %[r4] \n\t" + "mul %[r1], %[r1], %[r4] \n\t" + "addiu %[inre], %[inre], 2 \n\t" + "addiu %[inim], %[inim], 2 \n\t" + "lh %[r6], 0(%[cosptr]) \n\t" + "lh %[r7], 0(%[sinptr]) \n\t" +#if defined(MIPS_DSP_R1_LE) + "shra_r.w %[r0], %[r0], 15 \n\t" + "shra_r.w %[r1], %[r1], 15 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r0], %[r0], 0x4000 \n\t" + "addiu %[r1], %[r1], 0x4000 \n\t" + "sra %[r0], %[r0], 15 \n\t" + "sra %[r1], %[r1], 15 \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "addu %[r0], %[r2], %[r0] \n\t" + "addu %[r1], %[r3], %[r1] \n\t" + "sra %[r2], %[r0], 16 \n\t" + "andi %[r0], %[r0], 0xFFFF \n\t" + "mul %[r9], %[r2], %[r6] \n\t" + "mul %[r2], %[r2], %[r7] \n\t" + "mul %[r8], %[r0], %[r6] \n\t" + "mul %[r0], %[r0], %[r7] \n\t" + "sra %[r3], %[r1], 16 \n\t" + "andi %[r1], %[r1], 0xFFFF \n\t" + "sll %[r9], %[r9], 2 \n\t" + "sll %[r2], %[r2], 2 \n\t" +#if defined(MIPS_DSP_R1_LE) + "shra_r.w %[r8], %[r8], 14 \n\t" + "shra_r.w %[r0], %[r0], 14 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r8], %[r8], 0x2000 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r8], %[r8], 14 \n\t" + "sra %[r0], %[r0], 14 \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "addu %[r9], %[r9], %[r8] \n\t" + "addu %[r2], %[r2], %[r0] \n\t" + "mul %[r0], %[r3], %[r6] \n\t" + "mul %[r3], %[r3], %[r7] \n\t" + 
"mul %[r8], %[r1], %[r6] \n\t" + "mul %[r1], %[r1], %[r7] \n\t" + "addiu %[cosptr], %[cosptr], 2 \n\t" + "addiu %[sinptr], %[sinptr], 2 \n\t" + "sll %[r0], %[r0], 2 \n\t" + "sll %[r3], %[r3], 2 \n\t" +#if defined(MIPS_DSP_R1_LE) + "shra_r.w %[r8], %[r8], 14 \n\t" + "shra_r.w %[r1], %[r1], 14 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r8], %[r8], 0x2000 \n\t" + "addiu %[r1], %[r1], 0x2000 \n\t" + "sra %[r8], %[r8], 14 \n\t" + "sra %[r1], %[r1], 14 \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "addu %[r0], %[r0], %[r8] \n\t" + "addu %[r3], %[r3], %[r1] \n\t" + "subu %[r9], %[r9], %[r3] \n\t" + "addu %[r0], %[r0], %[r2] \n\t" + "sra %[r1], %[r9], 16 \n\t" + "andi %[r9], %[r9], 0xFFFF \n\t" + "mul %[r1], %[r1], %[r5] \n\t" + "mul %[r9], %[r9], %[r5] \n\t" + "sra %[r2], %[r0], 16 \n\t" + "andi %[r0], %[r0], 0xFFFF \n\t" + "mul %[r2], %[r2], %[r5] \n\t" + "mul %[r0], %[r0], %[r5] \n\t" + "sll %[r1], %[r1], 5 \n\t" +#if defined(MIPS_DSP_R1_LE) + "shra_r.w %[r9], %[r9], 11 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r9], %[r9], 0x400 \n\t" + "sra %[r9], %[r9], 11 \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "addu %[r1], %[r1], %[r9] \n\t" + "sll %[r2], %[r2], 5 \n\t" +#if defined(MIPS_DSP_R1_LE) + "shra_r.w %[r0], %[r0], 11 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r0], %[r0], 0x400 \n\t" + "sra %[r0], %[r0], 11 \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "addu %[r0], %[r0], %[r2] \n\t" + "sw %[r1], 0(%[outre1]) \n\t" + "addiu %[outre1], %[outre1], 4 \n\t" + "sw %[r0], 0(%[outre2]) \n\t" + "bgtz %[k], 2b \n\t" + " addiu %[outre2], %[outre2], 4 \n\t" +#endif // #if defined(MIPS_DSP_R2_LE) + "3: \n\t" + ".set pop \n\t" + : [k] "+r" (k), [r0] "=&r" (r0), [r1] "=&r" (r1), + [r2] "=&r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4), + [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), + [r8] "=&r" (r8), [r9] "=&r" (r9), [max1] "=&r" (max1), + [inre] "=&r" (inre), [inim] "=&r" (inim), + [outre1] "=&r" (outre1), [outre2] "=&r" (outre2) + : [max] 
"r" (max), [inreQ7] "r" (inreQ7), + [inimQ7] "r" (inimQ7), [cosptr] "r" (cosptr), + [sinptr] "r" (sinptr), [outre1Q16] "r" (outre1Q16), + [outre2Q16] "r" (outre2Q16) + : "hi", "lo", "memory" +#if defined(MIPS_DSP_R2_LE) + , "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo" +#endif // #if defined(MIPS_DSP_R2_LE) + ); +} diff --git a/modules/audio_coding/codecs/isac/fix/source/transform_tables.c b/modules/audio_coding/codecs/isac/fix/source/transform_tables.c new file mode 100644 index 00000000..ee96b8e3 --- /dev/null +++ b/modules/audio_coding/codecs/isac/fix/source/transform_tables.c @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This file contains trigonometric functions look-up tables used in + * transform functions WebRtcIsacfix_Time2Spec and WebRtcIsacfix_Spec2Time. + */ + +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h" +#include "webrtc/typedefs.h" + +#if !(defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON) +/* Cosine table 1 in Q14. 
*/ +const int16_t WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2] = { + 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270, + 16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880, + 15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218, + 15137, 15053, 14968, 14879, 14788, 14694, 14598, 14500, 14399, 14295, + 14189, 14081, 13970, 13856, 13741, 13623, 13502, 13380, 13255, 13128, + 12998, 12867, 12733, 12597, 12458, 12318, 12176, 12031, 11885, 11736, + 11585, 11433, 11278, 11121, 10963, 10803, 10641, 10477, 10311, 10143, + 9974, 9803, 9630, 9456, 9280, 9102, 8923, 8743, 8561, 8377, + 8192, 8006, 7818, 7629, 7438, 7246, 7053, 6859, 6664, 6467, + 6270, 6071, 5872, 5671, 5469, 5266, 5063, 4859, 4653, 4447, + 4240, 4033, 3825, 3616, 3406, 3196, 2986, 2775, 2563, 2351, + 2139, 1926, 1713, 1499, 1285, 1072, 857, 643, 429, 214, + 0, -214, -429, -643, -857, -1072, -1285, -1499, -1713, -1926, + -2139, -2351, -2563, -2775, -2986, -3196, -3406, -3616, -3825, -4033, + -4240, -4447, -4653, -4859, -5063, -5266, -5469, -5671, -5872, -6071, + -6270, -6467, -6664, -6859, -7053, -7246, -7438, -7629, -7818, -8006, + -8192, -8377, -8561, -8743, -8923, -9102, -9280, -9456, -9630, -9803, + -9974, -10143, -10311, -10477, -10641, -10803, -10963, -11121, -11278, -11433, + -11585, -11736, -11885, -12031, -12176, -12318, -12458, -12597, -12733, + -12867, -12998, -13128, -13255, -13380, -13502, -13623, -13741, -13856, + -13970, -14081, -14189, -14295, -14399, -14500, -14598, -14694, -14788, + -14879, -14968, -15053, -15137, -15218, -15296, -15371, -15444, -15515, + -15582, -15647, -15709, -15769, -15826, -15880, -15931, -15980, -16026, + -16069, -16110, -16147, -16182, -16214, -16244, -16270, -16294, -16315, + -16333, -16349, -16362, -16371, -16378, -16383 +}; + +/* Sine table 1 in Q14. 
*/ +const int16_t WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2] = { + 0, 214, 429, 643, 857, 1072, 1285, 1499, 1713, 1926, + 2139, 2351, 2563, 2775, 2986, 3196, 3406, 3616, 3825, 4033, + 4240, 4447, 4653, 4859, 5063, 5266, 5469, 5671, 5872, 6071, + 6270, 6467, 6664, 6859, 7053, 7246, 7438, 7629, 7818, 8006, + 8192, 8377, 8561, 8743, 8923, 9102, 9280, 9456, 9630, 9803, + 9974, 10143, 10311, 10477, 10641, 10803, 10963, 11121, 11278, 11433, + 11585, 11736, 11885, 12031, 12176, 12318, 12458, 12597, 12733, 12867, + 12998, 13128, 13255, 13380, 13502, 13623, 13741, 13856, 13970, 14081, + 14189, 14295, 14399, 14500, 14598, 14694, 14788, 14879, 14968, 15053, + 15137, 15218, 15296, 15371, 15444, 15515, 15582, 15647, 15709, 15769, + 15826, 15880, 15931, 15980, 16026, 16069, 16110, 16147, 16182, 16214, + 16244, 16270, 16294, 16315, 16333, 16349, 16362, 16371, 16378, 16383, + 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270, + 16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880, + 15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218, + 15137, 15053, 14968, 14879, 14788, 14694, 14598, 14500, 14399, 14295, + 14189, 14081, 13970, 13856, 13741, 13623, 13502, 13380, 13255, 13128, + 12998, 12867, 12733, 12597, 12458, 12318, 12176, 12031, 11885, 11736, + 11585, 11433, 11278, 11121, 10963, 10803, 10641, 10477, 10311, 10143, + 9974, 9803, 9630, 9456, 9280, 9102, 8923, 8743, 8561, 8377, + 8192, 8006, 7818, 7629, 7438, 7246, 7053, 6859, 6664, 6467, + 6270, 6071, 5872, 5671, 5469, 5266, 5063, 4859, 4653, 4447, + 4240, 4033, 3825, 3616, 3406, 3196, 2986, 2775, 2563, 2351, + 2139, 1926, 1713, 1499, 1285, 1072, 857, 643, 429, 214 +}; + + +/* Sine table 2 in Q14. 
*/ +const int16_t WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4] = { + 16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305, 16283, -16257, + 16229, -16199, 16165, -16129, 16090, -16048, 16003, -15956, 15906, -15853, + 15798, -15739, 15679, -15615, 15549, -15480, 15408, -15334, 15257, -15178, + 15095, -15011, 14924, -14834, 14741, -14647, 14549, -14449, 14347, -14242, + 14135, -14025, 13913, -13799, 13682, -13563, 13441, -13318, 13192, -13063, + 12933, -12800, 12665, -12528, 12389, -12247, 12104, -11958, 11810, -11661, + 11509, -11356, 11200, -11042, 10883, -10722, 10559, -10394, 10227, -10059, + 9889, -9717, 9543, -9368, 9191, -9013, 8833, -8652, 8469, -8285, + 8099, -7912, 7723, -7534, 7342, -7150, 6957, -6762, 6566, -6369, + 6171, -5971, 5771, -5570, 5368, -5165, 4961, -4756, 4550, -4344, + 4137, -3929, 3720, -3511, 3301, -3091, 2880, -2669, 2457, -2245, + 2032, -1819, 1606, -1392, 1179, -965, 750, -536, 322, -107 +}; +#endif + +#if defined(MIPS32_LE) +/* Cosine table 2 in Q14. Used only on MIPS platforms. */ +const int16_t WebRtcIsacfix_kCosTab2[FRAMESAMPLES/4] = { + 107, -322, 536, -750, 965, -1179, 1392, -1606, 1819, -2032, + 2245, -2457, 2669, -2880, 3091, -3301, 3511, -3720, 3929, -4137, + 4344, -4550, 4756, -4961, 5165, -5368, 5570, -5771, 5971, -6171, + 6369, -6566, 6762, -6957, 7150, -7342, 7534, -7723, 7912, -8099, + 8285, -8469, 8652, -8833, 9013, -9191, 9368, -9543, 9717, -9889, + 10059, -10227, 10394, -10559, 10722, -10883, 11042, -11200, 11356, -11509, + 11661, -11810, 11958, -12104, 12247, -12389, 12528, -12665, 12800, -12933, + 13063, -13192, 13318, -13441, 13563, -13682, 13799, -13913, 14025, -14135, + 14242, -14347, 14449, -14549, 14647, -14741, 14834, -14924, 15011, -15095, + 15178, -15257, 15334, -15408, 15480, -15549, 15615, -15679, 15739, -15798, + 15853, -15906, 15956, -16003, 16048, -16090, 16129, -16165, 16199, -16229, + 16257, -16283, 16305, -16325, 16342, -16356, 16367, -16375, 16381, -16384 +}; +#endif |