summaryrefslogtreecommitdiff
path: root/modules
diff options
context:
space:
mode:
authorandrew@webrtc.org <andrew@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>2014-07-21 16:43:13 +0000
committerandrew@webrtc.org <andrew@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>2014-07-21 16:43:13 +0000
commite4834e042a794dbf042bfd0934e335598d7ce4b8 (patch)
tree3770a2234bf16beb8d934d943c9b8f0f27724225 /modules
parentf0a119fa99a8c384c30f3eb049c54091a6a1a828 (diff)
downloadwebrtc-e4834e042a794dbf042bfd0934e335598d7ce4b8.tar.gz
MIPS optimizations for ISAC (patch #2)
Implemented functions: - WebRtcIsacfix_CalculateResidualEnergy - WebRtcIsacfix_Spec2Time - WebRtcIsacfix_Time2Spec - WebRtcIsacfix_HighpassFilterFixDec32 - WebRtcIsacfix_PCorr2Q32 Gain achieved: approx. a further 5% on top of patch #1 on the ISAC encoding path. The optimizations are bit-exact to the C code, with the exception of the MIPS DSPr2 variant of the WebRtcIsacfix_Time2Spec function (the accuracy of the WebRtcIsacfix_Time2Spec MIPS DSPr2 variant is the same as or better than that of the C variant). Code verification and the improvement achieved have been determined using the iSACFixtest application. R=andrew@webrtc.org, tina.legrand@webrtc.org Review URL: https://webrtc-codereview.appspot.com/19749004 Patch from Ljubomir Papuga <lpapuga@mips.com>. git-svn-id: http://webrtc.googlecode.com/svn/trunk/webrtc@6749 4adac7df-926f-26a2-2b94-8c16560cd09d
Diffstat (limited to 'modules')
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/codec.h10
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h21
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/filterbanks.c11
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/filterbanks_mips.c172
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/filterbanks_unittest.cc7
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/isacfix.c11
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/isacfix.gypi6
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h9
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/lpc_masking_model_mips.c237
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/pitch_estimator.c133
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h4
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/pitch_estimator_c.c122
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/pitch_estimator_mips.c196
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/transform.c78
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/transform_mips.c1287
-rw-r--r--modules/audio_coding/codecs/isac/fix/source/transform_tables.c111
16 files changed, 2198 insertions, 217 deletions
diff --git a/modules/audio_coding/codecs/isac/fix/source/codec.h b/modules/audio_coding/codecs/isac/fix/source/codec.h
index 2f649324..a38c6e56 100644
--- a/modules/audio_coding/codecs/isac/fix/source/codec.h
+++ b/modules/audio_coding/codecs/isac/fix/source/codec.h
@@ -101,6 +101,16 @@ void WebRtcIsacfix_Spec2TimeNeon(int16_t* inreQ7,
int32_t* outre2Q16);
#endif
+#if defined(MIPS32_LE)
+void WebRtcIsacfix_Time2SpecMIPS(int16_t* inre1Q9,
+ int16_t* inre2Q9,
+ int16_t* outre,
+ int16_t* outim);
+void WebRtcIsacfix_Spec2TimeMIPS(int16_t* inreQ7,
+ int16_t* inimQ7,
+ int32_t* outre1Q16,
+ int32_t* outre2Q16);
+#endif
/* filterbank functions */
diff --git a/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h b/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h
index 3fefc1a5..7a5f7462 100644
--- a/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h
+++ b/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h
@@ -23,10 +23,23 @@ extern "C" {
* coefficient: Input.
* state: Input/output, filter state, in Q4.
*/
-void WebRtcIsacfix_HighpassFilterFixDec32(int16_t *io,
- int16_t len,
- const int16_t *coefficient,
- int32_t *state);
+typedef void (*HighpassFilterFixDec32)(int16_t* io,
+ int16_t len,
+ const int16_t* coefficient,
+ int32_t* state);
+extern HighpassFilterFixDec32 WebRtcIsacfix_HighpassFilterFixDec32;
+
+void WebRtcIsacfix_HighpassFilterFixDec32C(int16_t* io,
+ int16_t len,
+ const int16_t* coefficient,
+ int32_t* state);
+
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcIsacfix_HighpassFilterFixDec32MIPS(int16_t* io,
+ int16_t len,
+ const int16_t* coefficient,
+ int32_t* state);
+#endif
typedef void (*AllpassFilter2FixDec16)(
int16_t *data_ch1, // Input and output in channel 1, in Q0
diff --git a/modules/audio_coding/codecs/isac/fix/source/filterbanks.c b/modules/audio_coding/codecs/isac/fix/source/filterbanks.c
index 64557e13..1928a7cb 100644
--- a/modules/audio_coding/codecs/isac/fix/source/filterbanks.c
+++ b/modules/audio_coding/codecs/isac/fix/source/filterbanks.c
@@ -86,10 +86,13 @@ void WebRtcIsacfix_AllpassFilter2FixDec16C(
filter_state_ch2[1] = state1_ch2;
}
-void WebRtcIsacfix_HighpassFilterFixDec32(int16_t *io,
- int16_t len,
- const int16_t *coefficient,
- int32_t *state)
+// Define the function pointer declared extern in filterbank_internal.h.
+HighpassFilterFixDec32 WebRtcIsacfix_HighpassFilterFixDec32;
+
+void WebRtcIsacfix_HighpassFilterFixDec32C(int16_t *io,
+ int16_t len,
+ const int16_t *coefficient,
+ int32_t *state)
{
int k;
int32_t a1 = 0, b1 = 0, c = 0, in = 0;
diff --git a/modules/audio_coding/codecs/isac/fix/source/filterbanks_mips.c b/modules/audio_coding/codecs/isac/fix/source/filterbanks_mips.c
index 1887745b..4dd70cf6 100644
--- a/modules/audio_coding/codecs/isac/fix/source/filterbanks_mips.c
+++ b/modules/audio_coding/codecs/isac/fix/source/filterbanks_mips.c
@@ -10,26 +10,26 @@
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h"
-// WebRtcIsacfix_AllpassFilter2FixDec16 function optimized for MIPSDSP platform
-// Bit-exact with WebRtcIsacfix_AllpassFilter2FixDec16C from filterbanks.c
+// WebRtcIsacfix_AllpassFilter2FixDec16 function optimized for MIPSDSP platform.
+// Bit-exact with WebRtcIsacfix_AllpassFilter2FixDec16C from filterbanks.c.
void WebRtcIsacfix_AllpassFilter2FixDec16MIPS(
- int16_t *data_ch1, // Input and output in channel 1, in Q0
- int16_t *data_ch2, // Input and output in channel 2, in Q0
- const int16_t *factor_ch1, // Scaling factor for channel 1, in Q15
- const int16_t *factor_ch2, // Scaling factor for channel 2, in Q15
- const int length, // Length of the data buffers
- int32_t *filter_state_ch1, // Filter state for channel 1, in Q16
- int32_t *filter_state_ch2) { // Filter state for channel 2, in Q16
+ int16_t* data_ch1, // Input and output in channel 1, in Q0.
+ int16_t* data_ch2, // Input and output in channel 2, in Q0.
+ const int16_t* factor_ch1, // Scaling factor for channel 1, in Q15.
+ const int16_t* factor_ch2, // Scaling factor for channel 2, in Q15.
+ const int length, // Length of the data buffers.
+ int32_t* filter_state_ch1, // Filter state for channel 1, in Q16.
+ int32_t* filter_state_ch2) { // Filter state for channel 2, in Q16.
- int32_t st0_ch1, st1_ch1; // channel1 state variables
- int32_t st0_ch2, st1_ch2; // channel2 state variables
- int32_t f_ch10, f_ch11, f_ch20, f_ch21; // factor variables
- int32_t r0, r1, r2, r3, r4, r5; // temporary ragister variables
+ int32_t st0_ch1, st1_ch1; // channel1 state variables.
+ int32_t st0_ch2, st1_ch2; // channel2 state variables.
+ int32_t f_ch10, f_ch11, f_ch20, f_ch21; // factor variables.
+ int32_t r0, r1, r2, r3, r4, r5; // temporary register variables.
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
- // Load all the state and factor variables
+ // Load all the state and factor variables.
"lh %[f_ch10], 0(%[factor_ch1]) \n\t"
"lh %[f_ch20], 0(%[factor_ch2]) \n\t"
"lh %[f_ch11], 2(%[factor_ch1]) \n\t"
@@ -38,7 +38,7 @@ void WebRtcIsacfix_AllpassFilter2FixDec16MIPS(
"lw %[st1_ch1], 4(%[filter_state_ch1]) \n\t"
"lw %[st0_ch2], 0(%[filter_state_ch2]) \n\t"
"lw %[st1_ch2], 4(%[filter_state_ch2]) \n\t"
- // Allpass filtering loop
+ // Allpass filtering loop.
"1: \n\t"
"lh %[r0], 0(%[data_ch1]) \n\t"
"lh %[r1], 0(%[data_ch2]) \n\t"
@@ -80,7 +80,7 @@ void WebRtcIsacfix_AllpassFilter2FixDec16MIPS(
"subq_s.w %[st1_ch2], %[r3], %[r1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[data_ch2], %[data_ch2], 2 \n\t"
- // Store channel states
+ // Store channel states.
"sw %[st0_ch1], 0(%[filter_state_ch1]) \n\t"
"sw %[st1_ch1], 4(%[filter_state_ch1]) \n\t"
"sw %[st0_ch2], 0(%[filter_state_ch2]) \n\t"
@@ -100,3 +100,143 @@ void WebRtcIsacfix_AllpassFilter2FixDec16MIPS(
: "memory", "hi", "lo"
);
}
+
+// WebRtcIsacfix_HighpassFilterFixDec32 function optimized for MIPSDSP platform.
+// Bit-exact with WebRtcIsacfix_HighpassFilterFixDec32C from filterbanks.c.
+void WebRtcIsacfix_HighpassFilterFixDec32MIPS(int16_t* io,
+ int16_t len,
+ const int16_t* coefficient,
+ int32_t* state) {
+ int k;
+ int32_t a1, a2, b1, b2, in;
+ int32_t state0 = state[0];
+ int32_t state1 = state[1];
+
+ int32_t c0, c1, c2, c3;
+ int32_t c4, c5, c6, c7;
+ int32_t state0_lo, state0_hi;
+ int32_t state1_lo, state1_hi;
+ int32_t t0, t1, t2, t3, t4, t5;
+
+ __asm __volatile (
+ "lh %[c0], 0(%[coeff_ptr]) \n\t"
+ "lh %[c1], 2(%[coeff_ptr]) \n\t"
+ "lh %[c2], 4(%[coeff_ptr]) \n\t"
+ "lh %[c3], 6(%[coeff_ptr]) \n\t"
+ "sra %[state0_hi], %[state0], 16 \n\t"
+ "sra %[state1_hi], %[state1], 16 \n\t"
+ "andi %[state0_lo], %[state0], 0xFFFF \n\t"
+ "andi %[state1_lo], %[state1], 0xFFFF \n\t"
+ "lh %[c4], 8(%[coeff_ptr]) \n\t"
+ "lh %[c5], 10(%[coeff_ptr]) \n\t"
+ "lh %[c6], 12(%[coeff_ptr]) \n\t"
+ "lh %[c7], 14(%[coeff_ptr]) \n\t"
+ "sra %[state0_lo], %[state0_lo], 1 \n\t"
+ "sra %[state1_lo], %[state1_lo], 1 \n\t"
+ : [c0] "=&r" (c0), [c1] "=&r" (c1), [c2] "=&r" (c2), [c3] "=&r" (c3),
+ [c4] "=&r" (c4), [c5] "=&r" (c5), [c6] "=&r" (c6), [c7] "=&r" (c7),
+ [state0_hi] "=&r" (state0_hi), [state0_lo] "=&r" (state0_lo),
+ [state1_hi] "=&r" (state1_hi), [state1_lo] "=&r" (state1_lo)
+ : [coeff_ptr] "r" (coefficient), [state0] "r" (state0),
+ [state1] "r" (state1)
+ : "memory"
+ );
+
+ for (k = 0; k < len; k++) {
+ in = (int32_t)io[k];
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "mul %[t2], %[c4], %[state0_lo] \n\t"
+ "mul %[t0], %[c5], %[state0_lo] \n\t"
+ "mul %[t1], %[c4], %[state0_hi] \n\t"
+ "mul %[a1], %[c5], %[state0_hi] \n\t"
+ "mul %[t5], %[c6], %[state1_lo] \n\t"
+ "mul %[t3], %[c7], %[state1_lo] \n\t"
+ "mul %[t4], %[c6], %[state1_hi] \n\t"
+ "mul %[b1], %[c7], %[state1_hi] \n\t"
+ "shra_r.w %[t2], %[t2], 15 \n\t"
+ "shra_r.w %[t0], %[t0], 15 \n\t"
+ "addu %[t1], %[t1], %[t2] \n\t"
+ "addu %[a1], %[a1], %[t0] \n\t"
+ "sra %[t1], %[t1], 16 \n\t"
+ "addu %[a1], %[a1], %[t1] \n\t"
+ "shra_r.w %[t5], %[t5], 15 \n\t"
+ "shra_r.w %[t3], %[t3], 15 \n\t"
+ "addu %[t4], %[t4], %[t5] \n\t"
+ "addu %[b1], %[b1], %[t3] \n\t"
+ "sra %[t4], %[t4], 16 \n\t"
+ "addu %[b1], %[b1], %[t4] \n\t"
+ "mul %[t2], %[c0], %[state0_lo] \n\t"
+ "mul %[t0], %[c1], %[state0_lo] \n\t"
+ "mul %[t1], %[c0], %[state0_hi] \n\t"
+ "mul %[a2], %[c1], %[state0_hi] \n\t"
+ "mul %[t5], %[c2], %[state1_lo] \n\t"
+ "mul %[t3], %[c3], %[state1_lo] \n\t"
+ "mul %[t4], %[c2], %[state1_hi] \n\t"
+ "mul %[b2], %[c3], %[state1_hi] \n\t"
+ "shra_r.w %[t2], %[t2], 15 \n\t"
+ "shra_r.w %[t0], %[t0], 15 \n\t"
+ "addu %[t1], %[t1], %[t2] \n\t"
+ "addu %[a2], %[a2], %[t0] \n\t"
+ "sra %[t1], %[t1], 16 \n\t"
+ "addu %[a2], %[a2], %[t1] \n\t"
+ "shra_r.w %[t5], %[t5], 15 \n\t"
+ "shra_r.w %[t3], %[t3], 15 \n\t"
+ "addu %[t4], %[t4], %[t5] \n\t"
+ "addu %[b2], %[b2], %[t3] \n\t"
+ "sra %[t4], %[t4], 16 \n\t"
+ "addu %[b2], %[b2], %[t4] \n\t"
+ "addu %[a1], %[a1], %[b1] \n\t"
+ "sra %[a1], %[a1], 7 \n\t"
+ "addu %[a1], %[a1], %[in] \n\t"
+ "sll %[t0], %[in], 2 \n\t"
+ "addu %[a2], %[a2], %[b2] \n\t"
+ "subu %[t0], %[t0], %[a2] \n\t"
+ "shll_s.w %[a1], %[a1], 16 \n\t"
+ "shll_s.w %[t0], %[t0], 2 \n\t"
+ "sra %[a1], %[a1], 16 \n\t"
+ "addu %[state1_hi], %[state0_hi], $0 \n\t"
+ "addu %[state1_lo], %[state0_lo], $0 \n\t"
+ "sra %[state0_hi], %[t0], 16 \n\t"
+ "andi %[state0_lo], %[t0], 0xFFFF \n\t"
+ "sra %[state0_lo], %[state0_lo], 1 \n\t"
+ ".set pop \n\t"
+ : [a1] "=&r" (a1), [b1] "=&r" (b1), [a2] "=&r" (a2), [b2] "=&r" (b2),
+ [state0_hi] "+r" (state0_hi), [state0_lo] "+r" (state0_lo),
+ [state1_hi] "+r" (state1_hi), [state1_lo] "+r" (state1_lo),
+ [t0] "=&r" (t0), [t1] "=&r" (t1), [t2] "=&r" (t2),
+ [t3] "=&r" (t3), [t4] "=&r" (t4), [t5] "=&r" (t5)
+ : [c0] "r" (c0), [c1] "r" (c1), [c2] "r" (c2), [c3] "r" (c3),
+ [c4] "r" (c4), [c5] "r" (c5), [c6] "r" (c6), [c7] "r" (c7),
+ [in] "r" (in)
+ : "hi", "lo"
+ );
+ io[k] = (int16_t)a1;
+ }
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+#if !defined(MIPS_DSP_R2_LE)
+ "sll %[state0_hi], %[state0_hi], 16 \n\t"
+ "sll %[state0_lo], %[state0_lo], 1 \n\t"
+ "sll %[state1_hi], %[state1_hi], 16 \n\t"
+ "sll %[state1_lo], %[state1_lo], 1 \n\t"
+ "or %[state0_hi], %[state0_hi], %[state0_lo] \n\t"
+ "or %[state1_hi], %[state1_hi], %[state1_lo] \n\t"
+#else
+ "sll %[state0_lo], %[state0_lo], 1 \n\t"
+ "sll %[state1_lo], %[state1_lo], 1 \n\t"
+ "precr_sra.ph.w %[state0_hi], %[state0_lo], 0 \n\t"
+ "precr_sra.ph.w %[state1_hi], %[state1_lo], 0 \n\t"
+#endif
+ "sw %[state0_hi], 0(%[state]) \n\t"
+ "sw %[state1_hi], 4(%[state]) \n\t"
+ ".set pop \n\t"
+ : [state0_hi] "+r" (state0_hi), [state0_lo] "+r" (state0_lo),
+ [state1_hi] "+r" (state1_hi), [state1_lo] "+r" (state1_lo)
+ : [state] "r" (state)
+ : "memory"
+ );
+}
diff --git a/modules/audio_coding/codecs/isac/fix/source/filterbanks_unittest.cc b/modules/audio_coding/codecs/isac/fix/source/filterbanks_unittest.cc
index d7484277..d15318a7 100644
--- a/modules/audio_coding/codecs/isac/fix/source/filterbanks_unittest.cc
+++ b/modules/audio_coding/codecs/isac/fix/source/filterbanks_unittest.cc
@@ -86,6 +86,13 @@ TEST_F(FilterBanksTest, HighpassFilterFixDec32Test) {
-1280, -8554, -14496, -7561, -23541, -27263, -30560, -32768, -3441, -32768,
25203, -27550, 22419};
#endif
+ HighpassFilterFixDec32 WebRtcIsacfix_HighpassFilterFixDec32;
+#if defined(MIPS_DSP_R1_LE)
+ WebRtcIsacfix_HighpassFilterFixDec32 =
+ WebRtcIsacfix_HighpassFilterFixDec32MIPS;
+#else
+ WebRtcIsacfix_HighpassFilterFixDec32 = WebRtcIsacfix_HighpassFilterFixDec32C;
+#endif
for (int i = 0; i < kSamples; i++) {
in[i] = WEBRTC_SPL_WORD32_MAX / (i + 1);
diff --git a/modules/audio_coding/codecs/isac/fix/source/isacfix.c b/modules/audio_coding/codecs/isac/fix/source/isacfix.c
index 76359080..887a7ba2 100644
--- a/modules/audio_coding/codecs/isac/fix/source/isacfix.c
+++ b/modules/audio_coding/codecs/isac/fix/source/isacfix.c
@@ -209,9 +209,17 @@ static void WebRtcIsacfix_InitNeon(void) {
static void WebRtcIsacfix_InitMIPS(void) {
WebRtcIsacfix_AutocorrFix = WebRtcIsacfix_AutocorrMIPS;
WebRtcIsacfix_FilterMaLoopFix = WebRtcIsacfix_FilterMaLoopMIPS;
+ WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeMIPS;
+ WebRtcIsacfix_Time2Spec = WebRtcIsacfix_Time2SpecMIPS;
#if defined(MIPS_DSP_R1_LE)
WebRtcIsacfix_AllpassFilter2FixDec16 =
WebRtcIsacfix_AllpassFilter2FixDec16MIPS;
+ WebRtcIsacfix_HighpassFilterFixDec32 =
+ WebRtcIsacfix_HighpassFilterFixDec32MIPS;
+#endif
+#if defined(MIPS_DSP_R2_LE)
+ WebRtcIsacfix_CalculateResidualEnergy =
+ WebRtcIsacfix_CalculateResidualEnergyMIPS;
#endif
}
#endif
@@ -300,10 +308,11 @@ int16_t WebRtcIsacfix_EncoderInit(ISACFIX_MainStruct *ISAC_main_inst,
WebRtcIsacfix_CalculateResidualEnergy =
WebRtcIsacfix_CalculateResidualEnergyC;
WebRtcIsacfix_AllpassFilter2FixDec16 = WebRtcIsacfix_AllpassFilter2FixDec16C;
+ WebRtcIsacfix_HighpassFilterFixDec32 = WebRtcIsacfix_HighpassFilterFixDec32C;
WebRtcIsacfix_Time2Spec = WebRtcIsacfix_Time2SpecC;
WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeC;
WebRtcIsacfix_MatrixProduct1 = WebRtcIsacfix_MatrixProduct1C;
- WebRtcIsacfix_MatrixProduct2 = WebRtcIsacfix_MatrixProduct2C ;
+ WebRtcIsacfix_MatrixProduct2 = WebRtcIsacfix_MatrixProduct2C;
#ifdef WEBRTC_DETECT_ARM_NEON
if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
diff --git a/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi b/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi
index a18a803d..e5aade65 100644
--- a/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi
+++ b/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi
@@ -47,12 +47,14 @@
'lpc_masking_model.c',
'lpc_tables.c',
'pitch_estimator.c',
+ 'pitch_estimator_c.c',
'pitch_filter.c',
'pitch_filter_c.c',
'pitch_gain_tables.c',
'pitch_lag_tables.c',
'spectrum_ar_model_tables.c',
'transform.c',
+ 'transform_tables.c',
'arith_routins.h',
'bandwidth_estimator.h',
'codec.h',
@@ -89,9 +91,12 @@
'sources': [
'filters_mips.c',
'lattice_mips.c',
+ 'pitch_estimator_mips.c',
+ 'transform_mips.c',
],
'sources!': [
'lattice_c.c',
+ 'pitch_estimator_c.c',
],
'conditions': [
['mips_dsp_rev>0', {
@@ -101,6 +106,7 @@
}],
['mips_dsp_rev>1', {
'sources': [
+ 'lpc_masking_model_mips.c',
'pitch_filter_mips.c',
],
'sources!': [
diff --git a/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h b/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h
index 72e0cfc4..1270c142 100644
--- a/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h
+++ b/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h
@@ -62,6 +62,15 @@ int32_t WebRtcIsacfix_CalculateResidualEnergyNeon(int lpc_order,
int* q_val_residual_energy);
#endif
+#if defined(MIPS_DSP_R2_LE)
+int32_t WebRtcIsacfix_CalculateResidualEnergyMIPS(int lpc_order,
+ int32_t q_val_corr,
+ int q_val_polynomial,
+ int16_t* a_polynomial,
+ int32_t* corr_coeffs,
+ int* q_val_residual_energy);
+#endif
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model_mips.c b/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model_mips.c
new file mode 100644
index 00000000..55602b97
--- /dev/null
+++ b/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model_mips.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h"
+
+// MIPS DSPr2 optimization for function WebRtcIsacfix_CalculateResidualEnergy.
+// Bit-exact with WebRtcIsacfix_CalculateResidualEnergyC from file
+// lpc_masking_model.c.
+int32_t WebRtcIsacfix_CalculateResidualEnergyMIPS(int lpc_order,
+ int32_t q_val_corr,
+ int q_val_polynomial,
+ int16_t* a_polynomial,
+ int32_t* corr_coeffs,
+ int* q_val_residual_energy) {
+
+ int i = 0, j = 0;
+ int shift_internal = 0, shift_norm = 0;
+ int32_t tmp32 = 0, word32_high = 0, word32_low = 0, residual_energy = 0;
+ int32_t tmp_corr_c = corr_coeffs[0];
+ int16_t* tmp_a_poly = &a_polynomial[0];
+ int32_t sum64_hi = 0;
+ int32_t sum64_lo = 0;
+
+ for (j = 0; j <= lpc_order; j++) {
+ // For the case of i == 0:
+ // residual_energy +=
+ // a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i];
+
+ int32_t tmp2, tmp3;
+ int16_t sign_1;
+ int16_t sign_2;
+ int16_t sign_3;
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "lh %[tmp2], 0(%[tmp_a_poly]) \n\t"
+ "mul %[tmp32], %[tmp2], %[tmp2] \n\t"
+ "addiu %[tmp_a_poly], %[tmp_a_poly], 2 \n\t"
+ "sra %[sign_2], %[sum64_hi], 31 \n\t"
+ "mult $ac0, %[tmp32], %[tmp_corr_c] \n\t"
+ "shilov $ac0, %[shift_internal] \n\t"
+ "mfhi %[tmp2], $ac0 \n\t"
+ "mflo %[tmp3], $ac0 \n\t"
+ "sra %[sign_1], %[tmp2], 31 \n\t"
+ "xor %[sign_3], %[sign_1], %[sign_2] \n\t"
+ ".set pop \n\t"
+ : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
+ [tmp_a_poly] "+r" (tmp_a_poly), [sign_1] "=&r" (sign_1),
+ [sign_3] "=&r" (sign_3), [sign_2] "=&r" (sign_2),
+ [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
+ : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
+ : "hi", "lo", "memory"
+ );
+
+ if (sign_3 != 0) {
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
+ "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
+ ".set pop \n\t"
+ : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
+ : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
+ : "hi", "lo", "memory"
+ );
+ } else {
+ if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
+ ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
+ // Shift right for overflow.
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[shift_internal], %[shift_internal], 1 \n\t"
+ "prepend %[sum64_lo], %[sum64_hi], 1 \n\t"
+ "sra %[sum64_hi], %[sum64_hi], 1 \n\t"
+ "prepend %[tmp3], %[tmp2], 1 \n\t"
+ "sra %[tmp2], %[tmp2], 1 \n\t"
+ "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
+ "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
+ ".set pop \n\t"
+ : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
+ [shift_internal] "+r" (shift_internal),
+ [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
+ :
+ : "hi", "lo", "memory"
+ );
+ } else {
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
+ "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
+ ".set pop \n\t"
+ : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
+ : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
+ : "hi", "lo", "memory"
+ );
+ }
+ }
+ }
+
+ for (i = 1; i <= lpc_order; i++) {
+ tmp_corr_c = corr_coeffs[i];
+ int16_t* tmp_a_poly_j = &a_polynomial[i];
+ int16_t* tmp_a_poly_j_i = &a_polynomial[0];
+ for (j = i; j <= lpc_order; j++) {
+ // For the case of i = 1 .. lpc_order:
+ // residual_energy +=
+ // a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i] * 2;
+
+ int32_t tmp2, tmp3;
+ int16_t sign_1;
+ int16_t sign_2;
+ int16_t sign_3;
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "lh %[tmp3], 0(%[tmp_a_poly_j]) \n\t"
+ "lh %[tmp2], 0(%[tmp_a_poly_j_i]) \n\t"
+ "addiu %[tmp_a_poly_j], %[tmp_a_poly_j], 2 \n\t"
+ "addiu %[tmp_a_poly_j_i], %[tmp_a_poly_j_i], 2 \n\t"
+ "mul %[tmp32], %[tmp3], %[tmp2] \n\t"
+ "sll %[tmp32], %[tmp32], 1 \n\t"
+ "mult $ac0, %[tmp32], %[tmp_corr_c] \n\t"
+ "shilov $ac0, %[shift_internal] \n\t"
+ "mfhi %[tmp2], $ac0 \n\t"
+ "mflo %[tmp3], $ac0 \n\t"
+ "sra %[sign_1], %[tmp2], 31 \n\t"
+ "sra %[sign_2], %[sum64_hi], 31 \n\t"
+ "xor %[sign_3], %[sign_1], %[sign_2] \n\t"
+ ".set pop \n\t"
+ : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
+ [tmp_a_poly_j] "+r" (tmp_a_poly_j), [sign_1] "=&r" (sign_1),
+ [tmp_a_poly_j_i] "+r" (tmp_a_poly_j_i), [sign_2] "=&r" (sign_2),
+ [sign_3] "=&r" (sign_3), [sum64_hi] "+r" (sum64_hi),
+ [sum64_lo] "+r" (sum64_lo)
+ : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
+ : "hi", "lo", "memory"
+ );
+ if (sign_3 != 0) {
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
+ "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
+ ".set pop \n\t"
+ : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), [sum64_hi] "+r" (sum64_hi),
+ [sum64_lo] "+r" (sum64_lo)
+ :
+ :"memory"
+ );
+ } else {
+ // Test overflow and sum the result.
+ if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
+ ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
+ // Shift right for overflow.
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[shift_internal], %[shift_internal], 1 \n\t"
+ "prepend %[sum64_lo], %[sum64_hi], 1 \n\t"
+ "sra %[sum64_hi], %[sum64_hi], 1 \n\t"
+ "prepend %[tmp3], %[tmp2], 1 \n\t"
+ "sra %[tmp2], %[tmp2], 1 \n\t"
+ "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
+ "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
+ ".set pop \n\t"
+ : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
+ [shift_internal] "+r" (shift_internal),
+ [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
+ :
+ : "hi", "lo", "memory"
+ );
+ } else {
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
+ "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
+ ".set pop \n\t"
+ : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
+ [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
+ :
+ : "hi", "lo", "memory"
+ );
+ }
+ }
+ }
+ }
+ word32_high = sum64_hi;
+ word32_low = sum64_lo;
+
+ // Calculate the value of shifting (shift_norm) for the 64-bit sum.
+ if (word32_high != 0) {
+ shift_norm = 32 - WebRtcSpl_NormW32(word32_high);
+ int tmp1;
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "srl %[residual_energy], %[sum64_lo], %[shift_norm] \n\t"
+ "li %[tmp1], 32 \n\t"
+ "subu %[tmp1], %[tmp1], %[shift_norm] \n\t"
+ "sll %[tmp1], %[sum64_hi], %[tmp1] \n\t"
+ "or %[residual_energy], %[residual_energy], %[tmp1] \n\t"
+ ".set pop \n\t"
+ : [residual_energy] "=&r" (residual_energy), [tmp1]"=&r"(tmp1),
+ [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
+ : [shift_norm] "r" (shift_norm)
+ : "memory"
+ );
+ } else {
+ if ((word32_low & 0x80000000) != 0) {
+ shift_norm = 1;
+ residual_energy = (uint32_t)word32_low >> 1;
+ } else {
+ shift_norm = WebRtcSpl_NormW32(word32_low);
+ residual_energy = word32_low << shift_norm;
+ shift_norm = -shift_norm;
+ }
+ }
+
+ // Q(q_val_polynomial * 2) * Q(q_val_corr) >> shift_internal >> shift_norm
+ // = Q(q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2)
+ *q_val_residual_energy =
+ q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2;
+
+ return residual_energy;
+}
diff --git a/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.c b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.c
index 9c4e5875..426b2cf4 100644
--- a/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.c
+++ b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.c
@@ -29,7 +29,7 @@ static const int16_t kACoefQ12[3] = {
-static __inline int32_t Log2Q8( uint32_t x ) {
+__inline int32_t WebRtcIsacfix_Log2Q8( uint32_t x ) {
int32_t zeros, lg2;
int16_t frac;
@@ -153,109 +153,7 @@ static void FindFour32(int32_t *in, int16_t length, int16_t *bestind)
-static void PCorr2Q32(const int16_t *in, int32_t *logcorQ8)
-{
- int16_t scaling,n,k;
- int32_t ysum32,csum32, lys, lcs;
- int32_t oneQ8;
-
-
- const int16_t *x, *inptr;
-
- oneQ8 = WEBRTC_SPL_LSHIFT_W32((int32_t)1, 8); // 1.00 in Q8
-
- x = in + PITCH_MAX_LAG/2 + 2;
- scaling = WebRtcSpl_GetScalingSquare ((int16_t *) in, PITCH_CORR_LEN2, PITCH_CORR_LEN2);
- ysum32 = 1;
- csum32 = 0;
- x = in + PITCH_MAX_LAG/2 + 2;
- for (n = 0; n < PITCH_CORR_LEN2; n++) {
- ysum32 += WEBRTC_SPL_MUL_16_16_RSFT( (int16_t) in[n],(int16_t) in[n], scaling); // Q0
- csum32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t) x[n],(int16_t) in[n], scaling); // Q0
- }
-
- logcorQ8 += PITCH_LAG_SPAN2 - 1;
-
- lys=Log2Q8((uint32_t) ysum32); // Q8
- lys=WEBRTC_SPL_RSHIFT_W32(lys, 1); //sqrt(ysum);
-
- if (csum32>0) {
-
- lcs=Log2Q8((uint32_t) csum32); // 2log(csum) in Q8
-
- if (lcs>(lys + oneQ8) ){ // csum/sqrt(ysum) > 2 in Q8
- *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum))
- } else {
- *logcorQ8 = oneQ8; // 1.00
- }
-
- } else {
- *logcorQ8 = 0;
- }
-
-
- for (k = 1; k < PITCH_LAG_SPAN2; k++) {
- inptr = &in[k];
- ysum32 -= WEBRTC_SPL_MUL_16_16_RSFT( (int16_t) in[k-1],(int16_t) in[k-1], scaling);
- ysum32 += WEBRTC_SPL_MUL_16_16_RSFT( (int16_t) in[PITCH_CORR_LEN2 + k - 1],(int16_t) in[PITCH_CORR_LEN2 + k - 1], scaling);
-
-#ifdef WEBRTC_ARCH_ARM_NEON
- {
- int32_t vbuff[4];
- int32x4_t int_32x4_sum = vmovq_n_s32(0);
- // Can't shift a Neon register to right with a non-constant shift value.
- int32x4_t int_32x4_scale = vdupq_n_s32(-scaling);
- // Assert a codition used in loop unrolling at compile-time.
- COMPILE_ASSERT(PITCH_CORR_LEN2 %4 == 0);
-
- for (n = 0; n < PITCH_CORR_LEN2; n += 4) {
- int16x4_t int_16x4_x = vld1_s16(&x[n]);
- int16x4_t int_16x4_in = vld1_s16(&inptr[n]);
- int32x4_t int_32x4 = vmull_s16(int_16x4_x, int_16x4_in);
- int_32x4 = vshlq_s32(int_32x4, int_32x4_scale);
- int_32x4_sum = vaddq_s32(int_32x4_sum, int_32x4);
- }
-
- // Use vector store to avoid long stall from data trasferring
- // from vector to general register.
- vst1q_s32(vbuff, int_32x4_sum);
- csum32 = vbuff[0] + vbuff[1];
- csum32 += vbuff[2];
- csum32 += vbuff[3];
- }
-#else
- csum32 = 0;
- if(scaling == 0) {
- for (n = 0; n < PITCH_CORR_LEN2; n++) {
- csum32 += x[n] * inptr[n];
- }
- } else {
- for (n = 0; n < PITCH_CORR_LEN2; n++) {
- csum32 += (x[n] * inptr[n]) >> scaling;
- }
- }
-#endif
-
- logcorQ8--;
-
- lys=Log2Q8((uint32_t)ysum32); // Q8
- lys=WEBRTC_SPL_RSHIFT_W32(lys, 1); //sqrt(ysum);
-
- if (csum32>0) {
-
- lcs=Log2Q8((uint32_t) csum32); // 2log(csum) in Q8
-
- if (lcs>(lys + oneQ8) ){ // csum/sqrt(ysum) > 2
- *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum))
- } else {
- *logcorQ8 = oneQ8; // 1.00
- }
-
- } else {
- *logcorQ8 = 0;
- }
- }
-}
+extern void WebRtcIsacfix_PCorr2Q32(const int16_t *in, int32_t *logcorQ8);
@@ -311,12 +209,13 @@ void WebRtcIsacfix_InitialPitch(const int16_t *in, /* Q0 */
/* compute correlation for first and second half of the frame */
- PCorr2Q32(buf_dec16, crrvecQ8_1);
- PCorr2Q32(buf_dec16 + PITCH_CORR_STEP2, crrvecQ8_2);
+ WebRtcIsacfix_PCorr2Q32(buf_dec16, crrvecQ8_1);
+ WebRtcIsacfix_PCorr2Q32(buf_dec16 + PITCH_CORR_STEP2, crrvecQ8_2);
/* bias towards pitch lag of previous frame */
- tmp32a = Log2Q8((uint32_t) old_lagQ8) - 2304; // log2(0.5*oldlag) in Q8
+ tmp32a = WebRtcIsacfix_Log2Q8((uint32_t) old_lagQ8) - 2304;
+ // log2(0.5*oldlag) in Q8
tmp32b = WEBRTC_SPL_MUL_16_16_RSFT(oldgQ12,oldgQ12, 10); //Q12 & * 4.0;
gain_bias16 = (int16_t) tmp32b; //Q12
if (gain_bias16 > 3276) gain_bias16 = 3276; // 0.8 in Q12
@@ -325,7 +224,7 @@ void WebRtcIsacfix_InitialPitch(const int16_t *in, /* Q0 */
for (k = 0; k < PITCH_LAG_SPAN2; k++)
{
if (crrvecQ8_1[k]>0) {
- tmp32b = Log2Q8((uint32_t) (k + (PITCH_MIN_LAG/2-2)));
+ tmp32b = WebRtcIsacfix_Log2Q8((uint32_t) (k + (PITCH_MIN_LAG/2-2)));
tmp16a = (int16_t) (tmp32b - tmp32a); // Q8 & fabs(ratio)<4
tmp32c = WEBRTC_SPL_MUL_16_16_RSFT(tmp16a,tmp16a, 6); //Q10
tmp16b = (int16_t) tmp32c; // Q10 & <8
@@ -334,7 +233,8 @@ void WebRtcIsacfix_InitialPitch(const int16_t *in, /* Q0 */
tmp16d = Exp2Q10((int16_t) -tmp16c); //Q10
tmp32c = WEBRTC_SPL_MUL_16_16_RSFT(gain_bias16,tmp16d,13); // Q10 & * 0.5
bias16 = (int16_t) (1024 + tmp32c); // Q10
- tmp32b = Log2Q8((uint32_t) bias16) - 2560; // Q10 in -> Q8 out with 10*2^8 offset
+ tmp32b = WebRtcIsacfix_Log2Q8((uint32_t)bias16) - 2560;
+ // Q10 in -> Q8 out with 10*2^8 offset
crrvecQ8_1[k] += tmp32b ; // -10*2^8 offset
}
}
@@ -407,7 +307,7 @@ void WebRtcIsacfix_InitialPitch(const int16_t *in, /* Q0 */
xq[0] = WEBRTC_SPL_LSHIFT_W32(xq[0], 8);
Intrp1DQ8(xq, fxq, yq, fyq);
- tmp32a= Log2Q8((uint32_t) *yq) - 2048; // offset 8*2^8
+ tmp32a= WebRtcIsacfix_Log2Q8((uint32_t) *yq) - 2048; // offset 8*2^8
/* Bias towards short lags */
/* log(pow(0.8, log(2.0 * *y )))/log(2.0) */
tmp32b= WEBRTC_SPL_MUL_16_16_RSFT((int16_t) tmp32a, -42, 8);
@@ -437,10 +337,13 @@ void WebRtcIsacfix_InitialPitch(const int16_t *in, /* Q0 */
tmp32b = (int32_t) (WEBRTC_SPL_LSHIFT_W32(tmp32a, 1)) - ratq; // Q8
tmp32c = WEBRTC_SPL_MUL_16_16_RSFT((int16_t) tmp32b, (int16_t) tmp32b, 8); // Q8
- tmp32b = (int32_t) tmp32c + (int32_t) WEBRTC_SPL_RSHIFT_W32(ratq, 1); // (k-r)^2 + 0.5 * r Q8
- tmp32c = Log2Q8((uint32_t) tmp32a) - 2048; // offset 8*2^8 , log2(0.5*k) Q8
- tmp32d = Log2Q8((uint32_t) tmp32b) - 2048; // offset 8*2^8 , log2(0.5*k) Q8
- tmp32e = tmp32c -tmp32d;
+ tmp32b = (int32_t)tmp32c + (int32_t)WEBRTC_SPL_RSHIFT_W32(ratq, 1);
+ // (k-r)^2 + 0.5 * r Q8
+ tmp32c = WebRtcIsacfix_Log2Q8((uint32_t)tmp32a) - 2048;
+ // offset 8*2^8 , log2(0.5*k) Q8
+ tmp32d = WebRtcIsacfix_Log2Q8((uint32_t)tmp32b) - 2048;
+ // offset 8*2^8 , log2(0.5*k) Q8
+ tmp32e = tmp32c - tmp32d;
cv2q[k] += WEBRTC_SPL_RSHIFT_W32(tmp32e, 1);
@@ -481,7 +384,7 @@ void WebRtcIsacfix_InitialPitch(const int16_t *in, /* Q0 */
/* Bias towards short lags */
/* log(pow(0.8, log(2.0f * *y )))/log(2.0f) */
- tmp32a= Log2Q8((uint32_t) *yq) - 2048; // offset 8*2^8
+ tmp32a= WebRtcIsacfix_Log2Q8((uint32_t) *yq) - 2048; // offset 8*2^8
tmp32b= WEBRTC_SPL_MUL_16_16_RSFT((int16_t) tmp32a, -82, 8);
tmp32c= tmp32b + 256;
*fyq += tmp32c;
diff --git a/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h
index 93c81c8e..da401e5f 100644
--- a/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h
+++ b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h
@@ -58,4 +58,8 @@ void WebRtcIsacfix_DecimateAllpass32(const int16_t *in,
int16_t N, /* number of input samples */
int16_t *out); /* array of size N/2 */
+int32_t WebRtcIsacfix_Log2Q8( uint32_t x );
+
+void WebRtcIsacfix_PCorr2Q32(const int16_t* in, int32_t* logcorQ8);
+
#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_PITCH_ESTIMATOR_H_ */
diff --git a/modules/audio_coding/codecs/isac/fix/source/pitch_estimator_c.c b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator_c.c
new file mode 100644
index 00000000..82155d27
--- /dev/null
+++ b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator_c.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h"
+
+#ifdef WEBRTC_ARCH_ARM_NEON
+#include <arm_neon.h>
+#endif
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+
+extern int32_t WebRtcIsacfix_Log2Q8(uint32_t x);
+/*
+ * Computes a log-domain correlation measure for PITCH_LAG_SPAN2 window
+ * positions.
+ *
+ * A fixed reference segment x = in + PITCH_MAX_LAG / 2 + 2 is correlated
+ * against a window of PITCH_CORR_LEN2 samples that starts at in[k], for
+ * k = 0 .. PITCH_LAG_SPAN2 - 1. For every k:
+ *   ysum = 1 + sum over n of (in[k + n] * in[k + n]) >> scaling
+ *   csum =     sum over n of (x[n] * in[k + n]) >> scaling
+ * and the value stored is
+ *   0                                    when csum <= 0,
+ *   Log2Q8(csum) - (Log2Q8(ysum) >> 1)   when Log2Q8(csum) exceeds
+ *                                        (Log2Q8(ysum) >> 1) + 256,
+ *   256 (1.00 in Q8)                     otherwise.
+ *
+ * in:       input samples. The loops read in[0] up to
+ *           in[PITCH_CORR_LEN2 + PITCH_LAG_SPAN2 - 2] and the reference
+ *           segment x[0] .. x[PITCH_CORR_LEN2 - 1].
+ * logcorQ8: output, PITCH_LAG_SPAN2 entries, written back to front: the
+ *           result for window start k is stored at
+ *           logcorQ8[PITCH_LAG_SPAN2 - 1 - k].
+ *
+ * NOTE(review): WebRtcIsacfix_Log2Q8 is presumably log2() with a Q8 result,
+ * and WebRtcSpl_GetScalingSquare presumably returns a right shift that
+ * keeps the sums within 32 bits; neither is defined here, so confirm.
+ */
+void WebRtcIsacfix_PCorr2Q32(const int16_t* in, int32_t* logcorQ8) {
+ int16_t scaling,n,k;
+ int32_t ysum32,csum32, lys, lcs;
+ int32_t oneQ8;
+ const int16_t* x;
+ const int16_t* inptr;
+
+ oneQ8 = WEBRTC_SPL_LSHIFT_W32((int32_t)1, 8); // 1.00 in Q8
+ // x is the fixed reference segment; it does not move with k.
+ x = in + PITCH_MAX_LAG / 2 + 2;
+ scaling = WebRtcSpl_GetScalingSquare((int16_t*)in,
+ PITCH_CORR_LEN2,
+ PITCH_CORR_LEN2); // right shift applied to every product below
+ ysum32 = 1; // bias of 1 keeps the Log2Q8 argument non-zero for a silent window
+ csum32 = 0;
+ x = in + PITCH_MAX_LAG / 2 + 2; // NOTE(review): repeats the assignment above
+ for (n = 0; n < PITCH_CORR_LEN2; n++) {
+ ysum32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)in[n],
+ (int16_t)in[n],
+ scaling); // Q0, energy of the first window
+ csum32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)x[n],
+ (int16_t)in[n],
+ scaling); // Q0, cross-correlation for k = 0
+ }
+ logcorQ8 += PITCH_LAG_SPAN2 - 1; // start at the last slot; results go back to front
+ lys = WebRtcIsacfix_Log2Q8((uint32_t)ysum32); // Q8
+ lys = WEBRTC_SPL_RSHIFT_W32(lys, 1); // half the log, i.e. log of sqrt(ysum)
+ if (csum32 > 0) {
+ lcs = WebRtcIsacfix_Log2Q8((uint32_t)csum32); // log2(csum) in Q8
+ if (lcs > (lys + oneQ8)) { // log difference > 1.00 in Q8, i.e. csum/sqrt(ysum) > 2
+ *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum))
+ } else {
+ *logcorQ8 = oneQ8; // 1.00
+ }
+ } else {
+ *logcorQ8 = 0;
+ }
+ // Remaining positions: slide the energy window by one sample (drop the
+ // oldest square, add the newest) and recompute the cross-correlation.
+ for (k = 1; k < PITCH_LAG_SPAN2; k++) {
+ inptr = &in[k];
+ ysum32 -= WEBRTC_SPL_MUL_16_16_RSFT((int16_t)in[k - 1],
+ (int16_t)in[k - 1],
+ scaling);
+ ysum32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)in[PITCH_CORR_LEN2 + k - 1],
+ (int16_t)in[PITCH_CORR_LEN2 + k - 1],
+ scaling);
+#ifdef WEBRTC_ARCH_ARM_NEON
+ {
+ int32_t vbuff[4];
+ int32x4_t int_32x4_sum = vmovq_n_s32(0);
+ // Can't shift a Neon register to right with a non-constant shift value,
+ int32x4_t int_32x4_scale = vdupq_n_s32(-scaling); // so shift left by -scaling.
+ // Assert a condition used in loop unrolling at compile-time.
+ COMPILE_ASSERT(PITCH_CORR_LEN2 %4 == 0);
+
+ for (n = 0; n < PITCH_CORR_LEN2; n += 4) {
+ int16x4_t int_16x4_x = vld1_s16(&x[n]);
+ int16x4_t int_16x4_in = vld1_s16(&inptr[n]);
+ int32x4_t int_32x4 = vmull_s16(int_16x4_x, int_16x4_in);
+ int_32x4 = vshlq_s32(int_32x4, int_32x4_scale);
+ int_32x4_sum = vaddq_s32(int_32x4_sum, int_32x4);
+ }
+
+ // Use vector store to avoid long stall from data transferring
+ // from vector to general register.
+ vst1q_s32(vbuff, int_32x4_sum);
+ csum32 = vbuff[0] + vbuff[1];
+ csum32 += vbuff[2];
+ csum32 += vbuff[3];
+ }
+#else
+ csum32 = 0;
+ if(scaling == 0) { // no shift needed, so keep it out of the inner loop
+ for (n = 0; n < PITCH_CORR_LEN2; n++) {
+ csum32 += x[n] * inptr[n];
+ }
+ } else {
+ for (n = 0; n < PITCH_CORR_LEN2; n++) {
+ csum32 += (x[n] * inptr[n]) >> scaling;
+ }
+ }
+#endif
+ // Move to the output slot for this k (one index lower than the last).
+ logcorQ8--;
+
+ lys = WebRtcIsacfix_Log2Q8((uint32_t)ysum32); // Q8
+ lys = WEBRTC_SPL_RSHIFT_W32(lys, 1); // half the log, i.e. log of sqrt(ysum)
+
+ if (csum32 > 0) {
+ lcs = WebRtcIsacfix_Log2Q8((uint32_t)csum32); // log2(csum) in Q8
+ if (lcs > (lys + oneQ8)) { // log difference > 1.00 in Q8, i.e. csum/sqrt(ysum) > 2
+ *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum))
+ } else {
+ *logcorQ8 = oneQ8; // 1.00
+ }
+ } else {
+ *logcorQ8 = 0;
+ }
+ }
+}
diff --git a/modules/audio_coding/codecs/isac/fix/source/pitch_estimator_mips.c b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator_mips.c
new file mode 100644
index 00000000..fa426e98
--- /dev/null
+++ b/modules/audio_coding/codecs/isac/fix/source/pitch_estimator_mips.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+
+extern int32_t WebRtcIsacfix_Log2Q8(uint32_t x);
+/*
+ * MIPS inline-assembly variant of WebRtcIsacfix_PCorr2Q32.
+ *
+ * Correlates the fixed reference segment x = in + PITCH_MAX_LAG / 2 + 2
+ * against a window of PITCH_CORR_LEN2 samples starting at in[k], for
+ * k = 0 .. PITCH_LAG_SPAN2 - 1. For every k:
+ *   ysum = 1 + sum over n of (in[k + n] * in[k + n]) >> scaling
+ *   csum =     sum over n of (x[n] * in[k + n]) >> scaling
+ * and the value stored is
+ *   0                                    when csum <= 0,
+ *   Log2Q8(csum) - (Log2Q8(ysum) >> 1)   when Log2Q8(csum) exceeds
+ *                                        (Log2Q8(ysum) >> 1) + 256,
+ *   256 (1.00 in Q8)                     otherwise.
+ *
+ * in:       input samples. The loops read in[0] up to
+ *           in[PITCH_CORR_LEN2 + PITCH_LAG_SPAN2 - 2] and the reference
+ *           segment x[0] .. x[PITCH_CORR_LEN2 - 1].
+ * logcorQ8: output, PITCH_LAG_SPAN2 entries, written back to front: the
+ *           result for window start k is stored at
+ *           logcorQ8[PITCH_LAG_SPAN2 - 1 - k].
+ *
+ * Both assembly loops process four samples per iteration, which is why
+ * PITCH_CORR_LEN2 is asserted to be a multiple of 4. The code runs under
+ * ".set noreorder", so the instruction written directly after each branch
+ * (indented by one extra space inside the string) occupies the branch
+ * delay slot.
+ *
+ * NOTE(review): WebRtcIsacfix_Log2Q8 is presumably log2() with a Q8 result,
+ * and WebRtcSpl_GetScalingSquare presumably returns a right shift that
+ * keeps the sums within 32 bits; neither is defined here, so confirm.
+ */
+void WebRtcIsacfix_PCorr2Q32(const int16_t* in, int32_t* logcorQ8) {
+ int16_t scaling,n,k;
+ int32_t ysum32,csum32, lys, lcs;
+ int32_t oneQ8;
+ const int16_t* x;
+ const int16_t* inptr;
+
+ oneQ8 = WEBRTC_SPL_LSHIFT_W32((int32_t)1, 8); // 1.00 in Q8
+ x = in + PITCH_MAX_LAG / 2 + 2; // fixed reference segment
+ scaling = WebRtcSpl_GetScalingSquare((int16_t*)in,
+ PITCH_CORR_LEN2,
+ PITCH_CORR_LEN2); // right shift applied to every product below
+ ysum32 = 1; // bias of 1 keeps the Log2Q8 argument non-zero for a silent window
+ csum32 = 0;
+ x = in + PITCH_MAX_LAG / 2 + 2; // NOTE(review): repeats the assignment above
+ {
+ const int16_t* tmp_x = x;
+ const int16_t* tmp_in = in;
+ int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+ n = PITCH_CORR_LEN2; // counts down by 4 inside the loop
+ COMPILE_ASSERT(PITCH_CORR_LEN2 % 4 == 0);
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "1: \n\t" // first window: energy and cross-correlation, 4 samples per pass
+ "lh %[tmp1], 0(%[tmp_in]) \n\t"
+ "lh %[tmp2], 2(%[tmp_in]) \n\t"
+ "lh %[tmp3], 4(%[tmp_in]) \n\t"
+ "lh %[tmp4], 6(%[tmp_in]) \n\t"
+ "lh %[tmp5], 0(%[tmp_x]) \n\t"
+ "lh %[tmp6], 2(%[tmp_x]) \n\t"
+ "lh %[tmp7], 4(%[tmp_x]) \n\t"
+ "lh %[tmp8], 6(%[tmp_x]) \n\t"
+ "mul %[tmp5], %[tmp1], %[tmp5] \n\t" // tmp5..tmp8 = in * x (cross terms)
+ "mul %[tmp1], %[tmp1], %[tmp1] \n\t" // tmp1..tmp4 = in * in (energy terms)
+ "mul %[tmp6], %[tmp2], %[tmp6] \n\t"
+ "mul %[tmp2], %[tmp2], %[tmp2] \n\t"
+ "mul %[tmp7], %[tmp3], %[tmp7] \n\t"
+ "mul %[tmp3], %[tmp3], %[tmp3] \n\t"
+ "mul %[tmp8], %[tmp4], %[tmp8] \n\t"
+ "mul %[tmp4], %[tmp4], %[tmp4] \n\t"
+ "addiu %[n], %[n], -4 \n\t"
+ "srav %[tmp5], %[tmp5], %[scaling] \n\t" // each product is shifted before it is summed
+ "srav %[tmp1], %[tmp1], %[scaling] \n\t"
+ "srav %[tmp6], %[tmp6], %[scaling] \n\t"
+ "srav %[tmp2], %[tmp2], %[scaling] \n\t"
+ "srav %[tmp7], %[tmp7], %[scaling] \n\t"
+ "srav %[tmp3], %[tmp3], %[scaling] \n\t"
+ "srav %[tmp8], %[tmp8], %[scaling] \n\t"
+ "srav %[tmp4], %[tmp4], %[scaling] \n\t"
+ "addu %[ysum32], %[ysum32], %[tmp1] \n\t"
+ "addu %[csum32], %[csum32], %[tmp5] \n\t"
+ "addu %[ysum32], %[ysum32], %[tmp2] \n\t"
+ "addu %[csum32], %[csum32], %[tmp6] \n\t"
+ "addu %[ysum32], %[ysum32], %[tmp3] \n\t"
+ "addu %[csum32], %[csum32], %[tmp7] \n\t"
+ "addu %[ysum32], %[ysum32], %[tmp4] \n\t"
+ "addu %[csum32], %[csum32], %[tmp8] \n\t"
+ "addiu %[tmp_in], %[tmp_in], 8 \n\t"
+ "bgtz %[n], 1b \n\t" // repeat while samples remain
+ " addiu %[tmp_x], %[tmp_x], 8 \n\t" // delay slot: advance x by 4 samples
+ ".set pop \n\t"
+ : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+ [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+ [tmp7] "=&r" (tmp7), [tmp8] "=&r" (tmp8), [tmp_in] "+r" (tmp_in),
+ [ysum32] "+r" (ysum32), [tmp_x] "+r" (tmp_x), [csum32] "+r" (csum32),
+ [n] "+r" (n)
+ : [scaling] "r" (scaling)
+ : "memory", "hi", "lo" // hi/lo listed presumably because of the mul instructions
+ );
+ }
+ logcorQ8 += PITCH_LAG_SPAN2 - 1; // start at the last slot; results go back to front
+ lys = WebRtcIsacfix_Log2Q8((uint32_t)ysum32); // Q8
+ lys = WEBRTC_SPL_RSHIFT_W32(lys, 1); // half the log, i.e. log of sqrt(ysum)
+ if (csum32 > 0) {
+ lcs = WebRtcIsacfix_Log2Q8((uint32_t)csum32); // log2(csum) in Q8
+ if (lcs > (lys + oneQ8)) { // log difference > 1.00 in Q8, i.e. csum/sqrt(ysum) > 2
+ *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum))
+ } else {
+ *logcorQ8 = oneQ8; // 1.00
+ }
+ } else {
+ *logcorQ8 = 0;
+ }
+ // Remaining positions: slide the energy window and recompute csum32.
+ for (k = 1; k < PITCH_LAG_SPAN2; k++) {
+ inptr = &in[k];
+ const int16_t* tmp_in1 = &in[k - 1]; // sample leaving the energy window
+ const int16_t* tmp_in2 = &in[PITCH_CORR_LEN2 + k - 1]; // sample entering it
+ const int16_t* tmp_x = x;
+ int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+ n = PITCH_CORR_LEN2;
+ csum32 = 0;
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "lh %[tmp1], 0(%[tmp_in1]) \n\t"
+ "lh %[tmp2], 0(%[tmp_in2]) \n\t"
+ "mul %[tmp1], %[tmp1], %[tmp1] \n\t"
+ "mul %[tmp2], %[tmp2], %[tmp2] \n\t"
+ "srav %[tmp1], %[tmp1], %[scaling] \n\t"
+ "srav %[tmp2], %[tmp2], %[scaling] \n\t"
+ "subu %[ysum32], %[ysum32], %[tmp1] \n\t" // drop the oldest square
+ "bnez %[scaling], 2f \n\t" // scaling != 0: take the shifting loop at 2
+ " addu %[ysum32], %[ysum32], %[tmp2] \n\t" // delay slot (runs on both paths): add the newest square
+ "1: \n\t" // scaling == 0: accumulate the raw products
+ "lh %[tmp1], 0(%[inptr]) \n\t"
+ "lh %[tmp2], 0(%[tmp_x]) \n\t"
+ "lh %[tmp3], 2(%[inptr]) \n\t"
+ "lh %[tmp4], 2(%[tmp_x]) \n\t"
+ "lh %[tmp5], 4(%[inptr]) \n\t"
+ "lh %[tmp6], 4(%[tmp_x]) \n\t"
+ "lh %[tmp7], 6(%[inptr]) \n\t"
+ "lh %[tmp8], 6(%[tmp_x]) \n\t"
+ "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
+ "mul %[tmp2], %[tmp3], %[tmp4] \n\t"
+ "mul %[tmp3], %[tmp5], %[tmp6] \n\t"
+ "mul %[tmp4], %[tmp7], %[tmp8] \n\t"
+ "addiu %[n], %[n], -4 \n\t"
+ "addiu %[inptr], %[inptr], 8 \n\t"
+ "addiu %[tmp_x], %[tmp_x], 8 \n\t"
+ "addu %[csum32], %[csum32], %[tmp1] \n\t"
+ "addu %[csum32], %[csum32], %[tmp2] \n\t"
+ "addu %[csum32], %[csum32], %[tmp3] \n\t"
+ "bgtz %[n], 1b \n\t"
+ " addu %[csum32], %[csum32], %[tmp4] \n\t" // delay slot: fourth product of this pass
+ "b 3f \n\t" // skip the shifting loop
+ " nop \n\t"
+ "2: \n\t" // scaling != 0: shift every product before summing
+ "lh %[tmp1], 0(%[inptr]) \n\t"
+ "lh %[tmp2], 0(%[tmp_x]) \n\t"
+ "lh %[tmp3], 2(%[inptr]) \n\t"
+ "lh %[tmp4], 2(%[tmp_x]) \n\t"
+ "lh %[tmp5], 4(%[inptr]) \n\t"
+ "lh %[tmp6], 4(%[tmp_x]) \n\t"
+ "lh %[tmp7], 6(%[inptr]) \n\t"
+ "lh %[tmp8], 6(%[tmp_x]) \n\t"
+ "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
+ "mul %[tmp2], %[tmp3], %[tmp4] \n\t"
+ "mul %[tmp3], %[tmp5], %[tmp6] \n\t"
+ "mul %[tmp4], %[tmp7], %[tmp8] \n\t"
+ "addiu %[n], %[n], -4 \n\t"
+ "addiu %[inptr], %[inptr], 8 \n\t"
+ "addiu %[tmp_x], %[tmp_x], 8 \n\t"
+ "srav %[tmp1], %[tmp1], %[scaling] \n\t"
+ "srav %[tmp2], %[tmp2], %[scaling] \n\t"
+ "srav %[tmp3], %[tmp3], %[scaling] \n\t"
+ "srav %[tmp4], %[tmp4], %[scaling] \n\t"
+ "addu %[csum32], %[csum32], %[tmp1] \n\t"
+ "addu %[csum32], %[csum32], %[tmp2] \n\t"
+ "addu %[csum32], %[csum32], %[tmp3] \n\t"
+ "bgtz %[n], 2b \n\t"
+ " addu %[csum32], %[csum32], %[tmp4] \n\t" // delay slot: fourth product of this pass
+ "3: \n\t" // common exit of both loops
+ ".set pop \n\t"
+ : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+ [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+ [tmp7] "=&r" (tmp7), [tmp8] "=&r" (tmp8), [inptr] "+r" (inptr),
+ [csum32] "+r" (csum32), [tmp_x] "+r" (tmp_x), [ysum32] "+r" (ysum32),
+ [n] "+r" (n)
+ : [tmp_in1] "r" (tmp_in1), [tmp_in2] "r" (tmp_in2),
+ [scaling] "r" (scaling)
+ : "memory", "hi", "lo" // hi/lo listed presumably because of the mul instructions
+ );
+ // Move to the output slot for this k (one index lower than the last).
+ logcorQ8--;
+ lys = WebRtcIsacfix_Log2Q8((uint32_t)ysum32); // Q8
+ lys = WEBRTC_SPL_RSHIFT_W32(lys, 1); // half the log, i.e. log of sqrt(ysum)
+ if (csum32 > 0) {
+ lcs = WebRtcIsacfix_Log2Q8((uint32_t)csum32); // log2(csum) in Q8
+ if (lcs > (lys + oneQ8)) { // log difference > 1.00 in Q8, i.e. csum/sqrt(ysum) > 2
+ *logcorQ8 = lcs - lys; // log2(csum/sqrt(ysum))
+ } else {
+ *logcorQ8 = oneQ8; // 1.00
+ }
+ } else {
+ *logcorQ8 = 0;
+ }
+ }
+}
diff --git a/modules/audio_coding/codecs/isac/fix/source/transform.c b/modules/audio_coding/codecs/isac/fix/source/transform.c
index 67e513c7..24ccc821 100644
--- a/modules/audio_coding/codecs/isac/fix/source/transform.c
+++ b/modules/audio_coding/codecs/isac/fix/source/transform.c
@@ -19,89 +19,13 @@
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/fft.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
-#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
-/* Tables are defined in ARM assembly files. */
+/* Tables are defined in transform_tables.c file or ARM assembly files. */
/* Cosine table 1 in Q14 */
extern const int16_t WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2];
/* Sine table 1 in Q14 */
extern const int16_t WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2];
/* Sine table 2 in Q14 */
extern const int16_t WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4];
-#else
-/* Cosine table 1 in Q14 */
-static const int16_t WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2] = {
- 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270,
- 16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880,
- 15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218,
- 15137, 15053, 14968, 14879, 14788, 14694, 14598, 14500, 14399, 14295,
- 14189, 14081, 13970, 13856, 13741, 13623, 13502, 13380, 13255, 13128,
- 12998, 12867, 12733, 12597, 12458, 12318, 12176, 12031, 11885, 11736,
- 11585, 11433, 11278, 11121, 10963, 10803, 10641, 10477, 10311, 10143,
- 9974, 9803, 9630, 9456, 9280, 9102, 8923, 8743, 8561, 8377,
- 8192, 8006, 7818, 7629, 7438, 7246, 7053, 6859, 6664, 6467,
- 6270, 6071, 5872, 5671, 5469, 5266, 5063, 4859, 4653, 4447,
- 4240, 4033, 3825, 3616, 3406, 3196, 2986, 2775, 2563, 2351,
- 2139, 1926, 1713, 1499, 1285, 1072, 857, 643, 429, 214,
- 0, -214, -429, -643, -857, -1072, -1285, -1499, -1713, -1926,
- -2139, -2351, -2563, -2775, -2986, -3196, -3406, -3616, -3825, -4033,
- -4240, -4447, -4653, -4859, -5063, -5266, -5469, -5671, -5872, -6071,
- -6270, -6467, -6664, -6859, -7053, -7246, -7438, -7629, -7818, -8006,
- -8192, -8377, -8561, -8743, -8923, -9102, -9280, -9456, -9630, -9803,
- -9974, -10143, -10311, -10477, -10641, -10803, -10963, -11121, -11278, -11433,
- -11585, -11736, -11885, -12031, -12176, -12318, -12458, -12597, -12733,
- -12867, -12998, -13128, -13255, -13380, -13502, -13623, -13741, -13856,
- -13970, -14081, -14189, -14295, -14399, -14500, -14598, -14694, -14788,
- -14879, -14968, -15053, -15137, -15218, -15296, -15371, -15444, -15515,
- -15582, -15647, -15709, -15769, -15826, -15880, -15931, -15980, -16026,
- -16069, -16110, -16147, -16182, -16214, -16244, -16270, -16294, -16315,
- -16333, -16349, -16362, -16371, -16378, -16383
-};
-
-/* Sine table 1 in Q14 */
-static const int16_t WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2] = {
- 0, 214, 429, 643, 857, 1072, 1285, 1499, 1713, 1926,
- 2139, 2351, 2563, 2775, 2986, 3196, 3406, 3616, 3825, 4033,
- 4240, 4447, 4653, 4859, 5063, 5266, 5469, 5671, 5872, 6071,
- 6270, 6467, 6664, 6859, 7053, 7246, 7438, 7629, 7818, 8006,
- 8192, 8377, 8561, 8743, 8923, 9102, 9280, 9456, 9630, 9803,
- 9974, 10143, 10311, 10477, 10641, 10803, 10963, 11121, 11278, 11433,
- 11585, 11736, 11885, 12031, 12176, 12318, 12458, 12597, 12733, 12867,
- 12998, 13128, 13255, 13380, 13502, 13623, 13741, 13856, 13970, 14081,
- 14189, 14295, 14399, 14500, 14598, 14694, 14788, 14879, 14968, 15053,
- 15137, 15218, 15296, 15371, 15444, 15515, 15582, 15647, 15709, 15769,
- 15826, 15880, 15931, 15980, 16026, 16069, 16110, 16147, 16182, 16214,
- 16244, 16270, 16294, 16315, 16333, 16349, 16362, 16371, 16378, 16383,
- 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270,
- 16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880,
- 15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218,
- 15137, 15053, 14968, 14879, 14788, 14694, 14598, 14500, 14399, 14295,
- 14189, 14081, 13970, 13856, 13741, 13623, 13502, 13380, 13255, 13128,
- 12998, 12867, 12733, 12597, 12458, 12318, 12176, 12031, 11885, 11736,
- 11585, 11433, 11278, 11121, 10963, 10803, 10641, 10477, 10311, 10143,
- 9974, 9803, 9630, 9456, 9280, 9102, 8923, 8743, 8561, 8377,
- 8192, 8006, 7818, 7629, 7438, 7246, 7053, 6859, 6664, 6467,
- 6270, 6071, 5872, 5671, 5469, 5266, 5063, 4859, 4653, 4447,
- 4240, 4033, 3825, 3616, 3406, 3196, 2986, 2775, 2563, 2351,
- 2139, 1926, 1713, 1499, 1285, 1072, 857, 643, 429, 214
-};
-
-
-/* Sine table 2 in Q14 */
-static const int16_t WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4] = {
- 16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305, 16283, -16257,
- 16229, -16199, 16165, -16129, 16090, -16048, 16003, -15956, 15906, -15853,
- 15798, -15739, 15679, -15615, 15549, -15480, 15408, -15334, 15257, -15178,
- 15095, -15011, 14924, -14834, 14741, -14647, 14549, -14449, 14347, -14242,
- 14135, -14025, 13913, -13799, 13682, -13563, 13441, -13318, 13192, -13063,
- 12933, -12800, 12665, -12528, 12389, -12247, 12104, -11958, 11810, -11661,
- 11509, -11356, 11200, -11042, 10883, -10722, 10559, -10394, 10227, -10059,
- 9889, -9717, 9543, -9368, 9191, -9013, 8833, -8652, 8469, -8285,
- 8099, -7912, 7723, -7534, 7342, -7150, 6957, -6762, 6566, -6369,
- 6171, -5971, 5771, -5570, 5368, -5165, 4961, -4756, 4550, -4344,
- 4137, -3929, 3720, -3511, 3301, -3091, 2880, -2669, 2457, -2245,
- 2032, -1819, 1606, -1392, 1179, -965, 750, -536, 322, -107
-};
-#endif // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON
void WebRtcIsacfix_Time2SpecC(int16_t *inre1Q9,
int16_t *inre2Q9,
diff --git a/modules/audio_coding/codecs/isac/fix/source/transform_mips.c b/modules/audio_coding/codecs/isac/fix/source/transform_mips.c
new file mode 100644
index 00000000..bf95ee57
--- /dev/null
+++ b/modules/audio_coding/codecs/isac/fix/source/transform_mips.c
@@ -0,0 +1,1287 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"
+#include "webrtc/modules/audio_coding/codecs/isac/fix/source/fft.h"
+#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
+
+// The tables are defined in transform_tables.c file.
+extern const int16_t WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2];
+extern const int16_t WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2];
+extern const int16_t WebRtcIsacfix_kCosTab2[FRAMESAMPLES/4];
+extern const int16_t WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4];
+
+// MIPS DSPr2 version of the WebRtcIsacfix_Time2Spec function
+// is not bit-exact with the C version.
+// The accuracy of the MIPS DSPr2 version is the same or better.
+void WebRtcIsacfix_Time2SpecMIPS(int16_t* inre1Q9,
+ int16_t* inre2Q9,
+ int16_t* outreQ7,
+ int16_t* outimQ7) {
+ int k = FRAMESAMPLES / 2;
+ int32_t tmpreQ16[FRAMESAMPLES / 2], tmpimQ16[FRAMESAMPLES / 2];
+ int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
+ int32_t inre1, inre2, tmpre, tmpim, factor, max, max1;
+ int16_t* cosptr;
+ int16_t* sinptr;
+
+ cosptr = (int16_t*)WebRtcIsacfix_kCosTab1;
+ sinptr = (int16_t*)WebRtcIsacfix_kSinTab1;
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[inre1], %[inre1Q9], 0 \n\t"
+ "addiu %[inre2], %[inre2Q9], 0 \n\t"
+ "addiu %[tmpre], %[tmpreQ16], 0 \n\t"
+ "addiu %[tmpim], %[tmpimQ16], 0 \n\t"
+ "addiu %[factor], $zero, 16921 \n\t"
+ "mul %[max], $zero, $zero \n\t"
+ // Multiply with complex exponentials and combine into one complex vector.
+ // Also, calculate the maximal absolute value in the same loop.
+ "1: \n\t"
+#if defined(MIPS_DSP_R2_LE)
+ "lwl %[r0], 0(%[inre1]) \n\t"
+ "lwl %[r2], 0(%[cosptr]) \n\t"
+ "lwl %[r3], 0(%[sinptr]) \n\t"
+ "lwl %[r1], 0(%[inre2]) \n\t"
+ "lwr %[r0], 0(%[inre1]) \n\t"
+ "lwr %[r2], 0(%[cosptr]) \n\t"
+ "lwr %[r3], 0(%[sinptr]) \n\t"
+ "lwr %[r1], 0(%[inre2]) \n\t"
+ "muleq_s.w.phr %[r4], %[r2], %[r0] \n\t"
+ "muleq_s.w.phr %[r5], %[r3], %[r0] \n\t"
+ "muleq_s.w.phr %[r6], %[r3], %[r1] \n\t"
+ "muleq_s.w.phr %[r7], %[r2], %[r1] \n\t"
+ "muleq_s.w.phl %[r8], %[r2], %[r0] \n\t"
+ "muleq_s.w.phl %[r0], %[r3], %[r0] \n\t"
+ "muleq_s.w.phl %[r3], %[r3], %[r1] \n\t"
+ "muleq_s.w.phl %[r1], %[r2], %[r1] \n\t"
+ "addiu %[k], %[k], -2 \n\t"
+ "addu %[r4], %[r4], %[r6] \n\t"
+ "subu %[r5], %[r7], %[r5] \n\t"
+ "sra %[r4], %[r4], 8 \n\t"
+ "sra %[r5], %[r5], 8 \n\t"
+ "mult $ac0, %[factor], %[r4] \n\t"
+ "mult $ac1, %[factor], %[r5] \n\t"
+ "addu %[r3], %[r8], %[r3] \n\t"
+ "subu %[r0], %[r1], %[r0] \n\t"
+ "sra %[r3], %[r3], 8 \n\t"
+ "sra %[r0], %[r0], 8 \n\t"
+ "mult $ac2, %[factor], %[r3] \n\t"
+ "mult $ac3, %[factor], %[r0] \n\t"
+ "extr_r.w %[r4], $ac0, 16 \n\t"
+ "extr_r.w %[r5], $ac1, 16 \n\t"
+ "addiu %[inre1], %[inre1], 4 \n\t"
+ "addiu %[inre2], %[inre2], 4 \n\t"
+ "extr_r.w %[r6], $ac2, 16 \n\t"
+ "extr_r.w %[r7], $ac3, 16 \n\t"
+ "addiu %[cosptr], %[cosptr], 4 \n\t"
+ "addiu %[sinptr], %[sinptr], 4 \n\t"
+ "shra_r.w %[r4], %[r4], 3 \n\t"
+ "shra_r.w %[r5], %[r5], 3 \n\t"
+ "sw %[r4], 0(%[tmpre]) \n\t"
+ "absq_s.w %[r4], %[r4] \n\t"
+ "sw %[r5], 0(%[tmpim]) \n\t"
+ "absq_s.w %[r5], %[r5] \n\t"
+ "shra_r.w %[r6], %[r6], 3 \n\t"
+ "shra_r.w %[r7], %[r7], 3 \n\t"
+ "sw %[r6], 4(%[tmpre]) \n\t"
+ "absq_s.w %[r6], %[r6] \n\t"
+ "sw %[r7], 4(%[tmpim]) \n\t"
+ "absq_s.w %[r7], %[r7] \n\t"
+ "slt %[r0], %[r4], %[r5] \n\t"
+ "movn %[r4], %[r5], %[r0] \n\t"
+ "slt %[r1], %[r6], %[r7] \n\t"
+ "movn %[r6], %[r7], %[r1] \n\t"
+ "slt %[r0], %[max], %[r4] \n\t"
+ "movn %[max], %[r4], %[r0] \n\t"
+ "slt %[r1], %[max], %[r6] \n\t"
+ "movn %[max], %[r6], %[r1] \n\t"
+ "addiu %[tmpre], %[tmpre], 8 \n\t"
+ "bgtz %[k], 1b \n\t"
+ " addiu %[tmpim], %[tmpim], 8 \n\t"
+#else // #if defined(MIPS_DSP_R2_LE)
+ "lh %[r0], 0(%[inre1]) \n\t"
+ "lh %[r1], 0(%[inre2]) \n\t"
+ "lh %[r2], 0(%[cosptr]) \n\t"
+ "lh %[r3], 0(%[sinptr]) \n\t"
+ "addiu %[k], %[k], -1 \n\t"
+ "mul %[r4], %[r0], %[r2] \n\t"
+ "mul %[r5], %[r1], %[r3] \n\t"
+ "mul %[r0], %[r0], %[r3] \n\t"
+ "mul %[r2], %[r1], %[r2] \n\t"
+ "addiu %[inre1], %[inre1], 2 \n\t"
+ "addiu %[inre2], %[inre2], 2 \n\t"
+ "addiu %[cosptr], %[cosptr], 2 \n\t"
+ "addiu %[sinptr], %[sinptr], 2 \n\t"
+ "addu %[r1], %[r4], %[r5] \n\t"
+ "sra %[r1], %[r1], 7 \n\t"
+ "sra %[r3], %[r1], 16 \n\t"
+ "andi %[r1], %[r1], 0xFFFF \n\t"
+ "sra %[r1], %[r1], 1 \n\t"
+ "mul %[r1], %[factor], %[r1] \n\t"
+ "mul %[r3], %[factor], %[r3] \n\t"
+ "subu %[r0], %[r2], %[r0] \n\t"
+ "sra %[r0], %[r0], 7 \n\t"
+ "sra %[r2], %[r0], 16 \n\t"
+ "andi %[r0], %[r0], 0xFFFF \n\t"
+ "sra %[r0], %[r0], 1 \n\t"
+ "mul %[r0], %[factor], %[r0] \n\t"
+ "mul %[r2], %[factor], %[r2] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r1], %[r1], 15 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r1], %[r1], 0x4000 \n\t"
+ "sra %[r1], %[r1], 15 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "addu %[r1], %[r3], %[r1] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r1], %[r1], 3 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r1], %[r1], 4 \n\t"
+ "sra %[r1], %[r1], 3 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sw %[r1], 0(%[tmpre]) \n\t"
+ "addiu %[tmpre], %[tmpre], 4 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "absq_s.w %[r1], %[r1] \n\t"
+ "shra_r.w %[r0], %[r0], 15 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "negu %[r4], %[r1] \n\t"
+ "slt %[r3], %[r1], $zero \n\t"
+ "movn %[r1], %[r4], %[r3] \n\t"
+ "addiu %[r0], %[r0], 0x4000 \n\t"
+ "sra %[r0], %[r0], 15 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "addu %[r0], %[r0], %[r2] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r0], %[r0], 3 \n\t"
+ "sw %[r0], 0(%[tmpim]) \n\t"
+ "absq_s.w %[r0], %[r0] \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r0], %[r0], 4 \n\t"
+ "sra %[r0], %[r0], 3 \n\t"
+ "sw %[r0], 0(%[tmpim]) \n\t"
+ "negu %[r2], %[r0] \n\t"
+ "slt %[r3], %[r0], $zero \n\t"
+ "movn %[r0], %[r2], %[r3] \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "slt %[r2], %[max], %[r1] \n\t"
+ "movn %[max], %[r1], %[r2] \n\t"
+ "slt %[r2], %[max], %[r0] \n\t"
+ "movn %[max], %[r0], %[r2] \n\t"
+ "bgtz %[k], 1b \n\t"
+ " addiu %[tmpim], %[tmpim], 4 \n\t"
+#endif // #if defined(MIPS_DSP_R2_LE)
+ // Calculate WebRtcSpl_NormW32(max).
+ // If max gets value >=0, we should shift max steps to the left, and the
+ // domain will be Q(16+shift). If max gets value <0, we should shift -max
+ // steps to the right, and the domain will be Q(16+max)
+ "clz %[max], %[max] \n\t"
+ "addiu %[max], %[max], -25 \n\t"
+ ".set pop \n\t"
+ : [k] "+r" (k), [inre1] "=&r" (inre1), [inre2] "=&r" (inre2),
+ [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+ [r3] "=&r" (r3), [r4] "=&r" (r4), [tmpre] "=&r" (tmpre),
+ [tmpim] "=&r" (tmpim), [max] "=&r" (max), [factor] "=&r" (factor),
+#if defined(MIPS_DSP_R2_LE)
+ [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8),
+#endif // #if defined(MIPS_DSP_R2_LE)
+ [r5] "=&r" (r5)
+ : [inre1Q9] "r" (inre1Q9), [inre2Q9] "r" (inre2Q9),
+ [tmpreQ16] "r" (tmpreQ16), [tmpimQ16] "r" (tmpimQ16),
+ [cosptr] "r" (cosptr), [sinptr] "r" (sinptr)
+ : "hi", "lo", "memory"
+ );
+
+ // "Fastest" vectors
+ k = FRAMESAMPLES / 4;
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[tmpre], %[tmpreQ16], 0 \n\t"
+ "addiu %[tmpim], %[tmpimQ16], 0 \n\t"
+ "addiu %[inre1], %[inre1Q9], 0 \n\t"
+ "addiu %[inre2], %[inre2Q9], 0 \n\t"
+ "blez %[max], 2f \n\t"
+ " subu %[max1], $zero, %[max] \n\t"
+ "1: \n\t"
+ "lw %[r0], 0(%[tmpre]) \n\t"
+ "lw %[r1], 0(%[tmpim]) \n\t"
+ "lw %[r2], 4(%[tmpre]) \n\t"
+ "lw %[r3], 4(%[tmpim]) \n\t"
+ "addiu %[k], %[k], -1 \n\t"
+ "sllv %[r0], %[r0], %[max] \n\t"
+ "sllv %[r1], %[r1], %[max] \n\t"
+ "sllv %[r2], %[r2], %[max] \n\t"
+ "sllv %[r3], %[r3], %[max] \n\t"
+ "addiu %[tmpre], %[tmpre], 8 \n\t"
+ "addiu %[tmpim], %[tmpim], 8 \n\t"
+ "sh %[r0], 0(%[inre1]) \n\t"
+ "sh %[r1], 0(%[inre2]) \n\t"
+ "sh %[r2], 2(%[inre1]) \n\t"
+ "sh %[r3], 2(%[inre2]) \n\t"
+ "addiu %[inre1], %[inre1], 4 \n\t"
+ "bgtz %[k], 1b \n\t"
+ " addiu %[inre2], %[inre2], 4 \n\t"
+ "b 4f \n\t"
+ " nop \n\t"
+ "2: \n\t"
+#if !defined(MIPS_DSP_R1_LE)
+ "addiu %[r4], %[max1], -1 \n\t"
+ "addiu %[r5], $zero, 1 \n\t"
+ "sllv %[r4], %[r5], %[r4] \n\t"
+#endif // #if !defined(MIPS_DSP_R1_LE)
+ "3: \n\t"
+ "lw %[r0], 0(%[tmpre]) \n\t"
+ "lw %[r1], 0(%[tmpim]) \n\t"
+ "lw %[r2], 4(%[tmpre]) \n\t"
+ "lw %[r3], 4(%[tmpim]) \n\t"
+ "addiu %[k], %[k], -1 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shrav_r.w %[r0], %[r0], %[max1] \n\t"
+ "shrav_r.w %[r1], %[r1], %[max1] \n\t"
+ "shrav_r.w %[r2], %[r2], %[max1] \n\t"
+ "shrav_r.w %[r3], %[r3], %[max1] \n\t"
+#else // #if !defined(MIPS_DSP_R1_LE)
+ "addu %[r0], %[r0], %[r4] \n\t"
+ "addu %[r1], %[r1], %[r4] \n\t"
+ "addu %[r2], %[r2], %[r4] \n\t"
+ "addu %[r3], %[r3], %[r4] \n\t"
+ "srav %[r0], %[r0], %[max1] \n\t"
+ "srav %[r1], %[r1], %[max1] \n\t"
+ "srav %[r2], %[r2], %[max1] \n\t"
+ "srav %[r3], %[r3], %[max1] \n\t"
+#endif // #if !defined(MIPS_DSP_R1_LE)
+ "addiu %[tmpre], %[tmpre], 8 \n\t"
+ "addiu %[tmpim], %[tmpim], 8 \n\t"
+ "sh %[r0], 0(%[inre1]) \n\t"
+ "sh %[r1], 0(%[inre2]) \n\t"
+ "sh %[r2], 2(%[inre1]) \n\t"
+ "sh %[r3], 2(%[inre2]) \n\t"
+ "addiu %[inre1], %[inre1], 4 \n\t"
+ "bgtz %[k], 3b \n\t"
+ " addiu %[inre2], %[inre2], 4 \n\t"
+ "4: \n\t"
+ ".set pop \n\t"
+ : [tmpre] "=&r" (tmpre), [tmpim] "=&r" (tmpim), [inre1] "=&r" (inre1),
+ [inre2] "=&r" (inre2), [k] "+r" (k), [max1] "=&r" (max1),
+#if !defined(MIPS_DSP_R1_LE)
+ [r4] "=&r" (r4), [r5] "=&r" (r5),
+#endif // #if !defined(MIPS_DSP_R1_LE)
+ [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3)
+ : [tmpreQ16] "r" (tmpreQ16), [tmpimQ16] "r" (tmpimQ16),
+ [inre1Q9] "r" (inre1Q9), [inre2Q9] "r" (inre2Q9), [max] "r" (max)
+ : "memory"
+ );
+
+ // Get DFT
+ WebRtcIsacfix_FftRadix16Fastest(inre1Q9, inre2Q9, -1); // real call
+
+ // "Fastest" vectors and
+ // Use symmetry to separate into two complex vectors
+ // and center frames in time around zero
+ // merged into one loop
+ cosptr = (int16_t*)WebRtcIsacfix_kCosTab2;
+ sinptr = (int16_t*)WebRtcIsacfix_kSinTab2;
+ k = FRAMESAMPLES / 4;
+ factor = FRAMESAMPLES - 2; // offset for FRAMESAMPLES / 2 - 1 array member
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "addiu %[inre1], %[inre1Q9], 0 \n\t"
+ "addiu %[inre2], %[inre2Q9], 0 \n\t"
+ "addiu %[tmpre], %[outreQ7], 0 \n\t"
+ "addiu %[tmpim], %[outimQ7], 0 \n\t"
+ "bltz %[max], 2f \n\t"
+ " subu %[max1], $zero, %[max] \n\t"
+ "1: \n\t"
+#if !defined(MIPS_DSP_R1_LE)
+ "addu %[r4], %[inre1], %[offset] \n\t"
+ "addu %[r5], %[inre2], %[offset] \n\t"
+#endif // #if !defined(MIPS_DSP_R1_LE)
+ "lh %[r0], 0(%[inre1]) \n\t"
+ "lh %[r1], 0(%[inre2]) \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "lhx %[r2], %[offset](%[inre1]) \n\t"
+ "lhx %[r3], %[offset](%[inre2]) \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "lh %[r2], 0(%[r4]) \n\t"
+ "lh %[r3], 0(%[r5]) \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "srav %[r0], %[r0], %[max] \n\t"
+ "srav %[r1], %[r1], %[max] \n\t"
+ "srav %[r2], %[r2], %[max] \n\t"
+ "srav %[r3], %[r3], %[max] \n\t"
+ "addu %[r4], %[r0], %[r2] \n\t"
+ "subu %[r0], %[r2], %[r0] \n\t"
+ "subu %[r2], %[r1], %[r3] \n\t"
+ "addu %[r1], %[r1], %[r3] \n\t"
+ "lh %[r3], 0(%[cosptr]) \n\t"
+ "lh %[r5], 0(%[sinptr]) \n\t"
+ "andi %[r6], %[r4], 0xFFFF \n\t"
+ "sra %[r4], %[r4], 16 \n\t"
+ "mul %[r7], %[r3], %[r6] \n\t"
+ "mul %[r8], %[r3], %[r4] \n\t"
+ "mul %[r6], %[r5], %[r6] \n\t"
+ "mul %[r4], %[r5], %[r4] \n\t"
+ "addiu %[k], %[k], -1 \n\t"
+ "addiu %[inre1], %[inre1], 2 \n\t"
+ "addiu %[inre2], %[inre2], 2 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r7], %[r7], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r7], %[r7], 0x2000 \n\t"
+ "sra %[r7], %[r7], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r8], %[r8], 2 \n\t"
+ "addu %[r8], %[r8], %[r7] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r6], %[r6], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r4], %[r4], 2 \n\t"
+ "addu %[r4], %[r4], %[r6] \n\t"
+ "andi %[r6], %[r2], 0xFFFF \n\t"
+ "sra %[r2], %[r2], 16 \n\t"
+ "mul %[r7], %[r5], %[r6] \n\t"
+ "mul %[r9], %[r5], %[r2] \n\t"
+ "mul %[r6], %[r3], %[r6] \n\t"
+ "mul %[r2], %[r3], %[r2] \n\t"
+ "addiu %[cosptr], %[cosptr], 2 \n\t"
+ "addiu %[sinptr], %[sinptr], 2 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r7], %[r7], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r7], %[r7], 0x2000 \n\t"
+ "sra %[r7], %[r7], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r9], %[r9], 2 \n\t"
+ "addu %[r9], %[r7], %[r9] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r6], %[r6], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r2], %[r2], 2 \n\t"
+ "addu %[r2], %[r6], %[r2] \n\t"
+ "subu %[r8], %[r8], %[r9] \n\t"
+ "sra %[r8], %[r8], 9 \n\t"
+ "addu %[r2], %[r4], %[r2] \n\t"
+ "sra %[r2], %[r2], 9 \n\t"
+ "sh %[r8], 0(%[tmpre]) \n\t"
+ "sh %[r2], 0(%[tmpim]) \n\t"
+
+ "andi %[r4], %[r1], 0xFFFF \n\t"
+ "sra %[r1], %[r1], 16 \n\t"
+ "andi %[r6], %[r0], 0xFFFF \n\t"
+ "sra %[r0], %[r0], 16 \n\t"
+ "mul %[r7], %[r5], %[r4] \n\t"
+ "mul %[r9], %[r5], %[r1] \n\t"
+ "mul %[r4], %[r3], %[r4] \n\t"
+ "mul %[r1], %[r3], %[r1] \n\t"
+ "mul %[r8], %[r3], %[r0] \n\t"
+ "mul %[r3], %[r3], %[r6] \n\t"
+ "mul %[r6], %[r5], %[r6] \n\t"
+ "mul %[r0], %[r5], %[r0] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r7], %[r7], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r7], %[r7], 0x2000 \n\t"
+ "sra %[r7], %[r7], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r9], %[r9], 2 \n\t"
+ "addu %[r9], %[r9], %[r7] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r4], %[r4], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r4], %[r4], 0x2000 \n\t"
+ "sra %[r4], %[r4], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r1], %[r1], 2 \n\t"
+ "addu %[r1], %[r1], %[r4] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r3], %[r3], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r3], %[r3], 0x2000 \n\t"
+ "sra %[r3], %[r3], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r8], %[r8], 2 \n\t"
+ "addu %[r8], %[r8], %[r3] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r6], %[r6], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r0], %[r0], 2 \n\t"
+ "addu %[r0], %[r0], %[r6] \n\t"
+ "addu %[r3], %[tmpre], %[offset] \n\t"
+ "addu %[r2], %[tmpim], %[offset] \n\t"
+ "addu %[r9], %[r9], %[r8] \n\t"
+ "negu %[r9], %[r9] \n\t"
+ "sra %[r9], %[r9], 9 \n\t"
+ "subu %[r0], %[r0], %[r1] \n\t"
+ "addiu %[offset], %[offset], -4 \n\t"
+ "sh %[r9], 0(%[r3]) \n\t"
+ "sh %[r0], 0(%[r2]) \n\t"
+ "addiu %[tmpre], %[tmpre], 2 \n\t"
+ "bgtz %[k], 1b \n\t"
+ " addiu %[tmpim], %[tmpim], 2 \n\t"
+ "b 3f \n\t"
+ " nop \n\t"
+ "2: \n\t"
+#if !defined(MIPS_DSP_R1_LE)
+ "addu %[r4], %[inre1], %[offset] \n\t"
+ "addu %[r5], %[inre2], %[offset] \n\t"
+#endif // #if !defined(MIPS_DSP_R1_LE)
+ "lh %[r0], 0(%[inre1]) \n\t"
+ "lh %[r1], 0(%[inre2]) \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "lhx %[r2], %[offset](%[inre1]) \n\t"
+ "lhx %[r3], %[offset](%[inre2]) \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "lh %[r2], 0(%[r4]) \n\t"
+ "lh %[r3], 0(%[r5]) \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sllv %[r0], %[r0], %[max1] \n\t"
+ "sllv %[r1], %[r1], %[max1] \n\t"
+ "sllv %[r2], %[r2], %[max1] \n\t"
+ "sllv %[r3], %[r3], %[max1] \n\t"
+ "addu %[r4], %[r0], %[r2] \n\t"
+ "subu %[r0], %[r2], %[r0] \n\t"
+ "subu %[r2], %[r1], %[r3] \n\t"
+ "addu %[r1], %[r1], %[r3] \n\t"
+ "lh %[r3], 0(%[cosptr]) \n\t"
+ "lh %[r5], 0(%[sinptr]) \n\t"
+ "andi %[r6], %[r4], 0xFFFF \n\t"
+ "sra %[r4], %[r4], 16 \n\t"
+ "mul %[r7], %[r3], %[r6] \n\t"
+ "mul %[r8], %[r3], %[r4] \n\t"
+ "mul %[r6], %[r5], %[r6] \n\t"
+ "mul %[r4], %[r5], %[r4] \n\t"
+ "addiu %[k], %[k], -1 \n\t"
+ "addiu %[inre1], %[inre1], 2 \n\t"
+ "addiu %[inre2], %[inre2], 2 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r7], %[r7], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r7], %[r7], 0x2000 \n\t"
+ "sra %[r7], %[r7], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r8], %[r8], 2 \n\t"
+ "addu %[r8], %[r8], %[r7] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r6], %[r6], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r4], %[r4], 2 \n\t"
+ "addu %[r4], %[r4], %[r6] \n\t"
+ "andi %[r6], %[r2], 0xFFFF \n\t"
+ "sra %[r2], %[r2], 16 \n\t"
+ "mul %[r7], %[r5], %[r6] \n\t"
+ "mul %[r9], %[r5], %[r2] \n\t"
+ "mul %[r6], %[r3], %[r6] \n\t"
+ "mul %[r2], %[r3], %[r2] \n\t"
+ "addiu %[cosptr], %[cosptr], 2 \n\t"
+ "addiu %[sinptr], %[sinptr], 2 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r7], %[r7], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r7], %[r7], 0x2000 \n\t"
+ "sra %[r7], %[r7], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r9], %[r9], 2 \n\t"
+ "addu %[r9], %[r7], %[r9] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r6], %[r6], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r2], %[r2], 2 \n\t"
+ "addu %[r2], %[r6], %[r2] \n\t"
+ "subu %[r8], %[r8], %[r9] \n\t"
+ "sra %[r8], %[r8], 9 \n\t"
+ "addu %[r2], %[r4], %[r2] \n\t"
+ "sra %[r2], %[r2], 9 \n\t"
+ "sh %[r8], 0(%[tmpre]) \n\t"
+ "sh %[r2], 0(%[tmpim]) \n\t"
+ "andi %[r4], %[r1], 0xFFFF \n\t"
+ "sra %[r1], %[r1], 16 \n\t"
+ "andi %[r6], %[r0], 0xFFFF \n\t"
+ "sra %[r0], %[r0], 16 \n\t"
+ "mul %[r7], %[r5], %[r4] \n\t"
+ "mul %[r9], %[r5], %[r1] \n\t"
+ "mul %[r4], %[r3], %[r4] \n\t"
+ "mul %[r1], %[r3], %[r1] \n\t"
+ "mul %[r8], %[r3], %[r0] \n\t"
+ "mul %[r3], %[r3], %[r6] \n\t"
+ "mul %[r6], %[r5], %[r6] \n\t"
+ "mul %[r0], %[r5], %[r0] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r7], %[r7], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r7], %[r7], 0x2000 \n\t"
+ "sra %[r7], %[r7], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r9], %[r9], 2 \n\t"
+ "addu %[r9], %[r9], %[r7] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r4], %[r4], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r4], %[r4], 0x2000 \n\t"
+ "sra %[r4], %[r4], 14 \n\t"
+#endif
+ "sll %[r1], %[r1], 2 \n\t"
+ "addu %[r1], %[r1], %[r4] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r3], %[r3], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r3], %[r3], 0x2000 \n\t"
+ "sra %[r3], %[r3], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r8], %[r8], 2 \n\t"
+ "addu %[r8], %[r8], %[r3] \n\t"
+#if defined(MIPS_DSP_R1_LE)
+ "shra_r.w %[r6], %[r6], 14 \n\t"
+#else // #if defined(MIPS_DSP_R1_LE)
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+#endif // #if defined(MIPS_DSP_R1_LE)
+ "sll %[r0], %[r0], 2 \n\t"
+ "addu %[r0], %[r0], %[r6] \n\t"
+ "addu %[r3], %[tmpre], %[offset] \n\t"
+ "addu %[r2], %[tmpim], %[offset] \n\t"
+ "addu %[r9], %[r9], %[r8] \n\t"
+ "negu %[r9], %[r9] \n\t"
+ "sra %[r9], %[r9], 9 \n\t"
+ "subu %[r0], %[r0], %[r1] \n\t"
+ "sra %[r0], %[r0], 9 \n\t"
+ "addiu %[offset], %[offset], -4 \n\t"
+ "sh %[r9], 0(%[r3]) \n\t"
+ "sh %[r0], 0(%[r2]) \n\t"
+ "addiu %[tmpre], %[tmpre], 2 \n\t"
+ "bgtz %[k], 2b \n\t"
+ " addiu %[tmpim], %[tmpim], 2 \n\t"
+ "3: \n\t"
+ ".set pop \n\t"
+ : [inre1] "=&r" (inre1), [inre2] "=&r" (inre2), [tmpre] "=&r" (tmpre),
+ [tmpim] "=&r" (tmpim), [offset] "+r" (factor), [k] "+r" (k),
+ [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
+ [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
+ [r8] "=&r" (r8), [r9] "=&r" (r9), [max1] "=&r" (max1)
+ : [inre1Q9] "r" (inre1Q9), [inre2Q9] "r" (inre2Q9),
+ [outreQ7] "r" (outreQ7), [outimQ7] "r" (outimQ7),
+ [max] "r" (max), [cosptr] "r" (cosptr), [sinptr] "r" (sinptr)
+ : "hi", "lo", "memory"
+ );
+}
+
+// MIPS implementation of WebRtcIsacfix_Spec2Time (plain MIPS32, DSP R1 and
+// DSP R2 variants selected at compile time).
+//
+// Parameters:
+//   inreQ7, inimQ7       - input arrays. Both are overwritten: after the
+//                          first stage they are reused as the 16-bit in-place
+//                          buffers passed to WebRtcIsacfix_FftRadix16Fastest.
+//   outre1Q16, outre2Q16 - output arrays; FRAMESAMPLES / 2 int32_t values are
+//                          written to each.
+//
+// Processing stages:
+//   1. Combine the inputs with WebRtcIsacfix_kCosTab2 / kSinTab2, store the
+//      32-bit results in outre1Q16 / outre2Q16 and track their maximum
+//      absolute value (WebRtcSpl_MaxAbsValueW32 inlined). The normalization
+//      shift is then derived as clz(max) - 25.
+//   2. Scale the 32-bit results by that shift (left shift when it is >= 0,
+//      rounded right shift otherwise) into the 16-bit arrays.
+//   3. Run the FFT, undo the scaling, multiply by the constants 273 and 31727
+//      and rotate by WebRtcIsacfix_kCosTab1 / kSinTab1.
+void WebRtcIsacfix_Spec2TimeMIPS(int16_t *inreQ7,
+                                 int16_t *inimQ7,
+                                 int32_t *outre1Q16,
+                                 int32_t *outre2Q16) {
+  int k = FRAMESAMPLES / 4;
+  int16_t* inre;
+  int16_t* inim;
+  int32_t* outre1;
+  int32_t* outre2;
+  int16_t* cosptr = (int16_t*)WebRtcIsacfix_kCosTab2;
+  int16_t* sinptr = (int16_t*)WebRtcIsacfix_kSinTab2;
+  int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, max, max1;
+  // Byte offset from the running input pointer to the mirrored element at the
+  // end of the array. The DSP R1 variant loads two 16-bit samples per access,
+  // so it starts one element earlier.
+#if defined(MIPS_DSP_R1_LE)
+  int32_t offset = FRAMESAMPLES - 4;
+#else  // #if defined(MIPS_DSP_R1_LE)
+  int32_t offset = FRAMESAMPLES - 2;
+#endif  // #if defined(MIPS_DSP_R1_LE)
+
+  // Stage 1. cosptr and sinptr are advanced inside the assembly, so they are
+  // declared as read-write ("+r") operands.
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    "addiu %[inre], %[inreQ7], 0 \n\t"
+    "addiu %[inim], %[inimQ7], 0 \n\t"
+    "addiu %[outre1], %[outre1Q16], 0 \n\t"
+    "addiu %[outre2], %[outre2Q16], 0 \n\t"
+    // max = 0
+    "mul %[max], $zero, $zero \n\t"
+   "1: \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    // Process two samples in one iteration avoiding left shift before
+    // multiplication. MaxAbsValueW32 function inlined into the loop.
+    // NOTE(review): each lwl/lwr pair uses the same offset, which only
+    // assembles a complete word when the address is 4-byte aligned - confirm
+    // the alignment of the buffers and tables passed in.
+    "addu %[r8], %[inre], %[offset] \n\t"
+    "addu %[r9], %[inim], %[offset] \n\t"
+    "lwl %[r4], 0(%[r8]) \n\t"
+    "lwl %[r5], 0(%[r9]) \n\t"
+    "lwl %[r0], 0(%[inre]) \n\t"
+    "lwl %[r1], 0(%[inim]) \n\t"
+    "lwl %[r2], 0(%[cosptr]) \n\t"
+    "lwl %[r3], 0(%[sinptr]) \n\t"
+    "lwr %[r4], 0(%[r8]) \n\t"
+    "lwr %[r5], 0(%[r9]) \n\t"
+    "lwr %[r0], 0(%[inre]) \n\t"
+    "lwr %[r1], 0(%[inim]) \n\t"
+    "lwr %[r2], 0(%[cosptr]) \n\t"
+    "lwr %[r3], 0(%[sinptr]) \n\t"
+    // Swap the half-words of the mirrored pair so that it lines up with the
+    // forward pair.
+    "packrl.ph %[r4], %[r4], %[r4] \n\t"
+    "packrl.ph %[r5], %[r5], %[r5] \n\t"
+    // First sample of the pair, outre1 contribution.
+    "muleq_s.w.phr %[r6], %[r0], %[r2] \n\t"
+    "muleq_s.w.phr %[r7], %[r1], %[r3] \n\t"
+    "muleq_s.w.phr %[r8], %[r4], %[r2] \n\t"
+    "muleq_s.w.phr %[r9], %[r5], %[r3] \n\t"
+    "addiu %[k], %[k], -2 \n\t"
+    "addiu %[cosptr], %[cosptr], 4 \n\t"
+    "addiu %[sinptr], %[sinptr], 4 \n\t"
+    "addiu %[inre], %[inre], 4 \n\t"
+    "addiu %[inim], %[inim], 4 \n\t"
+    "shra_r.w %[r6], %[r6], 6 \n\t"
+    "shra_r.w %[r7], %[r7], 6 \n\t"
+    "shra_r.w %[r8], %[r8], 6 \n\t"
+    "shra_r.w %[r9], %[r9], 6 \n\t"
+    "addu %[r6], %[r6], %[r7] \n\t"
+    "subu %[r9], %[r9], %[r8] \n\t"
+    "subu %[r7], %[r6], %[r9] \n\t"
+    "addu %[r6], %[r6], %[r9] \n\t"
+    // r10 = address of the mirrored 32-bit output pair in outre1.
+    "sll %[r10], %[offset], 1 \n\t"
+    "addu %[r10], %[outre1], %[r10] \n\t"
+    "sw %[r7], 0(%[outre1]) \n\t"
+    "absq_s.w %[r7], %[r7] \n\t"
+    "sw %[r6], 4(%[r10]) \n\t"
+    "absq_s.w %[r6], %[r6] \n\t"
+    "slt %[r8], %[max], %[r7] \n\t"
+    "movn %[max], %[r7], %[r8] \n\t"
+    "slt %[r8], %[max], %[r6] \n\t"
+    "movn %[max], %[r6], %[r8] \n\t"
+    // Second sample of the pair, outre1 contribution.
+    "muleq_s.w.phl %[r6], %[r0], %[r2] \n\t"
+    "muleq_s.w.phl %[r7], %[r1], %[r3] \n\t"
+    "muleq_s.w.phl %[r8], %[r4], %[r2] \n\t"
+    "muleq_s.w.phl %[r9], %[r5], %[r3] \n\t"
+    "shra_r.w %[r6], %[r6], 6 \n\t"
+    "shra_r.w %[r7], %[r7], 6 \n\t"
+    "shra_r.w %[r8], %[r8], 6 \n\t"
+    "shra_r.w %[r9], %[r9], 6 \n\t"
+    "addu %[r6], %[r6], %[r7] \n\t"
+    "subu %[r9], %[r9], %[r8] \n\t"
+    "subu %[r7], %[r6], %[r9] \n\t"
+    "addu %[r6], %[r6], %[r9] \n\t"
+    "sw %[r7], 4(%[outre1]) \n\t"
+    "absq_s.w %[r7], %[r7] \n\t"
+    "sw %[r6], 0(%[r10]) \n\t"
+    "absq_s.w %[r6], %[r6] \n\t"
+    "slt %[r8], %[max], %[r7] \n\t"
+    "movn %[max], %[r7], %[r8] \n\t"
+    "slt %[r8], %[max], %[r6] \n\t"
+    "movn %[max], %[r6], %[r8] \n\t"
+    // First sample of the pair, outre2 contribution.
+    "muleq_s.w.phr %[r6], %[r1], %[r2] \n\t"
+    "muleq_s.w.phr %[r7], %[r0], %[r3] \n\t"
+    "muleq_s.w.phr %[r8], %[r5], %[r2] \n\t"
+    "muleq_s.w.phr %[r9], %[r4], %[r3] \n\t"
+    "addiu %[outre1], %[outre1], 8 \n\t"
+    "shra_r.w %[r6], %[r6], 6 \n\t"
+    "shra_r.w %[r7], %[r7], 6 \n\t"
+    "shra_r.w %[r8], %[r8], 6 \n\t"
+    "shra_r.w %[r9], %[r9], 6 \n\t"
+    "subu %[r6], %[r6], %[r7] \n\t"
+    "addu %[r9], %[r9], %[r8] \n\t"
+    "subu %[r7], %[r6], %[r9] \n\t"
+    "addu %[r6], %[r9], %[r6] \n\t"
+    "negu %[r6], %[r6] \n\t"
+    // r10 = address of the mirrored 32-bit output pair in outre2.
+    "sll %[r10], %[offset], 1 \n\t"
+    "addu %[r10], %[outre2], %[r10] \n\t"
+    "sw %[r7], 0(%[outre2]) \n\t"
+    "absq_s.w %[r7], %[r7] \n\t"
+    "sw %[r6], 4(%[r10]) \n\t"
+    "absq_s.w %[r6], %[r6] \n\t"
+    "slt %[r8], %[max], %[r7] \n\t"
+    "movn %[max], %[r7], %[r8] \n\t"
+    "slt %[r8], %[max], %[r6] \n\t"
+    "movn %[max], %[r6], %[r8] \n\t"
+    // Second sample of the pair, outre2 contribution.
+    "muleq_s.w.phl %[r6], %[r1], %[r2] \n\t"
+    "muleq_s.w.phl %[r7], %[r0], %[r3] \n\t"
+    "muleq_s.w.phl %[r8], %[r5], %[r2] \n\t"
+    "muleq_s.w.phl %[r9], %[r4], %[r3] \n\t"
+    "addiu %[offset], %[offset], -8 \n\t"
+    "shra_r.w %[r6], %[r6], 6 \n\t"
+    "shra_r.w %[r7], %[r7], 6 \n\t"
+    "shra_r.w %[r8], %[r8], 6 \n\t"
+    "shra_r.w %[r9], %[r9], 6 \n\t"
+    "subu %[r6], %[r6], %[r7] \n\t"
+    "addu %[r9], %[r9], %[r8] \n\t"
+    "subu %[r7], %[r6], %[r9] \n\t"
+    "addu %[r6], %[r9], %[r6] \n\t"
+    "negu %[r6], %[r6] \n\t"
+    "sw %[r7], 4(%[outre2]) \n\t"
+    "absq_s.w %[r7], %[r7] \n\t"
+    "sw %[r6], 0(%[r10]) \n\t"
+    "absq_s.w %[r6], %[r6] \n\t"
+    "slt %[r8], %[max], %[r7] \n\t"
+    "movn %[max], %[r7], %[r8] \n\t"
+    "slt %[r8], %[max], %[r6] \n\t"
+    "movn %[max], %[r6], %[r8] \n\t"
+    "bgtz %[k], 1b \n\t"
+    " addiu %[outre2], %[outre2], 8 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    // One sample per iteration. Every product is rounded and shifted right
+    // by 5 (add 16, sra 5).
+    "lh %[r0], 0(%[inre]) \n\t"
+    "lh %[r1], 0(%[inim]) \n\t"
+    "lh %[r4], 0(%[cosptr]) \n\t"
+    "lh %[r5], 0(%[sinptr]) \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+    "mul %[r2], %[r0], %[r4] \n\t"
+    "mul %[r0], %[r0], %[r5] \n\t"
+    "mul %[r3], %[r1], %[r5] \n\t"
+    "mul %[r1], %[r1], %[r4] \n\t"
+    "addiu %[cosptr], %[cosptr], 2 \n\t"
+    "addiu %[sinptr], %[sinptr], 2 \n\t"
+    "addu %[r8], %[inre], %[offset] \n\t"
+    "addu %[r9], %[inim], %[offset] \n\t"
+    "addiu %[r2], %[r2], 16 \n\t"
+    "sra %[r2], %[r2], 5 \n\t"
+    "addiu %[r0], %[r0], 16 \n\t"
+    "sra %[r0], %[r0], 5 \n\t"
+    "addiu %[r3], %[r3], 16 \n\t"
+    "sra %[r3], %[r3], 5 \n\t"
+    // Mirrored samples from the end of the input arrays.
+    "lh %[r6], 0(%[r8]) \n\t"
+    "lh %[r7], 0(%[r9]) \n\t"
+    "addiu %[r1], %[r1], 16 \n\t"
+    "sra %[r1], %[r1], 5 \n\t"
+    "mul %[r8], %[r7], %[r4] \n\t"
+    "mul %[r7], %[r7], %[r5] \n\t"
+    "mul %[r9], %[r6], %[r4] \n\t"
+    "mul %[r6], %[r6], %[r5] \n\t"
+    "addu %[r2], %[r2], %[r3] \n\t"
+    "subu %[r1], %[r1], %[r0] \n\t"
+    // r4 / r5 = addresses of the mirrored 32-bit outputs.
+    "sll %[r0], %[offset], 1 \n\t"
+    "addu %[r4], %[outre1], %[r0] \n\t"
+    "addu %[r5], %[outre2], %[r0] \n\t"
+    "addiu %[r8], %[r8], 16 \n\t"
+    "sra %[r8], %[r8], 5 \n\t"
+    "addiu %[r7], %[r7], 16 \n\t"
+    "sra %[r7], %[r7], 5 \n\t"
+    "addiu %[r6], %[r6], 16 \n\t"
+    "sra %[r6], %[r6], 5 \n\t"
+    "addiu %[r9], %[r9], 16 \n\t"
+    "sra %[r9], %[r9], 5 \n\t"
+    "addu %[r8], %[r8], %[r6] \n\t"
+    "negu %[r8], %[r8] \n\t"
+    "subu %[r7], %[r7], %[r9] \n\t"
+    "subu %[r6], %[r2], %[r7] \n\t"
+    "addu %[r0], %[r2], %[r7] \n\t"
+    "addu %[r3], %[r1], %[r8] \n\t"
+    "subu %[r1], %[r8], %[r1] \n\t"
+    "sw %[r6], 0(%[outre1]) \n\t"
+    "sw %[r0], 0(%[r4]) \n\t"
+    "sw %[r3], 0(%[outre2]) \n\t"
+    "sw %[r1], 0(%[r5]) \n\t"
+    "addiu %[outre1], %[outre1], 4 \n\t"
+    "addiu %[offset], %[offset], -4 \n\t"
+    "addiu %[inre], %[inre], 2 \n\t"
+    "addiu %[inim], %[inim], 2 \n\t"
+    // Inlined WebRtcSpl_MaxAbsValueW32
+    "negu %[r5], %[r6] \n\t"
+    "slt %[r2], %[r6], $zero \n\t"
+    "movn %[r6], %[r5], %[r2] \n\t"
+    "negu %[r5], %[r0] \n\t"
+    "slt %[r2], %[r0], $zero \n\t"
+    "movn %[r0], %[r5], %[r2] \n\t"
+    "negu %[r5], %[r3] \n\t"
+    "slt %[r2], %[r3], $zero \n\t"
+    "movn %[r3], %[r5], %[r2] \n\t"
+    "negu %[r5], %[r1] \n\t"
+    "slt %[r2], %[r1], $zero \n\t"
+    "movn %[r1], %[r5], %[r2] \n\t"
+    "slt %[r2], %[r6], %[r0] \n\t"
+    "slt %[r5], %[r3], %[r1] \n\t"
+    "movn %[r6], %[r0], %[r2] \n\t"
+    "movn %[r3], %[r1], %[r5] \n\t"
+    "slt %[r2], %[r6], %[r3] \n\t"
+    "movn %[r6], %[r3], %[r2] \n\t"
+    "slt %[r2], %[max], %[r6] \n\t"
+    "movn %[max], %[r6], %[r2] \n\t"
+    "bgtz %[k], 1b \n\t"
+    " addiu %[outre2], %[outre2], 4 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    // Turn the maximum magnitude into the normalization shift.
+    "clz %[max], %[max] \n\t"
+    "addiu %[max], %[max], -25 \n\t"
+    ".set pop \n\t"
+    : [inre] "=&r" (inre), [inim] "=&r" (inim),
+      [outre1] "=&r" (outre1), [outre2] "=&r" (outre2),
+      [offset] "+r" (offset), [k] "+r" (k), [r0] "=&r" (r0),
+      [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
+      [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6),
+      [r7] "=&r" (r7), [r10] "=&r" (r10),
+      [r8] "=&r" (r8), [r9] "=&r" (r9), [max] "=&r" (max),
+      [cosptr] "+r" (cosptr), [sinptr] "+r" (sinptr)
+    : [inreQ7] "r" (inreQ7), [inimQ7] "r" (inimQ7),
+      [outre1Q16] "r" (outre1Q16), [outre2Q16] "r" (outre2Q16)
+    : "hi", "lo", "memory"
+  );
+
+  // "Fastest" vectors
+  // Stage 2: scale the 32-bit values by the shift computed above and store
+  // them as 16-bit values in inreQ7 / inimQ7. Two samples per iteration.
+  k = FRAMESAMPLES / 4;
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    "addiu %[inre], %[inreQ7], 0 \n\t"
+    "addiu %[inim], %[inimQ7], 0 \n\t"
+    "addiu %[outre1], %[outre1Q16], 0 \n\t"
+    "addiu %[outre2], %[outre2Q16], 0 \n\t"
+    // Negative shift -> rounded right shift by max1 = -max (set in the
+    // branch delay slot).
+    "bltz %[max], 2f \n\t"
+    " subu %[max1], $zero, %[max] \n\t"
+    // Non-negative shift: plain left shift.
+   "1: \n\t"
+    "lw %[r0], 0(%[outre1]) \n\t"
+    "lw %[r1], 0(%[outre2]) \n\t"
+    "lw %[r2], 4(%[outre1]) \n\t"
+    "lw %[r3], 4(%[outre2]) \n\t"
+    "sllv %[r0], %[r0], %[max] \n\t"
+    "sllv %[r1], %[r1], %[max] \n\t"
+    "sllv %[r2], %[r2], %[max] \n\t"
+    "sllv %[r3], %[r3], %[max] \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+    "addiu %[outre1], %[outre1], 8 \n\t"
+    "addiu %[outre2], %[outre2], 8 \n\t"
+    "sh %[r0], 0(%[inre]) \n\t"
+    "sh %[r1], 0(%[inim]) \n\t"
+    "sh %[r2], 2(%[inre]) \n\t"
+    "sh %[r3], 2(%[inim]) \n\t"
+    "addiu %[inre], %[inre], 4 \n\t"
+    "bgtz %[k], 1b \n\t"
+    " addiu %[inim], %[inim], 4 \n\t"
+    "b 4f \n\t"
+    " nop \n\t"
+   "2: \n\t"
+#if !defined(MIPS_DSP_R1_LE)
+    // r4 = rounding constant 1 << (max1 - 1).
+    "addiu %[r4], $zero, 1 \n\t"
+    "addiu %[r5], %[max1], -1 \n\t"
+    "sllv %[r4], %[r4], %[r5] \n\t"
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+   "3: \n\t"
+    "lw %[r0], 0(%[outre1]) \n\t"
+    "lw %[r1], 0(%[outre2]) \n\t"
+    "lw %[r2], 4(%[outre1]) \n\t"
+    "lw %[r3], 4(%[outre2]) \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shrav_r.w %[r0], %[r0], %[max1] \n\t"
+    "shrav_r.w %[r1], %[r1], %[max1] \n\t"
+    "shrav_r.w %[r2], %[r2], %[max1] \n\t"
+    "shrav_r.w %[r3], %[r3], %[max1] \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addu %[r0], %[r0], %[r4] \n\t"
+    "addu %[r1], %[r1], %[r4] \n\t"
+    "addu %[r2], %[r2], %[r4] \n\t"
+    "addu %[r3], %[r3], %[r4] \n\t"
+    "srav %[r0], %[r0], %[max1] \n\t"
+    "srav %[r1], %[r1], %[max1] \n\t"
+    "srav %[r2], %[r2], %[max1] \n\t"
+    "srav %[r3], %[r3], %[max1] \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[outre1], %[outre1], 8 \n\t"
+    "addiu %[outre2], %[outre2], 8 \n\t"
+    "sh %[r0], 0(%[inre]) \n\t"
+    "sh %[r1], 0(%[inim]) \n\t"
+    "sh %[r2], 2(%[inre]) \n\t"
+    "sh %[r3], 2(%[inim]) \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+    "addiu %[inre], %[inre], 4 \n\t"
+    "bgtz %[k], 3b \n\t"
+    " addiu %[inim], %[inim], 4 \n\t"
+   "4: \n\t"
+    ".set pop \n\t"
+    : [k] "+r" (k), [max1] "=&r" (max1), [r0] "=&r" (r0),
+      [inre] "=&r" (inre), [inim] "=&r" (inim),
+      [outre1] "=&r" (outre1), [outre2] "=&r" (outre2),
+#if !defined(MIPS_DSP_R1_LE)
+      [r4] "=&r" (r4), [r5] "=&r" (r5),
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+      [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3)
+    : [max] "r" (max), [inreQ7] "r" (inreQ7),
+      [inimQ7] "r" (inimQ7), [outre1Q16] "r" (outre1Q16),
+      [outre2Q16] "r" (outre2Q16)
+    : "memory"
+  );
+
+  WebRtcIsacfix_FftRadix16Fastest(inreQ7, inimQ7, 1); // real call
+
+  // All the remaining processing is done inside a single loop to avoid
+  // unnecessary memory accesses. MIPS DSPr2 version processes two samples
+  // at a time.
+  //
+  // Stage 3. For every sample: undo the stage-2 scaling, multiply by 273
+  // (16-bit right shift; presumably the 1 / (FRAMESAMPLES / 2) normalization
+  // in Q16 - TODO confirm), rotate by the cos/sin tables (14-bit right shift)
+  // and multiply by 31727 (11-bit right shift). 32x16-bit products are built
+  // from separate high / low half-word multiplications on the non-DSPr2
+  // path. max, cosptr and sinptr are written inside the assembly, so they
+  // are declared as read-write ("+r") operands.
+  cosptr = (int16_t*)WebRtcIsacfix_kCosTab1;
+  sinptr = (int16_t*)WebRtcIsacfix_kSinTab1;
+  k = FRAMESAMPLES / 2;
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    "addiu %[inre], %[inreQ7], 0 \n\t"
+    "addiu %[inim], %[inimQ7], 0 \n\t"
+    "addiu %[outre1], %[outre1Q16], 0 \n\t"
+    "addiu %[outre2], %[outre2Q16], 0 \n\t"
+    "addiu %[r4], $zero, 273 \n\t"
+    "addiu %[r5], $zero, 31727 \n\t"
+#if defined(MIPS_DSP_R2_LE)
+    // The DSPr2 path folds the 16-bit shift of the 273 multiplication into
+    // the scaling shift.
+    "addiu %[max], %[max], 16 \n\t"
+    "replv.ph %[r4], %[r4] \n\t"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+    "bltz %[max], 2f \n\t"
+    " subu %[max1], $zero, %[max] \n\t"
+#if defined(MIPS_DSP_R2_LE)
+    // One extra bit compensates for the doubling done by muleq_s.w.
+    "addiu %[max], %[max], 1 \n\t"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+   "1: \n\t"
+#if defined(MIPS_DSP_R2_LE)
+    "lwl %[r0], 0(%[inre]) \n\t"
+    "lwl %[r1], 0(%[inim]) \n\t"
+    "lh %[r2], 0(%[cosptr]) \n\t"
+    "lwr %[r0], 0(%[inre]) \n\t"
+    "lwr %[r1], 0(%[inim]) \n\t"
+    "lh %[r3], 0(%[sinptr]) \n\t"
+    "muleq_s.w.phr %[r6], %[r0], %[r4] \n\t"
+    "muleq_s.w.phr %[r7], %[r1], %[r4] \n\t"
+    "muleq_s.w.phl %[r0], %[r0], %[r4] \n\t"
+    "muleq_s.w.phl %[r1], %[r1], %[r4] \n\t"
+    "addiu %[k], %[k], -2 \n\t"
+    "addiu %[inre], %[inre], 4 \n\t"
+    "addiu %[inim], %[inim], 4 \n\t"
+    "shrav_r.w %[r6], %[r6], %[max] \n\t"
+    "shrav_r.w %[r7], %[r7], %[max] \n\t"
+    "mult $ac0, %[r2], %[r6] \n\t"
+    "mult $ac1, %[r3], %[r7] \n\t"
+    "mult $ac2, %[r2], %[r7] \n\t"
+    "mult $ac3, %[r3], %[r6] \n\t"
+    "lh %[r2], 2(%[cosptr]) \n\t"
+    "lh %[r3], 2(%[sinptr]) \n\t"
+    "extr_r.w %[r6], $ac0, 14 \n\t"
+    "extr_r.w %[r7], $ac1, 14 \n\t"
+    "extr_r.w %[r8], $ac2, 14 \n\t"
+    "extr_r.w %[r9], $ac3, 14 \n\t"
+    "shrav_r.w %[r0], %[r0], %[max] \n\t"
+    "shrav_r.w %[r1], %[r1], %[max] \n\t"
+    "mult $ac0, %[r2], %[r0] \n\t"
+    "mult $ac1, %[r3], %[r1] \n\t"
+    "mult $ac2, %[r2], %[r1] \n\t"
+    "mult $ac3, %[r3], %[r0] \n\t"
+    "addiu %[cosptr], %[cosptr], 4 \n\t"
+    "extr_r.w %[r0], $ac0, 14 \n\t"
+    "extr_r.w %[r1], $ac1, 14 \n\t"
+    "extr_r.w %[r2], $ac2, 14 \n\t"
+    "extr_r.w %[r3], $ac3, 14 \n\t"
+    "subu %[r6], %[r6], %[r7] \n\t"
+    "addu %[r8], %[r8], %[r9] \n\t"
+    "mult $ac0, %[r5], %[r6] \n\t"
+    "mult $ac1, %[r5], %[r8] \n\t"
+    "addiu %[sinptr], %[sinptr], 4 \n\t"
+    "subu %[r0], %[r0], %[r1] \n\t"
+    "addu %[r2], %[r2], %[r3] \n\t"
+    "extr_r.w %[r1], $ac0, 11 \n\t"
+    "extr_r.w %[r3], $ac1, 11 \n\t"
+    "mult $ac2, %[r5], %[r0] \n\t"
+    "mult $ac3, %[r5], %[r2] \n\t"
+    "sw %[r1], 0(%[outre1]) \n\t"
+    "sw %[r3], 0(%[outre2]) \n\t"
+    "addiu %[outre1], %[outre1], 8 \n\t"
+    "extr_r.w %[r0], $ac2, 11 \n\t"
+    "extr_r.w %[r2], $ac3, 11 \n\t"
+    "sw %[r0], -4(%[outre1]) \n\t"
+    "sw %[r2], 4(%[outre2]) \n\t"
+    "bgtz %[k], 1b \n\t"
+    " addiu %[outre2], %[outre2], 8 \n\t"
+    "b 3f \n\t"
+    // Explicit delay slot, so that the first instruction of the block at
+    // label 2 is not executed as part of this branch.
+    " nop \n\t"
+#else  // #if defined(MIPS_DSP_R2_LE)
+    "lh %[r0], 0(%[inre]) \n\t"
+    "lh %[r1], 0(%[inim]) \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+    "srav %[r0], %[r0], %[max] \n\t"
+    "srav %[r1], %[r1], %[max] \n\t"
+    // Multiply by 273 with a rounded 16-bit right shift (high and low
+    // half-words handled separately).
+    "sra %[r2], %[r0], 16 \n\t"
+    "andi %[r0], %[r0], 0xFFFF \n\t"
+    "sra %[r0], %[r0], 1 \n\t"
+    "sra %[r3], %[r1], 16 \n\t"
+    "andi %[r1], %[r1], 0xFFFF \n\t"
+    "sra %[r1], %[r1], 1 \n\t"
+    "mul %[r2], %[r2], %[r4] \n\t"
+    "mul %[r0], %[r0], %[r4] \n\t"
+    "mul %[r3], %[r3], %[r4] \n\t"
+    "mul %[r1], %[r1], %[r4] \n\t"
+    "addiu %[inre], %[inre], 2 \n\t"
+    "addiu %[inim], %[inim], 2 \n\t"
+    "lh %[r6], 0(%[cosptr]) \n\t"
+    "lh %[r7], 0(%[sinptr]) \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r0], %[r0], 15 \n\t"
+    "shra_r.w %[r1], %[r1], 15 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r0], %[r0], 0x4000 \n\t"
+    "addiu %[r1], %[r1], 0x4000 \n\t"
+    "sra %[r0], %[r0], 15 \n\t"
+    "sra %[r1], %[r1], 15 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu %[r0], %[r2], %[r0] \n\t"
+    "addu %[r1], %[r3], %[r1] \n\t"
+    // Rotate by cos (r6) / sin (r7): products of the first scaled value.
+    "sra %[r2], %[r0], 16 \n\t"
+    "andi %[r0], %[r0], 0xFFFF \n\t"
+    "mul %[r9], %[r2], %[r6] \n\t"
+    "mul %[r2], %[r2], %[r7] \n\t"
+    "mul %[r8], %[r0], %[r6] \n\t"
+    "mul %[r0], %[r0], %[r7] \n\t"
+    // High half-word of the second scaled value (held in r1), mirroring the
+    // r2 / r0 split above.
+    "sra %[r3], %[r1], 16 \n\t"
+    "andi %[r1], %[r1], 0xFFFF \n\t"
+    "sll %[r9], %[r9], 2 \n\t"
+    "sll %[r2], %[r2], 2 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r8], %[r8], 14 \n\t"
+    "shra_r.w %[r0], %[r0], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r8], %[r8], 0x2000 \n\t"
+    "addiu %[r0], %[r0], 0x2000 \n\t"
+    "sra %[r8], %[r8], 14 \n\t"
+    "sra %[r0], %[r0], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu %[r9], %[r9], %[r8] \n\t"
+    "addu %[r2], %[r2], %[r0] \n\t"
+    // Products of the second scaled value with cos (r6) and sin (r7).
+    "mul %[r0], %[r3], %[r6] \n\t"
+    "mul %[r3], %[r3], %[r7] \n\t"
+    "mul %[r8], %[r1], %[r6] \n\t"
+    "mul %[r1], %[r1], %[r7] \n\t"
+    "addiu %[cosptr], %[cosptr], 2 \n\t"
+    "addiu %[sinptr], %[sinptr], 2 \n\t"
+    "sll %[r0], %[r0], 2 \n\t"
+    "sll %[r3], %[r3], 2 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r8], %[r8], 14 \n\t"
+    "shra_r.w %[r1], %[r1], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r8], %[r8], 0x2000 \n\t"
+    "addiu %[r1], %[r1], 0x2000 \n\t"
+    "sra %[r8], %[r8], 14 \n\t"
+    "sra %[r1], %[r1], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu %[r0], %[r0], %[r8] \n\t"
+    "addu %[r3], %[r3], %[r1] \n\t"
+    "subu %[r9], %[r9], %[r3] \n\t"
+    "addu %[r0], %[r0], %[r2] \n\t"
+    // Multiply both results by 31727 with a rounded 11-bit right shift.
+    "sra %[r1], %[r9], 16 \n\t"
+    "andi %[r9], %[r9], 0xFFFF \n\t"
+    "mul %[r1], %[r1], %[r5] \n\t"
+    "mul %[r9], %[r9], %[r5] \n\t"
+    "sra %[r2], %[r0], 16 \n\t"
+    "andi %[r0], %[r0], 0xFFFF \n\t"
+    "mul %[r2], %[r2], %[r5] \n\t"
+    "mul %[r0], %[r0], %[r5] \n\t"
+    "sll %[r1], %[r1], 5 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r9], %[r9], 11 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r9], %[r9], 0x400 \n\t"
+    "sra %[r9], %[r9], 11 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu %[r1], %[r1], %[r9] \n\t"
+    "sll %[r2], %[r2], 5 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r0], %[r0], 11 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r0], %[r0], 0x400 \n\t"
+    "sra %[r0], %[r0], 11 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu %[r0], %[r0], %[r2] \n\t"
+    "sw %[r1], 0(%[outre1]) \n\t"
+    "addiu %[outre1], %[outre1], 4 \n\t"
+    "sw %[r0], 0(%[outre2]) \n\t"
+    "bgtz %[k], 1b \n\t"
+    " addiu %[outre2], %[outre2], 4 \n\t"
+    "b 3f \n\t"
+    " nop \n\t"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+    // Negative shift: same processing as above, but the values are shifted
+    // left by max1 instead of right by max.
+   "2: \n\t"
+#if defined(MIPS_DSP_R2_LE)
+    // One bit less compensates for the doubling done by muleq_s.w.
+    "addiu %[max1], %[max1], -1 \n\t"
+   "21: \n\t"
+    "lwl %[r0], 0(%[inre]) \n\t"
+    "lwl %[r1], 0(%[inim]) \n\t"
+    "lh %[r2], 0(%[cosptr]) \n\t"
+    "lwr %[r0], 0(%[inre]) \n\t"
+    "lwr %[r1], 0(%[inim]) \n\t"
+    "lh %[r3], 0(%[sinptr]) \n\t"
+    "muleq_s.w.phr %[r6], %[r0], %[r4] \n\t"
+    "muleq_s.w.phr %[r7], %[r1], %[r4] \n\t"
+    "muleq_s.w.phl %[r0], %[r0], %[r4] \n\t"
+    "muleq_s.w.phl %[r1], %[r1], %[r4] \n\t"
+    "addiu %[k], %[k], -2 \n\t"
+    "addiu %[inre], %[inre], 4 \n\t"
+    "addiu %[inim], %[inim], 4 \n\t"
+    "sllv %[r6], %[r6], %[max1] \n\t"
+    "sllv %[r7], %[r7], %[max1] \n\t"
+    "mult $ac0, %[r2], %[r6] \n\t"
+    "mult $ac1, %[r3], %[r7] \n\t"
+    "mult $ac2, %[r2], %[r7] \n\t"
+    "mult $ac3, %[r3], %[r6] \n\t"
+    "lh %[r2], 2(%[cosptr]) \n\t"
+    "lh %[r3], 2(%[sinptr]) \n\t"
+    "extr_r.w %[r6], $ac0, 14 \n\t"
+    "extr_r.w %[r7], $ac1, 14 \n\t"
+    "extr_r.w %[r8], $ac2, 14 \n\t"
+    "extr_r.w %[r9], $ac3, 14 \n\t"
+    "sllv %[r0], %[r0], %[max1] \n\t"
+    "sllv %[r1], %[r1], %[max1] \n\t"
+    "mult $ac0, %[r2], %[r0] \n\t"
+    "mult $ac1, %[r3], %[r1] \n\t"
+    "mult $ac2, %[r2], %[r1] \n\t"
+    "mult $ac3, %[r3], %[r0] \n\t"
+    "addiu %[cosptr], %[cosptr], 4 \n\t"
+    "extr_r.w %[r0], $ac0, 14 \n\t"
+    "extr_r.w %[r1], $ac1, 14 \n\t"
+    "extr_r.w %[r2], $ac2, 14 \n\t"
+    "extr_r.w %[r3], $ac3, 14 \n\t"
+    "subu %[r6], %[r6], %[r7] \n\t"
+    "addu %[r8], %[r8], %[r9] \n\t"
+    "mult $ac0, %[r5], %[r6] \n\t"
+    "mult $ac1, %[r5], %[r8] \n\t"
+    "addiu %[sinptr], %[sinptr], 4 \n\t"
+    "subu %[r0], %[r0], %[r1] \n\t"
+    "addu %[r2], %[r2], %[r3] \n\t"
+    "extr_r.w %[r1], $ac0, 11 \n\t"
+    "extr_r.w %[r3], $ac1, 11 \n\t"
+    "mult $ac2, %[r5], %[r0] \n\t"
+    "mult $ac3, %[r5], %[r2] \n\t"
+    "sw %[r1], 0(%[outre1]) \n\t"
+    "sw %[r3], 0(%[outre2]) \n\t"
+    "addiu %[outre1], %[outre1], 8 \n\t"
+    "extr_r.w %[r0], $ac2, 11 \n\t"
+    "extr_r.w %[r2], $ac3, 11 \n\t"
+    "sw %[r0], -4(%[outre1]) \n\t"
+    "sw %[r2], 4(%[outre2]) \n\t"
+    "bgtz %[k], 21b \n\t"
+    " addiu %[outre2], %[outre2], 8 \n\t"
+    "b 3f \n\t"
+    " nop \n\t"
+#else  // #if defined(MIPS_DSP_R2_LE)
+    "lh %[r0], 0(%[inre]) \n\t"
+    "lh %[r1], 0(%[inim]) \n\t"
+    "addiu %[k], %[k], -1 \n\t"
+    "sllv %[r0], %[r0], %[max1] \n\t"
+    "sllv %[r1], %[r1], %[max1] \n\t"
+    "sra %[r2], %[r0], 16 \n\t"
+    "andi %[r0], %[r0], 0xFFFF \n\t"
+    "sra %[r0], %[r0], 1 \n\t"
+    "sra %[r3], %[r1], 16 \n\t"
+    "andi %[r1], %[r1], 0xFFFF \n\t"
+    "sra %[r1], %[r1], 1 \n\t"
+    "mul %[r2], %[r2], %[r4] \n\t"
+    "mul %[r0], %[r0], %[r4] \n\t"
+    "mul %[r3], %[r3], %[r4] \n\t"
+    "mul %[r1], %[r1], %[r4] \n\t"
+    "addiu %[inre], %[inre], 2 \n\t"
+    "addiu %[inim], %[inim], 2 \n\t"
+    "lh %[r6], 0(%[cosptr]) \n\t"
+    "lh %[r7], 0(%[sinptr]) \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r0], %[r0], 15 \n\t"
+    "shra_r.w %[r1], %[r1], 15 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r0], %[r0], 0x4000 \n\t"
+    "addiu %[r1], %[r1], 0x4000 \n\t"
+    "sra %[r0], %[r0], 15 \n\t"
+    "sra %[r1], %[r1], 15 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu %[r0], %[r2], %[r0] \n\t"
+    "addu %[r1], %[r3], %[r1] \n\t"
+    "sra %[r2], %[r0], 16 \n\t"
+    "andi %[r0], %[r0], 0xFFFF \n\t"
+    "mul %[r9], %[r2], %[r6] \n\t"
+    "mul %[r2], %[r2], %[r7] \n\t"
+    "mul %[r8], %[r0], %[r6] \n\t"
+    "mul %[r0], %[r0], %[r7] \n\t"
+    "sra %[r3], %[r1], 16 \n\t"
+    "andi %[r1], %[r1], 0xFFFF \n\t"
+    "sll %[r9], %[r9], 2 \n\t"
+    "sll %[r2], %[r2], 2 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r8], %[r8], 14 \n\t"
+    "shra_r.w %[r0], %[r0], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r8], %[r8], 0x2000 \n\t"
+    "addiu %[r0], %[r0], 0x2000 \n\t"
+    "sra %[r8], %[r8], 14 \n\t"
+    "sra %[r0], %[r0], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu %[r9], %[r9], %[r8] \n\t"
+    "addu %[r2], %[r2], %[r0] \n\t"
+    "mul %[r0], %[r3], %[r6] \n\t"
+    "mul %[r3], %[r3], %[r7] \n\t"
+    "mul %[r8], %[r1], %[r6] \n\t"
+    "mul %[r1], %[r1], %[r7] \n\t"
+    "addiu %[cosptr], %[cosptr], 2 \n\t"
+    "addiu %[sinptr], %[sinptr], 2 \n\t"
+    "sll %[r0], %[r0], 2 \n\t"
+    "sll %[r3], %[r3], 2 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r8], %[r8], 14 \n\t"
+    "shra_r.w %[r1], %[r1], 14 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r8], %[r8], 0x2000 \n\t"
+    "addiu %[r1], %[r1], 0x2000 \n\t"
+    "sra %[r8], %[r8], 14 \n\t"
+    "sra %[r1], %[r1], 14 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu %[r0], %[r0], %[r8] \n\t"
+    "addu %[r3], %[r3], %[r1] \n\t"
+    "subu %[r9], %[r9], %[r3] \n\t"
+    "addu %[r0], %[r0], %[r2] \n\t"
+    "sra %[r1], %[r9], 16 \n\t"
+    "andi %[r9], %[r9], 0xFFFF \n\t"
+    "mul %[r1], %[r1], %[r5] \n\t"
+    "mul %[r9], %[r9], %[r5] \n\t"
+    "sra %[r2], %[r0], 16 \n\t"
+    "andi %[r0], %[r0], 0xFFFF \n\t"
+    "mul %[r2], %[r2], %[r5] \n\t"
+    "mul %[r0], %[r0], %[r5] \n\t"
+    "sll %[r1], %[r1], 5 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r9], %[r9], 11 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r9], %[r9], 0x400 \n\t"
+    "sra %[r9], %[r9], 11 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu %[r1], %[r1], %[r9] \n\t"
+    "sll %[r2], %[r2], 5 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shra_r.w %[r0], %[r0], 11 \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu %[r0], %[r0], 0x400 \n\t"
+    "sra %[r0], %[r0], 11 \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu %[r0], %[r0], %[r2] \n\t"
+    "sw %[r1], 0(%[outre1]) \n\t"
+    "addiu %[outre1], %[outre1], 4 \n\t"
+    "sw %[r0], 0(%[outre2]) \n\t"
+    "bgtz %[k], 2b \n\t"
+    " addiu %[outre2], %[outre2], 4 \n\t"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+   "3: \n\t"
+    ".set pop \n\t"
+    : [k] "+r" (k), [r0] "=&r" (r0), [r1] "=&r" (r1),
+      [r2] "=&r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4),
+      [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
+      [r8] "=&r" (r8), [r9] "=&r" (r9), [max1] "=&r" (max1),
+      [inre] "=&r" (inre), [inim] "=&r" (inim),
+      [outre1] "=&r" (outre1), [outre2] "=&r" (outre2),
+      [max] "+r" (max), [cosptr] "+r" (cosptr), [sinptr] "+r" (sinptr)
+    : [inreQ7] "r" (inreQ7), [inimQ7] "r" (inimQ7),
+      [outre1Q16] "r" (outre1Q16), [outre2Q16] "r" (outre2Q16)
+    : "hi", "lo", "memory"
+#if defined(MIPS_DSP_R2_LE)
+    , "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+  );
+}
diff --git a/modules/audio_coding/codecs/isac/fix/source/transform_tables.c b/modules/audio_coding/codecs/isac/fix/source/transform_tables.c
new file mode 100644
index 00000000..ee96b8e3
--- /dev/null
+++ b/modules/audio_coding/codecs/isac/fix/source/transform_tables.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file contains the trigonometric look-up tables used by the
+ * transform functions WebRtcIsacfix_Time2Spec and WebRtcIsacfix_Spec2Time.
+ */
+
+#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h"
+#include "webrtc/typedefs.h"
+
+#if !(defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
+/* Cosine table 1 in Q14 (16384 == 1.0): 16384 down to -16383, 0 at [120]. */
+const int16_t WebRtcIsacfix_kCosTab1[FRAMESAMPLES/2] = {
+ 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270,
+ 16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880,
+ 15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218,
+ 15137, 15053, 14968, 14879, 14788, 14694, 14598, 14500, 14399, 14295,
+ 14189, 14081, 13970, 13856, 13741, 13623, 13502, 13380, 13255, 13128,
+ 12998, 12867, 12733, 12597, 12458, 12318, 12176, 12031, 11885, 11736,
+ 11585, 11433, 11278, 11121, 10963, 10803, 10641, 10477, 10311, 10143,
+ 9974, 9803, 9630, 9456, 9280, 9102, 8923, 8743, 8561, 8377,
+ 8192, 8006, 7818, 7629, 7438, 7246, 7053, 6859, 6664, 6467,
+ 6270, 6071, 5872, 5671, 5469, 5266, 5063, 4859, 4653, 4447,
+ 4240, 4033, 3825, 3616, 3406, 3196, 2986, 2775, 2563, 2351,
+ 2139, 1926, 1713, 1499, 1285, 1072, 857, 643, 429, 214,
+ 0, -214, -429, -643, -857, -1072, -1285, -1499, -1713, -1926,
+ -2139, -2351, -2563, -2775, -2986, -3196, -3406, -3616, -3825, -4033,
+ -4240, -4447, -4653, -4859, -5063, -5266, -5469, -5671, -5872, -6071,
+ -6270, -6467, -6664, -6859, -7053, -7246, -7438, -7629, -7818, -8006,
+ -8192, -8377, -8561, -8743, -8923, -9102, -9280, -9456, -9630, -9803,
+ -9974, -10143, -10311, -10477, -10641, -10803, -10963, -11121, -11278, -11433,
+ -11585, -11736, -11885, -12031, -12176, -12318, -12458, -12597, -12733,
+ -12867, -12998, -13128, -13255, -13380, -13502, -13623, -13741, -13856,
+ -13970, -14081, -14189, -14295, -14399, -14500, -14598, -14694, -14788,
+ -14879, -14968, -15053, -15137, -15218, -15296, -15371, -15444, -15515,
+ -15582, -15647, -15709, -15769, -15826, -15880, -15931, -15980, -16026,
+ -16069, -16110, -16147, -16182, -16214, -16244, -16270, -16294, -16315,
+ -16333, -16349, -16362, -16371, -16378, -16383
+};
+
+/* Sine table 1 in Q14 (16384 == 1.0): 0 up to 16384 at [120], back to 214. */
+const int16_t WebRtcIsacfix_kSinTab1[FRAMESAMPLES/2] = {
+ 0, 214, 429, 643, 857, 1072, 1285, 1499, 1713, 1926,
+ 2139, 2351, 2563, 2775, 2986, 3196, 3406, 3616, 3825, 4033,
+ 4240, 4447, 4653, 4859, 5063, 5266, 5469, 5671, 5872, 6071,
+ 6270, 6467, 6664, 6859, 7053, 7246, 7438, 7629, 7818, 8006,
+ 8192, 8377, 8561, 8743, 8923, 9102, 9280, 9456, 9630, 9803,
+ 9974, 10143, 10311, 10477, 10641, 10803, 10963, 11121, 11278, 11433,
+ 11585, 11736, 11885, 12031, 12176, 12318, 12458, 12597, 12733, 12867,
+ 12998, 13128, 13255, 13380, 13502, 13623, 13741, 13856, 13970, 14081,
+ 14189, 14295, 14399, 14500, 14598, 14694, 14788, 14879, 14968, 15053,
+ 15137, 15218, 15296, 15371, 15444, 15515, 15582, 15647, 15709, 15769,
+ 15826, 15880, 15931, 15980, 16026, 16069, 16110, 16147, 16182, 16214,
+ 16244, 16270, 16294, 16315, 16333, 16349, 16362, 16371, 16378, 16383,
+ 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270,
+ 16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880,
+ 15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218,
+ 15137, 15053, 14968, 14879, 14788, 14694, 14598, 14500, 14399, 14295,
+ 14189, 14081, 13970, 13856, 13741, 13623, 13502, 13380, 13255, 13128,
+ 12998, 12867, 12733, 12597, 12458, 12318, 12176, 12031, 11885, 11736,
+ 11585, 11433, 11278, 11121, 10963, 10803, 10641, 10477, 10311, 10143,
+ 9974, 9803, 9630, 9456, 9280, 9102, 8923, 8743, 8561, 8377,
+ 8192, 8006, 7818, 7629, 7438, 7246, 7053, 6859, 6664, 6467,
+ 6270, 6071, 5872, 5671, 5469, 5266, 5063, 4859, 4653, 4447,
+ 4240, 4033, 3825, 3616, 3406, 3196, 2986, 2775, 2563, 2351,
+ 2139, 1926, 1713, 1499, 1285, 1072, 857, 643, 429, 214
+};
+
+
+/* Sine table 2 in Q14: signs alternate, magnitude falls from 16384 to 107. */
+const int16_t WebRtcIsacfix_kSinTab2[FRAMESAMPLES/4] = {
+ 16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305, 16283, -16257,
+ 16229, -16199, 16165, -16129, 16090, -16048, 16003, -15956, 15906, -15853,
+ 15798, -15739, 15679, -15615, 15549, -15480, 15408, -15334, 15257, -15178,
+ 15095, -15011, 14924, -14834, 14741, -14647, 14549, -14449, 14347, -14242,
+ 14135, -14025, 13913, -13799, 13682, -13563, 13441, -13318, 13192, -13063,
+ 12933, -12800, 12665, -12528, 12389, -12247, 12104, -11958, 11810, -11661,
+ 11509, -11356, 11200, -11042, 10883, -10722, 10559, -10394, 10227, -10059,
+ 9889, -9717, 9543, -9368, 9191, -9013, 8833, -8652, 8469, -8285,
+ 8099, -7912, 7723, -7534, 7342, -7150, 6957, -6762, 6566, -6369,
+ 6171, -5971, 5771, -5570, 5368, -5165, 4961, -4756, 4550, -4344,
+ 4137, -3929, 3720, -3511, 3301, -3091, 2880, -2669, 2457, -2245,
+ 2032, -1819, 1606, -1392, 1179, -965, 750, -536, 322, -107
+};
+#endif
+
+#if defined(MIPS32_LE)
+/* Cosine table 2 in Q14. Used only on MIPS; [i] == -kSinTab2[119 - i]. */
+const int16_t WebRtcIsacfix_kCosTab2[FRAMESAMPLES/4] = {
+ 107, -322, 536, -750, 965, -1179, 1392, -1606, 1819, -2032,
+ 2245, -2457, 2669, -2880, 3091, -3301, 3511, -3720, 3929, -4137,
+ 4344, -4550, 4756, -4961, 5165, -5368, 5570, -5771, 5971, -6171,
+ 6369, -6566, 6762, -6957, 7150, -7342, 7534, -7723, 7912, -8099,
+ 8285, -8469, 8652, -8833, 9013, -9191, 9368, -9543, 9717, -9889,
+ 10059, -10227, 10394, -10559, 10722, -10883, 11042, -11200, 11356, -11509,
+ 11661, -11810, 11958, -12104, 12247, -12389, 12528, -12665, 12800, -12933,
+ 13063, -13192, 13318, -13441, 13563, -13682, 13799, -13913, 14025, -14135,
+ 14242, -14347, 14449, -14549, 14647, -14741, 14834, -14924, 15011, -15095,
+ 15178, -15257, 15334, -15408, 15480, -15549, 15615, -15679, 15739, -15798,
+ 15853, -15906, 15956, -16003, 16048, -16090, 16129, -16165, 16199, -16229,
+ 16257, -16283, 16305, -16325, 16342, -16356, 16367, -16375, 16381, -16384
+};
+#endif