diff options
Diffstat (limited to 'src/modules/audio_coding/codecs/isac/fix/source/pitch_filter_armv6.S')
-rw-r--r-- | src/modules/audio_coding/codecs/isac/fix/source/pitch_filter_armv6.S | 147 |
1 files changed, 147 insertions, 0 deletions
@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@

@ Contains the core loop routine for the pitch filter function in iSAC,
@ optimized for ARMv6 platforms.
@
@ Output is bit-exact with the reference C code in pitch_filter.c.

#include "settings.h"

.arch armv6
.align 2
.global WebRtcIsacfix_PitchFilterCore


@ void WebRtcIsacfix_PitchFilterCore(int loopNumber,
@                                    WebRtc_Word16 gain,
@                                    int index,
@                                    WebRtc_Word16 sign,
@                                    WebRtc_Word16* inputState,
@                                    WebRtc_Word16* outputBuf2,
@                                    const WebRtc_Word16* coefficient,
@                                    WebRtc_Word16* inputBuf,
@                                    WebRtc_Word16* outputBuf,
@                                    int* index2) {
@
@ Arguments on entry:
@   r0: loopNumber, r1: gain, r2: index, r3: sign,
@   remaining six arguments are on the caller's stack.
@
@ Stack layout after the prologue (push of 8 registers + 8 bytes of locals):
@   [sp, #0]   unused padding
@   [sp, #4]   sign
@   [sp, #8]   saved r4-r11 (32 bytes)
@   [sp, #40]  inputState
@   [sp, #44]  outputBuf2
@   [sp, #48]  coefficient
@   [sp, #52]  inputBuf
@   [sp, #56]  outputBuf
@   [sp, #60]  index2
@
@ Side effects: writes loopNumber samples to outputBuf[] and outputBuf2[],
@ shifts/updates inputState[0..4], and adds loopNumber to *index2.
@ Clobbers r0-r3, r12 and the flags; r4-r11 are saved and restored.
@
@ Note: the word loads/stores on inputState[], kDampFilter[] and
@ ubufQQpos2[] are not guaranteed to be word aligned, so this routine relies
@ on the target allowing unaligned word accesses.

WebRtcIsacfix_PitchFilterCore:
.fnstart
  @ The loop below is bottom-tested, so without this guard a call with
  @ loopNumber <= 0 would still process one sample and modify *index2.
  @ Return before touching the stack or any buffer in that case.
  cmp r0, #0
  bxle lr

  push {r4-r11}
  sub sp, #8

  str r3, [sp, #4]            @ sign (r3 is reused as a pointer below)
  ldr r3, [sp, #44]           @ outputBuf2
  ldr r6, [sp, #60]           @ index2
  ldr r7, [r6]                @ *index2
  ldr r8, [sp, #52]           @ inputBuf
  ldr r12, [sp, #56]          @ outputBuf

  add r4, r7, r0
  str r4, [r6]                @ Store return value to index2
                              @ (*index2 += loopNumber).

  mov r10, r7, asl #1         @ Byte offset of element *index2.
  add r12, r10                @ &outputBuf[*index2]
  add r8, r10                 @ &inputBuf[*index2]

  add r4, r7, #PITCH_BUFFSIZE @ *index2 + PITCH_BUFFSIZE
  add r6, r3, r4, lsl #1      @ &outputBuf2[*index2 + PITCH_BUFFSIZE]
  sub r4, r2                  @ r2: index
  sub r4, #2                  @ *index2 + PITCH_BUFFSIZE - index - 2
  add r3, r4, lsl #1          @ &ubufQQpos2[*index2]
  ldr r9, [sp, #48]           @ coefficient

LOOP:
@ Usage of registers in the loop:
@  r0: loop counter
@  r1: gain
@  r2: tmpW32
@  r3: &ubufQQpos2[]
@  r6: &outputBuf2[]
@  r8: &inputBuf[]
@  r9: &coefficient[]
@  r12: &outputBuf[]
@  r4, r5, r7, r10, r11: scratch

  @ Filter to get fractional pitch.
  @ The pitch filter loop here is unrolled with 9 multiplications:
  @ four dual 16x16 multiply-accumulates plus one single 16x16 one.
  pld [r3]
  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 0, *index2 + 1]
  ldr r4, [r9], #4            @ coefficient[0, 1]
  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 2, *index2 + 3]
  ldr r5, [r9], #4            @ coefficient[2, 3]
  smuad r2, r10, r4
  smlad r2, r11, r5, r2

  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 4, *index2 + 5]
  ldr r4, [r9], #4            @ coefficient[4, 5]
  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 6, *index2 + 7]
  ldr r5, [r9], #4            @ coefficient[6, 7]
  smlad r2, r10, r4, r2
  ldrh r10, [r3], #-14        @ ubufQQpos2[*index2 + 8]; net +2 bytes, so
                              @ r3 = &ubufQQpos2[*index2 + 1] for the next
                              @ iteration.
  ldrh r4, [r9], #-16         @ coefficient[8]; r9 back to &coefficient[0].
  smlad r2, r11, r5, r2
  smlabb r2, r10, r4, r2      @ Only the bottom halfwords are multiplied.

  @ Saturate to avoid overflow in tmpW16.
  asr r2, #1
  add r4, r2, #0x1000
  ssat r7, #16, r4, asr #13

  @ Shift low pass filter state, and execute the low pass filter.
  @ The memmove() and the low pass filter loop are unrolled and mixed.
  smulbb r5, r1, r7           @ gain * tmpW16
  add r7, r5, #0x800          @ Rounding before the shift by 12.
  asr r7, #12                 @ Get the value for inputState[0].
  ldr r11, [sp, #40]          @ inputState
  pld [r11]
  adr r10, kDampFilter
  ldrsh r4, [r10], #2         @ kDampFilter[0]
  mul r2, r7, r4
  ldr r4, [r11]               @ inputState[0, 1], before shift.
  strh r7, [r11]              @ inputState[0], after shift.
  ldr r5, [r11, #4]           @ inputState[2, 3], before shift.
  ldr r7, [r10], #4           @ kDampFilter[1, 2]
  ldr r10, [r10]              @ kDampFilter[3, 4]
  str r4, [r11, #2]           @ inputState[1, 2], after shift.
  str r5, [r11, #6]           @ inputState[3, 4], after shift.
  smlad r2, r4, r7, r2
  smlad r2, r5, r10, r2

  @ Saturate to avoid overflow.
  @ First shift the sample to the range of [0xC0000000, 0x3FFFFFFF],
  @ to avoid overflow in the next saturation step.
  asr r2, #1
  add r10, r2, #0x2000
  ssat r10, #16, r10, asr #14

  @ Subtract from input and update buffer.
  ldr r11, [sp, #4]           @ sign
  ldrsh r7, [r8], #2          @ inputBuf[*index2]
  smulbb r5, r11, r10         @ sign * filtered sample
  subs r0, #1                 @ Sets the flags consumed by bgt below; none of
                              @ the instructions in between update N/Z/C/V.
  sub r4, r7, r5
  ssat r2, #16, r4
  strh r2, [r12], #2          @ outputBuf[*index2]

  add r2, r7                  @ outputBuf[*index2] + inputBuf[*index2]
  ssat r2, #16, r2
  strh r2, [r6], #2           @ outputBuf2[*index2 + PITCH_BUFFSIZE]
  bgt LOOP

  add sp, #8
  pop {r4-r11}
  bx lr
.fnend

@ Low pass (damping) filter coefficients. Kept in the same section as the
@ code because they are addressed PC-relative with adr.
.align 2
kDampFilter:
  .short -2294, 8192, 20972, 8192, -2294