aboutsummaryrefslogtreecommitdiff
path: root/src/modules/audio_coding/codecs/isac/fix/source/pitch_filter_armv6.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/modules/audio_coding/codecs/isac/fix/source/pitch_filter_armv6.S')
-rw-r--r--src/modules/audio_coding/codecs/isac/fix/source/pitch_filter_armv6.S147
1 files changed, 147 insertions, 0 deletions
diff --git a/src/modules/audio_coding/codecs/isac/fix/source/pitch_filter_armv6.S b/src/modules/audio_coding/codecs/isac/fix/source/pitch_filter_armv6.S
new file mode 100644
index 0000000000..7ce3b6f26d
--- /dev/null
+++ b/src/modules/audio_coding/codecs/isac/fix/source/pitch_filter_armv6.S
@@ -0,0 +1,147 @@
+@
+@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+@
+@ Use of this source code is governed by a BSD-style license
+@ that can be found in the LICENSE file in the root of the source
+@ tree. An additional intellectual property rights grant can be found
+@ in the file PATENTS. All contributing project authors may
+@ be found in the AUTHORS file in the root of the source tree.
+@
+
+@ Contains the core loop routine for the pitch filter function in iSAC,
+@ optimized for ARMv6 (and later) platforms.
+@
+@ Output is bit-exact with the reference C code in pitch_filter.c.
+
+#include "settings.h"
+
+.arch armv6
+.align 2
+.global WebRtcIsacfix_PitchFilterCore
+
+
+@ void WebRtcIsacfix_PitchFilterCore(int loopNumber,
+@ WebRtc_Word16 gain,
+@ int index,
+@ WebRtc_Word16 sign,
+@ WebRtc_Word16* inputState,
+@ WebRtc_Word16* outputBuf2,
+@ const WebRtc_Word16* coefficient,
+@ WebRtc_Word16* inputBuf,
+@ WebRtc_Word16* outputBuf,
+@ int* index2) {
+
+WebRtcIsacfix_PitchFilterCore: @ In: r0 = loopNumber, r1 = gain, r2 = index, r3 = sign; other args on the stack.
+.fnstart
+ push {r4-r11} @ Save the eight registers reused below (32 bytes).
+ sub sp, #8 @ Two local words: [sp] and [sp, #4].
+
+ str r0, [sp] @ loopNumber (this slot is not read back in the code below).
+ str r3, [sp, #4] @ sign, reloaded in the loop because r3 becomes a pointer.
+ ldr r3, [sp, #44] @ outputBuf2 (stack args start at sp + 40: 32 pushed + 8 local).
+ ldr r6, [sp, #60] @ index2
+ ldr r7, [r6] @ *index2
+ ldr r8, [sp, #52] @ inputBuf
+ ldr r12, [sp, #56] @ outputBuf
+
+ add r4, r7, r0 @ *index2 + loopNumber
+ str r4, [r6] @ Store return value to index2.
+
+ mov r10, r7, asl #1 @ Byte offset of element *index2 (2 bytes each).
+ add r12, r10 @ &outputBuf[*index2]
+ add r8, r10 @ &inputBuf[*index2]
+
+ add r4, r7, #PITCH_BUFFSIZE @ *index2 + PITCH_BUFFSIZE
+ add r6, r3, r4, lsl #1 @ &outputBuf2[*index2 + PITCH_BUFFSIZE]
+ sub r4, r2 @ r2: index
+ sub r4, #2 @ *index2 + PITCH_BUFFSIZE - index - 2
+ add r3, r4, lsl #1 @ &ubufQQpos2[*index2], located inside outputBuf2.
+ ldr r9, [sp, #48] @ coefficient
+
+LOOP:
+@ Usage of registers in the loop:
+@ r0: loop counter (counts down from loopNumber)
+@ r1: gain
+@ r2: tmpW32
+@ r3: &ubufQQpos2[]
+@ r6: &outputBuf2[]
+@ r8: &inputBuf[]
+@ r9: &coefficient[]
+@ r12: &outputBuf[]
+@ r4, r5, r7, r10, r11: scratch
+
+ @ Filter to get fractional pitch.
+ @ The pitch filter loop here is unrolled with 9 multiplications.
+ pld [r3]
+ ldr r10, [r3], #4 @ ubufQQpos2[*index2 + 0, *index2 + 1]
+ ldr r4, [r9], #4 @ coefficient[0, 1]
+ ldr r11, [r3], #4
+ ldr r5, [r9], #4
+ smuad r2, r10, r4 @ Products 0 and 1 start tmpW32.
+ smlad r2, r11, r5, r2 @ Accumulate products 2 and 3.
+
+ ldr r10, [r3], #4
+ ldr r4, [r9], #4
+ ldr r11, [r3], #4
+ ldr r5, [r9], #4
+ smlad r2, r10, r4, r2 @ Accumulate products 4 and 5.
+ ldrh r10, [r3], #-14 @ 9th sample; r3 ends 2 bytes past its loop-entry value (next sample).
+ ldrh r4, [r9], #-16 @ coefficient[8]; r9 back to &coefficient[0].
+ smlad r2, r11, r5, r2 @ Accumulate products 6 and 7.
+ smlabb r2, r10, r4, r2 @ Accumulate product 8 (bottom halfwords).
+
+ @ Saturate to avoid overflow in tmpW16.
+ asr r2, #1
+ add r4, r2, #0x1000 @ Rounding term for the shift by 13 below.
+ ssat r7, #16, r4, asr #13 @ tmpW16 = sat16(r4 >> 13)
+
+ @ Shift low pass filter state, and execute the low pass filter.
+ @ The memmove() and the low pass filter loop are unrolled and mixed.
+ smulbb r5, r1, r7 @ gain * tmpW16
+ add r7, r5, #0x800 @ Rounding term for the shift by 12 below.
+ asr r7, #12 @ Get the value for inputState[0].
+ ldr r11, [sp, #40] @ inputState
+ pld [r11]
+ adr r10, kDampFilter
+ ldrsh r4, [r10], #2 @ kDampFilter[0]; r10 now points at kDampFilter[1].
+ mul r2, r7, r4 @ tmpW32 = new inputState[0] * kDampFilter[0]
+ ldr r4, [r11] @ inputState[0, 1], before shift.
+ strh r7, [r11] @ inputState[0], after shift.
+ ldr r5, [r11, #4] @ inputState[2, 3], before shift.
+ ldr r7, [r10], #4 @ kDampFilter[1, 2]; address is kDampFilter + 2, so not word aligned.
+ ldr r10, [r10] @ kDampFilter[3, 4]
+ str r4, [r11, #2] @ inputState[1, 2], after shift. NOTE(review): word store at +2 - confirm unaligned access is OK on targets.
+ str r5, [r11, #6] @ inputState[3, 4], after shift.
+ smlad r2, r4, r7, r2 @ += old inputState[0, 1] * kDampFilter[1, 2]
+ smlad r2, r5, r10, r2 @ += old inputState[2, 3] * kDampFilter[3, 4]
+
+ @ Saturate to avoid overflow.
+ @ First shift the sample to the range of [0xC0000000, 0x3FFFFFFF],
+ @ to avoid overflow in the next saturation step.
+ asr r2, #1
+ add r10, r2, #0x2000 @ Rounding term for the shift by 14 below.
+ ssat r10, #16, r10, asr #14
+
+ @ Subtract from input and update buffer.
+ ldr r11, [sp, #4] @ sign
+ ldrsh r4, [r8] @ inputBuf[*index2] (same element as the next load).
+ ldrsh r7, [r8], #2 @ inputBuf[*index2]
+ smulbb r5, r11, r10 @ sign * low pass filter output
+ subs r0, #1 @ Count down; these flags feed the bgt at the loop end.
+ sub r4, r5 @ inputBuf[*index2] - sign * filter output
+ ssat r2, #16, r4
+ strh r2, [r12], #2 @ outputBuf[*index2]
+
+ add r2, r7 @ outputBuf[*index2] + inputBuf[*index2]
+ ssat r2, #16, r2
+ strh r2, [r6], #2 @ outputBuf2[*index2 + PITCH_BUFFSIZE]
+ bgt LOOP @ Repeat while the counter is still positive.
+
+ add sp, #8 @ Release the two local words.
+ pop {r4-r11}
+ bx lr
+.fnend
+
+.align 2
+kDampFilter: @ Five 16-bit low pass filter taps, indexed 0..4 by the loop above.
+ .short -2294, 8192, 20972, 8192, -2294