about summary refs log tree commit diff
diff options
context:
space:
mode:
author Xin Li <delphij@google.com> 2019-10-30 11:48:04 -0700
committer Xin Li <delphij@google.com> 2019-10-30 11:48:04 -0700
commit 40a724f17528955c6be91eaf8b2576189492e833 (patch)
tree d63135df0e611e34319dc4f50f04fa715a1a17bf
parent 7b0a680a6fa4c320699b43484e54cf3a74bac199 (diff)
parent 617eb5bacb920a1fa5ea2aea8cfff07f688d6dcb (diff)
download libopus-40a724f17528955c6be91eaf8b2576189492e833.tar.gz
DO NOT MERGE - qt-qpr1-dev-plus-aosp-without-vendor@5915889 into stage-aosp-master temp_b_145570283
Bug: 142003500 Change-Id: I0a3115f735d47ac9693aa074fbc2a48faa86e179
-rw-r--r-- silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c 11
1 files changed, 8 insertions, 3 deletions
diff --git a/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c b/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c
index ee06f986..6f3be025 100644
--- a/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c
+++ b/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c
@@ -84,7 +84,9 @@ void silk_warped_autocorrelation_FIX_neon(
silk_assert( ( order & 1 ) == 0 );
silk_assert( 2 * QS - QC >= 0 );
- ALLOC( input_QST, length + 2 * MAX_SHAPE_LPC_ORDER, opus_int32 );
+ /* The additional +4 is to ensure a later vld1q_s32 call does not overflow. */
+ /* Strictly, only +3 is needed but +4 simplifies initialization using the 4x32 neon load. */
+ ALLOC( input_QST, length + 2 * MAX_SHAPE_LPC_ORDER + 4, opus_int32 );
input_QS = input_QST;
/* input_QS has zero paddings in the beginning and end. */
@@ -121,6 +123,8 @@ void silk_warped_autocorrelation_FIX_neon(
vst1q_s32( input_QS, vdupq_n_s32( 0 ) );
input_QS += 4;
vst1q_s32( input_QS, vdupq_n_s32( 0 ) );
+ input_QS += 4;
+ vst1q_s32( input_QS, vdupq_n_s32( 0 ) );
input_QS = input_QST + MAX_SHAPE_LPC_ORDER - orderT;
/* The following loop runs ( length + order ) times, with ( order ) extra epilogues. */
@@ -153,7 +157,8 @@ void silk_warped_autocorrelation_FIX_neon(
opus_int o = orderT;
int32x4_t state_QS_s32x4[ 3 ][ 2 ];
- ALLOC( state, length + orderT, opus_int32 );
+ /* The additional +4 is to ensure a later vld1q_s32 call does not overflow. */
+ ALLOC( state, length + order + 4, opus_int32 );
state_QS_s32x4[ 2 ][ 1 ] = vdupq_n_s32( 0 );
/* Calculate 8 taps of all inputs in each loop. */
@@ -172,7 +177,7 @@ void silk_warped_autocorrelation_FIX_neon(
state_QS_s32x4[ 0 ][ 1 ] = calc_state( state_QS_s32x4[ 0 ][ 1 ], state_QS_s32x4[ 2 ][ 1 ], state_QS_s32x4[ 1 ][ 1 ], warping_Q16_s32x4 );
state_QS_s32x4[ 1 ][ 0 ] = state_QS_s32x4[ 2 ][ 0 ];
state_QS_s32x4[ 1 ][ 1 ] = state_QS_s32x4[ 2 ][ 1 ];
- } while( ++n < ( length + order - 3) );
+ } while( ++n < ( length + order ) );
in = state;
o -= 8;
} while( o > 4 );