diff options
Diffstat (limited to 'decoder/armv7/ixheaacd_tns_ar_filter_fixed.s')
-rw-r--r-- | decoder/armv7/ixheaacd_tns_ar_filter_fixed.s | 660 |
1 files changed, 330 insertions, 330 deletions
diff --git a/decoder/armv7/ixheaacd_tns_ar_filter_fixed.s b/decoder/armv7/ixheaacd_tns_ar_filter_fixed.s index d0b2dd2..48fb61c 100644 --- a/decoder/armv7/ixheaacd_tns_ar_filter_fixed.s +++ b/decoder/armv7/ixheaacd_tns_ar_filter_fixed.s @@ -28,68 +28,68 @@ .global ixheaacd_tns_ar_filter_fixed_armv7 ixheaacd_tns_ar_filter_fixed_armv7: - STMFD r13! , {r4 - r12, r14} - vpush {d8-d15} - SUB sp, sp, #128 @ state[MaximumOrder] + one more - LDR r4, [sp,#232] @order - LDR r6, [sp,#236] @shift_value - STR r1, [sp] - ADD r12, sp, #4 @ r12 = state - ANDS r5, r4, #3 - BEQ FILTER_LOOP - MOV r8, #0 - ADD r14, r3, r4,LSL #2 - RSBS r7, r5, #3 - BEQ ORDER_LOOPEND + STMFD r13! , {r4 - r12, r14} + vpush {d8-d15} + SUB sp, sp, #128 @ state[MaximumOrder] + one more + LDR r4, [sp, #232] @order + LDR r6, [sp, #236] @shift_value + STR r1, [sp] + ADD r12, sp, #4 @ r12 = state + ANDS r5, r4, #3 + BEQ FILTER_LOOP + MOV r8, #0 + ADD r14, r3, r4, LSL #2 + RSBS r7, r5, #3 + BEQ ORDER_LOOPEND ORDER_LOOP: - STR r8, [r14,#4]! @lpc[i] = 0 - SUBS r7, r7, #1 - BGT ORDER_LOOP + STR r8, [r14, #4]! @lpc[i] = 0 + SUBS r7, r7, #1 + BGT ORDER_LOOP ORDER_LOOPEND: - STR r8, [r14,#4] @lpc[i] = 0 - BIC r4, r4, #3 - ADD r4, r4, #4 @order = ( (order & 0xfffffffc) +4 ) + STR r8, [r14, #4] @lpc[i] = 0 + BIC r4, r4, #3 + ADD r4, r4, #4 @order = ( (order & 0xfffffffc) +4 ) FILTER_LOOP: - LDR r1, [sp,#240] @scaleSpec + LDR r1, [sp, #240] @scaleSpec @filtering loop here - CMP r2, #1 @ inc =1 or -1 - MOV r7, r4 @loop_count - BNE NEG_INC - - LDR r8 , [r0] @r8 =*spectrum - SUBS r7 , r7 , #1 - MOV r8, r8, lsl r1 - MOV r9, r8, asr r1 - MOV r8 , r8 ,lsl r6 - STR r8 , [r12] @state[0] = sp[top] - STR r9, [r0], #4 - BEQ FILTER_LOOP2 -FILTER_LOOP1: @siva 16 times loop run - LDR r8 , [r0] @r8 =*spectrum - SUB r5 , r4 , r7 @ - MOV r5 , r5 ,lsl #2 - MOV r11 , #0 @accu = 0 - ADD r14, r12, r5 @state[j] + CMP r2, #1 @ inc =1 or -1 + MOV r7, r4 @loop_count + BNE NEG_INC + + LDR r8 , [r0] @r8 =*spectrum + SUBS r7 , r7 , #1 + MOV r8, r8, lsl r1 + MOV r9, r8, asr r1 + MOV r8 , r8 , lsl r6 + STR r8 , [r12] @state[0] = sp[top] + STR r9, [r0], #4 + BEQ FILTER_LOOP2 +FILTER_LOOP1: @siva 16 times loop run + LDR r8 , [r0] @r8 =*spectrum + SUB r5 , r4 , r7 @ + MOV r5 , r5 , lsl #2 + MOV r11 , #0 @accu = 0 + ADD r14, r12, r5 @state[j] INNER_LOOP1: - LDR r10 , [r14,#-4] @state[j-1] - LDR r9 , [r3 , r5] @lpc[j] - SUBS r5 , r5 , #4 + LDR r10 , [r14, #-4] @state[j-1] + LDR r9 , [r3 , r5] @lpc[j] + SUBS r5 , r5 , #4 - MOV r2, #0 - SMLAL r2 , r11, r10, r9 - STR r10 , [r14], #-4 @state[j] = state[j - 1] - BGT INNER_LOOP1 + MOV r2, #0 + SMLAL r2 , r11, r10, r9 + STR r10 , [r14], #-4 @state[j] = state[j - 1] + BGT INNER_LOOP1 - MOV r8, r8, lsl r1 - SUB r8 , r8 , r11,lsl #1 - MOV r9, r8, asr r1 - STR r9 , [r0], #4 @*spectrum = y@ - SUBS r7 , r7 , #1 @i-- - MOV r8 , r8 ,lsl r6 - STR r8 , [r12] @state[0] - BGT FILTER_LOOP1 + MOV r8, r8, lsl r1 + SUB r8 , r8 , r11, lsl #1 + MOV r9, r8, asr r1 + STR r9 , [r0], #4 @*spectrum = y@ + SUBS r7 , r7 , #1 @i-- + MOV r8 , r8 , lsl r6 + STR r8 , [r12] @state[0] + BGT FILTER_LOOP1 @@ -98,456 +98,456 @@ INNER_LOOP1: FILTER_LOOP2: - LDR R1, [sp] @size - ADD R8, R3, #4 + LDR R1, [sp] @size + ADD R8, R3, #4 - SUBS R7 , R1 , r4 @size-order - BEQ EXIT + SUBS R7 , R1 , r4 @size-order + BEQ EXIT - LDR R1, [sp,#240] @scaleSpec + LDR R1, [sp, #240] @scaleSpec - MOV R5 , R4 ,LSL #2 @count for inner loop = order - VLD1.32 {D10, D11}, [R8]! @lpc[j] - MOV R14,#0 - VLD1.32 {D12, D13}, [R12]! @state[j - 1] + MOV R5 , R4 , LSL #2 @count for inner loop = order + VLD1.32 {D10, D11}, [R8]! @lpc[j] + MOV R14, #0 + VLD1.32 {D12, D13}, [R12]! @state[j - 1] - CMP R4,#4 - VLD1.32 {D18, D19}, [R8]! - BEQ ORDER4 - VLD1.32 {D22, D23}, [R12]! + CMP R4, #4 + VLD1.32 {D18, D19}, [R8]! + BEQ ORDER4 + VLD1.32 {D22, D23}, [R12]! - CMP R4,#8 - VLD1.32 {D20, D21}, [R8]! - BEQ ORDER8 - CMP R4,#12 - VLD1.32 {D24, D25}, [R12]! - BEQ ORDER12 + CMP R4, #8 + VLD1.32 {D20, D21}, [R8]! + BEQ ORDER8 + CMP R4, #12 + VLD1.32 {D24, D25}, [R12]! + BEQ ORDER12 - VLD1.32 {D26, D27},[R8]! - CMP R4,#16 - VLD1.32 {D28, D29},[R12]! - BEQ ORDER16 @order16 added + VLD1.32 {D26, D27}, [R8]! + CMP R4, #16 + VLD1.32 {D28, D29}, [R12]! + BEQ ORDER16 @order16 added - VLD1.32 {D4, D5},[R8]! - CMP R4,#20 - VLD1.32 {D8, D9},[R12]! - BEQ ORDER20 @order20 added + VLD1.32 {D4, D5}, [R8]! + CMP R4, #20 + VLD1.32 {D8, D9}, [R12]! + BEQ ORDER20 @order20 added ORDER4: - LDR r8 , [r0] @r8 = y = *spectrum + LDR r8 , [r0] @r8 = y = *spectrum OUTER_LOOP2_4: - VDUP.32 Q1,R14 @Q1= accu = 0 + VDUP.32 Q1, R14 @Q1= accu = 0 - VMLAL.S32 Q1, D10, D12 + VMLAL.S32 Q1, D10, D12 - VMLAL.S32 Q1, D11, D13 + VMLAL.S32 Q1, D11, D13 - MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec - SUBS r7 , r7 , #1 + MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec + SUBS r7 , r7 , #1 - VADD.I64 D6,D2,D3 + VADD.I64 D6, D2, D3 - VSHR.S64 D6,#32 @acc1=acc>>32 @acc = mac32_tns_neon(state[j - 1],lpc[j],acc, temp_lo)@ - @VMOV R11,D6[0] - VST1.32 D6[0],[SP] - LDR R11,[SP] + VSHR.S64 D6, #32 @acc1=acc>>32 @acc = mac32_tns_neon(state[j - 1],lpc[j],acc, temp_lo)@ + @VMOV R11,D6[0] + VST1.32 D6[0], [SP] + LDR R11, [SP] - SUB r8 , r8 , r11,lsl #1 @y=sub32(y,(acc<<1)) - MOV r2 , r8 ,lsl r6 @ shl32(y, shift_value) + SUB r8 , r8 , r11, lsl #1 @y=sub32(y,(acc<<1)) + MOV r2 , r8 , lsl r6 @ shl32(y, shift_value) - MOV r9, r8, asr r1 - VMOV.32 D15[1], R2 @state[0] + MOV r9, r8, asr r1 + VMOV.32 D15[1], R2 @state[0] - STR r9 , [r0],#4 @*spectrum = y@ - VEXT.32 Q6,Q7,Q6,#3 - LDRGT r8 , [r0] @r8 = y = *spectrum + STR r9 , [r0], #4 @*spectrum = y@ + VEXT.32 Q6, Q7, Q6, #3 + LDRGT r8 , [r0] @r8 = y = *spectrum - BGT OUTER_LOOP2_4 + BGT OUTER_LOOP2_4 - B EXIT + B EXIT ORDER8: - LDR r8 , [r0] @r8 = y = *spectrum + LDR r8 , [r0] @r8 = y = *spectrum OUTER_LOOP2_8: - VDUP.32 Q1,R14 @Q1= accu = 0 + VDUP.32 Q1, R14 @Q1= accu = 0 - VMLAL.S32 Q1, D10, D12 - VMLAL.S32 Q1, D11, D13 - VMLAL.S32 Q1, D22, D18 - VMLAL.S32 Q1, D23, D19 + VMLAL.S32 Q1, D10, D12 + VMLAL.S32 Q1, D11, D13 + VMLAL.S32 Q1, D22, D18 + VMLAL.S32 Q1, D23, D19 - MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec - VEXT.32 Q11,Q6,Q11,#3 - SUBS r7 , r7 , #1 + MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec + VEXT.32 Q11, Q6, Q11, #3 + SUBS r7 , r7 , #1 - VADD.I64 D6,D2,D3 - VSHR.S64 D6,#32 @acc = mac32_tns_neon(state[j - 1],lpc[j],acc, temp_lo)@ + VADD.I64 D6, D2, D3 + VSHR.S64 D6, #32 @acc = mac32_tns_neon(state[j - 1],lpc[j],acc, temp_lo)@ - @VMOV R11,D6[0] - VST1.32 D6[0],[SP] - LDR R11,[SP] - SUB r8 , r8 , r11,lsl #1 @y=sub32(y,(acc<<1)) + @VMOV R11,D6[0] + VST1.32 D6[0], [SP] + LDR R11, [SP] + SUB r8 , r8 , r11, lsl #1 @y=sub32(y,(acc<<1)) - MOV r2 , r8 ,lsl r6 @ shl32(y, shift_value) + MOV r2 , r8 , lsl r6 @ shl32(y, shift_value) - MOV r9, r8, asr r1 - VMOV.32 D15[1], R2 @state[0] + MOV r9, r8, asr r1 + VMOV.32 D15[1], R2 @state[0] - STR r9 , [r0],#4 @*spectrum = y@ - VEXT.32 Q6,Q7,Q6,#3 - LDRGT r8 , [r0] @r8 = y = *spectrum + STR r9 , [r0], #4 @*spectrum = y@ + VEXT.32 Q6, Q7, Q6, #3 + LDRGT r8 , [r0] @r8 = y = *spectrum - BGT OUTER_LOOP2_8 + BGT OUTER_LOOP2_8 - B EXIT + B EXIT ORDER12: - LDR r8 , [r0] @r8 = y = *spectrum + LDR r8 , [r0] @r8 = y = *spectrum OUTER_LOOP2_12: - VDUP.32 Q1,R14 @Q1= accu = 0 + VDUP.32 Q1, R14 @Q1= accu = 0 - VMLAL.S32 Q1, D10, D12 - VMLAL.S32 Q1, D11, D13 - VMLAL.S32 Q1, D22, D18 - VMLAL.S32 Q1, D23, D19 - VMLAL.S32 Q1, D24, D20 - VMLAL.S32 Q1, D25, D21 + VMLAL.S32 Q1, D10, D12 + VMLAL.S32 Q1, D11, D13 + VMLAL.S32 Q1, D22, D18 + VMLAL.S32 Q1, D23, D19 + VMLAL.S32 Q1, D24, D20 + VMLAL.S32 Q1, D25, D21 - MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec - VEXT.32 Q12,Q11,Q12,#3 - SUBS r7 , r7 , #1 + MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec + VEXT.32 Q12, Q11, Q12, #3 + SUBS r7 , r7 , #1 - VADD.I64 D6,D2,D3 - VEXT.32 Q11,Q6,Q11,#3 - VSHR.S64 D6,#32 + VADD.I64 D6, D2, D3 + VEXT.32 Q11, Q6, Q11, #3 + VSHR.S64 D6, #32 - @VMOV R11,D6[0] - VST1.32 D6[0],[SP] - LDR R11,[SP] - SUB r8 , r8 , r11,lsl #1 @y=sub32(y,(acc<<1)) + @VMOV R11,D6[0] + VST1.32 D6[0], [SP] + LDR R11, [SP] + SUB r8 , r8 , r11, lsl #1 @y=sub32(y,(acc<<1)) - MOV r2 , r8 ,lsl r6 @ shl32(y, shift_value) + MOV r2 , r8 , lsl r6 @ shl32(y, shift_value) - MOV r9, r8, asr r1 - VMOV.32 D15[1], R2 @state[0] + MOV r9, r8, asr r1 + VMOV.32 D15[1], R2 @state[0] - STR r9 , [r0],#4 @*spectrum = y@ - VEXT.32 Q6,Q7,Q6,#3 - LDRGT r8 , [r0] @r8 = y = *spectrum + STR r9 , [r0], #4 @*spectrum = y@ + VEXT.32 Q6, Q7, Q6, #3 + LDRGT r8 , [r0] @r8 = y = *spectrum - BGT OUTER_LOOP2_12 + BGT OUTER_LOOP2_12 - B EXIT + B EXIT ORDER16: - LDR r8 , [r0] @r8 = y = *spectrum + LDR r8 , [r0] @r8 = y = *spectrum OUTER_LOOP2_16: - VDUP.32 Q1,R14 @Q1= accu = 0 + VDUP.32 Q1, R14 @Q1= accu = 0 - VMLAL.S32 Q1, D10, D12 - VMLAL.S32 Q1, D11, D13 - VMLAL.S32 Q1, D22, D18 - VMLAL.S32 Q1, D23, D19 - VMLAL.S32 Q1, D24, D20 - VMLAL.S32 Q1, D25, D21 - VMLAL.S32 Q1, D28, D26 @ - VMLAL.S32 Q1, D29, D27 @ @order16 + VMLAL.S32 Q1, D10, D12 + VMLAL.S32 Q1, D11, D13 + VMLAL.S32 Q1, D22, D18 + VMLAL.S32 Q1, D23, D19 + VMLAL.S32 Q1, D24, D20 + VMLAL.S32 Q1, D25, D21 + VMLAL.S32 Q1, D28, D26 @ + VMLAL.S32 Q1, D29, D27 @ @order16 - VEXT.32 Q14,Q12,Q14,#3 - MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec - VEXT.32 Q12,Q11,Q12,#3 - SUBS r7 , r7 , #1 + VEXT.32 Q14, Q12, Q14, #3 + MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec + VEXT.32 Q12, Q11, Q12, #3 + SUBS r7 , r7 , #1 - VADD.I64 D6,D2,D3 - VEXT.32 Q11,Q6,Q11,#3 - VSHR.S64 D6,#32 + VADD.I64 D6, D2, D3 + VEXT.32 Q11, Q6, Q11, #3 + VSHR.S64 D6, #32 - @VMOV R11,D6[0] - VST1.32 D6[0],[SP] - LDR R11,[SP] - SUB r8 , r8 , r11,lsl #1 @y=sub32(y,(acc<<1)) + @VMOV R11,D6[0] + VST1.32 D6[0], [SP] + LDR R11, [SP] + SUB r8 , r8 , r11, lsl #1 @y=sub32(y,(acc<<1)) - MOV r2 , r8 ,lsl r6 @ shl32(y, shift_value) + MOV r2 , r8 , lsl r6 @ shl32(y, shift_value) - MOV r9, r8, asr r1 - VMOV.32 D15[1], R2 @state[0] + MOV r9, r8, asr r1 + VMOV.32 D15[1], R2 @state[0] - STR r9 , [r0],#4 @*spectrum = y@ - VEXT.32 Q6,Q7,Q6,#3 - LDRGT r8 , [r0] @r8 = y = *spectrum + STR r9 , [r0], #4 @*spectrum = y@ + VEXT.32 Q6, Q7, Q6, #3 + LDRGT r8 , [r0] @r8 = y = *spectrum - BGT OUTER_LOOP2_16 + BGT OUTER_LOOP2_16 - B EXIT + B EXIT ORDER20: - LDR r8 , [r0] @r8 = y = *spectrum + LDR r8 , [r0] @r8 = y = *spectrum OUTER_LOOP2_20: - VDUP.32 Q1,R14 @Q1= accu = 0 + VDUP.32 Q1, R14 @Q1= accu = 0 - VMLAL.S32 Q1, D10, D12 - VMLAL.S32 Q1, D11, D13 - VMLAL.S32 Q1, D22, D18 - VMLAL.S32 Q1, D23, D19 - VMLAL.S32 Q1, D24, D20 - VMLAL.S32 Q1, D25, D21 - VMLAL.S32 Q1, D28, D26 @ - VMLAL.S32 Q1, D29, D27 @ @order16 - VMLAL.S32 Q1, D8, D4 @order20 - VMLAL.S32 Q1, D9, D5 @order20 + VMLAL.S32 Q1, D10, D12 + VMLAL.S32 Q1, D11, D13 + VMLAL.S32 Q1, D22, D18 + VMLAL.S32 Q1, D23, D19 + VMLAL.S32 Q1, D24, D20 + VMLAL.S32 Q1, D25, D21 + VMLAL.S32 Q1, D28, D26 @ + VMLAL.S32 Q1, D29, D27 @ @order16 + VMLAL.S32 Q1, D8, D4 @order20 + VMLAL.S32 Q1, D9, D5 @order20 - VEXT.32 Q4,Q14,Q4,#3 @ @for order20 - VEXT.32 Q14,Q12,Q14,#3 @ @for order16 - MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec - VEXT.32 Q12,Q11,Q12,#3 @order12 - SUBS r7 , r7 , #1 + VEXT.32 Q4, Q14, Q4, #3 @ @for order20 + VEXT.32 Q14, Q12, Q14, #3 @ @for order16 + MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec + VEXT.32 Q12, Q11, Q12, #3 @order12 + SUBS r7 , r7 , #1 - VADD.I64 D6,D2,D3 - VEXT.32 Q11,Q6,Q11,#3 @order8 - VSHR.S64 D6,#32 + VADD.I64 D6, D2, D3 + VEXT.32 Q11, Q6, Q11, #3 @order8 + VSHR.S64 D6, #32 - @VMOV R11,D6[0] - VST1.32 D6[0],[SP] - LDR R11,[SP] - SUB r8 , r8 , r11,lsl #1 @y=sub32(y,(acc<<1)) + @VMOV R11,D6[0] + VST1.32 D6[0], [SP] + LDR R11, [SP] + SUB r8 , r8 , r11, lsl #1 @y=sub32(y,(acc<<1)) - MOV r2 , r8 ,lsl r6 @ shl32(y, shift_value) + MOV r2 , r8 , lsl r6 @ shl32(y, shift_value) - MOV r9, r8, asr r1 - VMOV.32 D15[1], R2 @state[0] + MOV r9, r8, asr r1 + VMOV.32 D15[1], R2 @state[0] - STR r9 , [r0],#4 @*spectrum = y@ - VEXT.32 Q6,Q7,Q6,#3 - LDRGT r8 , [r0] @r8 = y = *spectrum + STR r9 , [r0], #4 @*spectrum = y@ + VEXT.32 Q6, Q7, Q6, #3 + LDRGT r8 , [r0] @r8 = y = *spectrum - BGT OUTER_LOOP2_20 + BGT OUTER_LOOP2_20 - B EXIT + B EXIT NEG_INC: @ filtering loop for inc = -1 - LDR r8 , [r0] @r8 =*spectrum - SUBS r7 , r7 , #1 - MOV r8, r8, lsl r1 - MOV r9, r8, asr r1 - MOV r8 , r8 ,lsl r6 - STR r8 , [r12] @state[0] = sp[top] - STR r9, [r0], #-4 - BEQ NEGFILTER_LOOP2 + LDR r8 , [r0] @r8 =*spectrum + SUBS r7 , r7 , #1 + MOV r8, r8, lsl r1 + MOV r9, r8, asr r1 + MOV r8 , r8 , lsl r6 + STR r8 , [r12] @state[0] = sp[top] + STR r9, [r0], #-4 + BEQ NEGFILTER_LOOP2 NEGFILTER_LOOP1: - LDR r8 , [r0] @r8 =*spectrum - SUB r5 , r4 , r7 @ - MOV r5 , r5 ,lsl #2 - MOV r11 , #0 @accu = 0 - ADD r14, r12, r5 @state[j] + LDR r8 , [r0] @r8 =*spectrum + SUB r5 , r4 , r7 @ + MOV r5 , r5 , lsl #2 + MOV r11 , #0 @accu = 0 + ADD r14, r12, r5 @state[j] NEGINNER_LOOP1: - LDR r10 , [r14,#-4] @state[j-1] - LDR r9 , [r3 , r5] @lpc[j] - SUBS r5 , r5 , #4 - - MOV r2, #0 - SMLAL r2 , r11, r10, r9 - STR r10 , [r14], #-4 @state[j] = state[j - 1] - BGT NEGINNER_LOOP1 - - MOV r8, r8, lsl r1 - SUB r8 , r8 , r11,lsl #1 - MOV r9, r8, asr r1 - STR r9 , [r0], #-4 @*spectrum = y@ - SUBS r7 , r7 , #1 @i-- - MOV r8 , r8 ,lsl r6 - STR r8 , [r12] @state[0] - BGT NEGFILTER_LOOP1 + LDR r10 , [r14, #-4] @state[j-1] + LDR r9 , [r3 , r5] @lpc[j] + SUBS r5 , r5 , #4 + + MOV r2, #0 + SMLAL r2 , r11, r10, r9 + STR r10 , [r14], #-4 @state[j] = state[j - 1] + BGT NEGINNER_LOOP1 + + MOV r8, r8, lsl r1 + SUB r8 , r8 , r11, lsl #1 + MOV r9, r8, asr r1 + STR r9 , [r0], #-4 @*spectrum = y@ + SUBS r7 , r7 , #1 @i-- + MOV r8 , r8 , lsl r6 + STR r8 , [r12] @state[0] + BGT NEGFILTER_LOOP1 NEGFILTER_LOOP2: - LDR R1, [sp] @size - SUBS R7 , R1 , r4 @size-order - BEQ EXIT + LDR R1, [sp] @size + SUBS R7 , R1 , r4 @size-order + BEQ EXIT - ADD R8, R3, #4 + ADD R8, R3, #4 - MOV R14,#0 - VLD1.32 {D10, D11}, [R8]! @lpc[j] - MOV R5 , R4 ,LSL #2 @count for inner loop = order + MOV R14, #0 + VLD1.32 {D10, D11}, [R8]! @lpc[j] + MOV R5 , R4 , LSL #2 @count for inner loop = order - LDR R1, [sp,#240] @scaleSpec + LDR R1, [sp, #240] @scaleSpec - CMP R4,#4 - VLD1.32 {D12, D13}, [R12]! @state[j - 1] - BEQ NEGORDER4 + CMP R4, #4 + VLD1.32 {D12, D13}, [R12]! @state[j - 1] + BEQ NEGORDER4 - VLD1.32 {D18, D19}, [R8]! - CMP R4,#8 + VLD1.32 {D18, D19}, [R8]! + CMP R4, #8 - VLD1.32 {D22, D23}, [R12]! - BEQ NEGORDER8 + VLD1.32 {D22, D23}, [R12]! + BEQ NEGORDER8 - VLD1.32 {D20, D21}, [R8]! - CMP R4,#12 + VLD1.32 {D20, D21}, [R8]! + CMP R4, #12 - VLD1.32 {D24, D25}, [R12]! - BEQ NEGORDER12 + VLD1.32 {D24, D25}, [R12]! + BEQ NEGORDER12 NEGORDER4: - LDR r8 , [r0] @r8 = y = *spectrum + LDR r8 , [r0] @r8 = y = *spectrum NEGOUTER_LOOP2_4: - VDUP.32 Q1,R14 @Q1= accu = 0 + VDUP.32 Q1, R14 @Q1= accu = 0 - VMLAL.S32 Q1, D10, D12 - VMLAL.S32 Q1, D11, D13 - MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec - SUBS r7 , r7 , #1 + VMLAL.S32 Q1, D10, D12 + VMLAL.S32 Q1, D11, D13 + MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec + SUBS r7 , r7 , #1 - VADD.I64 D6,D2,D3 - VSHR.S64 D6,#32 + VADD.I64 D6, D2, D3 + VSHR.S64 D6, #32 - @VMOV R11,D6[0] - VST1.32 D6[0],[SP] - LDR R11,[SP] - SUB r8 , r8 , r11,lsl #1 @y=sub32(y,(acc<<1)) + @VMOV R11,D6[0] + VST1.32 D6[0], [SP] + LDR R11, [SP] + SUB r8 , r8 , r11, lsl #1 @y=sub32(y,(acc<<1)) - MOV r2 , r8 ,lsl r6 + MOV r2 , r8 , lsl r6 - VMOV.32 D15[1], R2 - MOV r9, r8, asr r1 + VMOV.32 D15[1], R2 + MOV r9, r8, asr r1 - STR r9 , [r0],#-4 @*spectrum = y@ - VEXT.32 Q6,Q7,Q6,#3 - LDRGT r8 , [r0] @r8 = y = *spectrum + STR r9 , [r0], #-4 @*spectrum = y@ + VEXT.32 Q6, Q7, Q6, #3 + LDRGT r8 , [r0] @r8 = y = *spectrum - BGT NEGOUTER_LOOP2_4 + BGT NEGOUTER_LOOP2_4 - B EXIT + B EXIT NEGORDER8: - LDR r8 , [r0] @r8 = y = *spectrum + LDR r8 , [r0] @r8 = y = *spectrum NEGOUTER_LOOP2_8: - VDUP.32 Q1,R14 @Q1= accu = 0 + VDUP.32 Q1, R14 @Q1= accu = 0 - VMLAL.S32 Q1, D10, D12 - VMLAL.S32 Q1, D11, D13 - VMLAL.S32 Q1, D22, D18 - VMLAL.S32 Q1, D23, D19 + VMLAL.S32 Q1, D10, D12 + VMLAL.S32 Q1, D11, D13 + VMLAL.S32 Q1, D22, D18 + VMLAL.S32 Q1, D23, D19 - MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec - VEXT.32 Q11,Q6,Q11,#3 - SUBS r7 , r7 , #1 + MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec + VEXT.32 Q11, Q6, Q11, #3 + SUBS r7 , r7 , #1 - VADD.I64 D6,D2,D3 + VADD.I64 D6, D2, D3 - VSHR.S64 D6,#32 + VSHR.S64 D6, #32 - @VMOV R11,D6[0] - VST1.32 D6[0],[SP] - LDR R11,[SP] - SUB r8 , r8 , r11,lsl #1 @y=sub32(y,(acc<<1)) - MOV r2 , r8 ,lsl r6 @ shl32(y, shift_value) + @VMOV R11,D6[0] + VST1.32 D6[0], [SP] + LDR R11, [SP] + SUB r8 , r8 , r11, lsl #1 @y=sub32(y,(acc<<1)) + MOV r2 , r8 , lsl r6 @ shl32(y, shift_value) - VMOV.32 D15[1], R2 @state[0] - MOV r9, r8, asr r1 + VMOV.32 D15[1], R2 @state[0] + MOV r9, r8, asr r1 - STR r9 , [r0],#-4 @*spectrum = y@ - VEXT.32 Q6,Q7,Q6,#3 - LDRGT r8 , [r0] @r8 = y = *spectrum + STR r9 , [r0], #-4 @*spectrum = y@ + VEXT.32 Q6, Q7, Q6, #3 + LDRGT r8 , [r0] @r8 = y = *spectrum - BGT NEGOUTER_LOOP2_8 + BGT NEGOUTER_LOOP2_8 - B EXIT + B EXIT NEGORDER12: - LDR r8 , [r0] @r8 = y = *spectrum + LDR r8 , [r0] @r8 = y = *spectrum NEGOUTER_LOOP2_12: - VDUP.32 Q1,R14 @Q1= accu = 0 + VDUP.32 Q1, R14 @Q1= accu = 0 - VMLAL.S32 Q1, D10, D12 - VMLAL.S32 Q1, D11, D13 - VMLAL.S32 Q1, D22, D18 - VMLAL.S32 Q1, D23, D19 - VMLAL.S32 Q1, D24, D20 - VMLAL.S32 Q1, D25, D21 + VMLAL.S32 Q1, D10, D12 + VMLAL.S32 Q1, D11, D13 + VMLAL.S32 Q1, D22, D18 + VMLAL.S32 Q1, D23, D19 + VMLAL.S32 Q1, D24, D20 + VMLAL.S32 Q1, D25, D21 - MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec - VEXT.32 Q12,Q11,Q12,#3 - SUBS r7 , r7 , #1 + MOV r8, r8, lsl r1 @y = (*spectrum) << scaleSpec + VEXT.32 Q12, Q11, Q12, #3 + SUBS r7 , r7 , #1 - VADD.I64 D6,D2,D3 - VEXT.32 Q11,Q6,Q11,#3 - VSHR.S64 D6,#32 + VADD.I64 D6, D2, D3 + VEXT.32 Q11, Q6, Q11, #3 + VSHR.S64 D6, #32 - @VMOV R11,D6[0] - VST1.32 D6[0],[SP] - LDR R11,[SP] - SUB r8 , r8 , r11,lsl #1 @y=sub32(y,(acc<<1)) + @VMOV R11,D6[0] + VST1.32 D6[0], [SP] + LDR R11, [SP] + SUB r8 , r8 , r11, lsl #1 @y=sub32(y,(acc<<1)) - MOV r2 , r8 ,lsl r6 @ shl32(y, shift_value) + MOV r2 , r8 , lsl r6 @ shl32(y, shift_value) - VMOV.32 D15[1], R2 @state[0] - MOV r9, r8, asr r1 + VMOV.32 D15[1], R2 @state[0] + MOV r9, r8, asr r1 - STR r9 , [r0],#-4 @*spectrum = y@ - VEXT.32 Q6,Q7,Q6,#3 - LDRGT r8 , [r0] @r8 = y = *spectrum + STR r9 , [r0], #-4 @*spectrum = y@ + VEXT.32 Q6, Q7, Q6, #3 + LDRGT r8 , [r0] @r8 = y = *spectrum - BGT NEGOUTER_LOOP2_12 + BGT NEGOUTER_LOOP2_12 EXIT: - ADD sp, sp ,#128 - vpop {d8-d15} - LDMFD r13!, {r4 - r12, r15} + ADD sp, sp , #128 + vpop {d8-d15} + LDMFD r13!, {r4 - r12, r15} |