@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License")@
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@------------------------------------------------------------------------------
@ ixheaacd_tns_ar_filter_fixed_armv7
@
@ Syntax : GNU as, ARM (A32) instruction set with NEON.
@
@ C-equivalent prototype, as implied by the register and stack usage below
@ (all quantities are 32-bit words):
@
@   void ixheaacd_tns_ar_filter_fixed_armv7(int *spectrum, int size, int inc,
@                                           int *lpc, int order,
@                                           int shift_value, int scaleSpec)
@
@ In:    r0        = spectrum   pointer to the first sample to filter
@        r1        = size       number of samples
@        r2        = inc        1 walks upwards, any other value walks
@                               downwards (the code only tests inc == 1)
@        r3        = lpc        coefficient table, lpc[1..order] are used,
@                               lpc[0] is never read
@        [sp, #0]  = order
@        [sp, #4]  = shift_value
@        [sp, #8]  = scaleSpec
@ Out:   none (spectrum is filtered in place)
@ Saves: r4-r12, lr, d8-d15 (restored on exit)
@ Clobb: r0-r3, d2-d6, d18-d29, flags
@
@ Per sample the routine computes
@        y         = (*spectrum << scaleSpec) - (acc << 1)
@        *spectrum = y >> scaleSpec          (arithmetic shift)
@        state[0]  = y << shift_value        (older state entries shift up)
@ where acc is built from the products state[j-1] * lpc[j], j = 1..order:
@   - warm-up phase (first `order` samples, scalar): the high 32 bits of each
@     product are summed, and only the taps j <= sample index are used
@   - tail phase (remaining samples, NEON): the 64-bit products are summed and
@     the high 32 bits of that sum are used
@
@ Side effect on lpc: when order is not a multiple of 4 it is rounded up to
@ the next multiple of 4 and lpc[order+1 .. rounded order] are overwritten
@ with 0, so lpc must have room for (rounded order + 1) words.
@
@ Limits visible in this file:
@   - The NEON tail is specialised for a rounded order of 4, 8, 12, 16 or 20.
@     Any other rounded order falls through to the 4-tap loop.
@   - The warm-up phase always processes `order` samples, it does not look at
@     size. NOTE(review): callers presumably guarantee size >= order and
@     order >= 1 - confirm against the C caller.
@   - The local frame holds 31 state words.
@
@ Frame layout after the prologue (offsets from sp):
@     0        size, later reused as NEON-to-core scratch word
@     4..127   state[]
@   128..191   saved d8-d15
@   192..231   saved r4-r12, lr
@   232        order, 236 shift_value, 240 scaleSpec
@------------------------------------------------------------------------------

    .equ    TNS_FRAME_BYTES, 128        @ size word + state[]
    .equ    TNS_ARG_ORDER,   232        @ frame + 64 (d8-d15) + 40 (core regs)
    .equ    TNS_ARG_SHIFT,   236
    .equ    TNS_ARG_SCALE,   240

    .text
    .p2align 2
    .global ixheaacd_tns_ar_filter_fixed_armv7

ixheaacd_tns_ar_filter_fixed_armv7:
    STMFD   r13!, {r4 - r12, r14}
    VPUSH   {d8 - d15}
    SUB     sp, sp, #TNS_FRAME_BYTES    @ size slot + state[]
    LDR     r4, [sp, #TNS_ARG_ORDER]    @ r4 = order
    LDR     r6, [sp, #TNS_ARG_SHIFT]    @ r6 = shift_value
    STR     r1, [sp]                    @ park size, r1 is reused below
    ADD     r12, sp, #4                 @ r12 = &state[0]

    @ ---- round order up to a multiple of 4, zero-padding lpc ----------------
    ANDS    r5, r4, #3
    BEQ     FILTER_LOOP                 @ already a multiple of 4
    MOV     r8, #0
    ADD     r14, r3, r4, LSL #2         @ r14 = &lpc[order]
    RSBS    r7, r5, #3                  @ r7 = pad words handled by the loop
    BEQ     ORDER_LOOPEND
ORDER_LOOP:
    STR     r8, [r14, #4]!              @ lpc[++i] = 0
    SUBS    r7, r7, #1
    BGT     ORDER_LOOP
ORDER_LOOPEND:
    STR     r8, [r14, #4]               @ last pad word
    BIC     r4, r4, #3
    ADD     r4, r4, #4                  @ order = (order & ~3) + 4

FILTER_LOOP:
    LDR     r1, [sp, #TNS_ARG_SCALE]    @ r1 = scaleSpec
    CMP     r2, #1                      @ inc == 1 ?
    MOV     r7, r4                      @ r7 = warm-up samples left
    BNE     NEG_INC

    @ ==== inc == 1 : warm-up, sample 0 (no history yet) ======================
    LDR     r8, [r0]
    SUBS    r7, r7, #1
    MOV     r8, r8, lsl r1              @ y = *spectrum << scaleSpec
    MOV     r9, r8, asr r1
    MOV     r8, r8, lsl r6
    STR     r8, [r12]                   @ state[0] = y << shift_value
    STR     r9, [r0], #4                @ *spectrum++ = y >> scaleSpec
    BEQ     FILTER_LOOP2

    @ ==== inc == 1 : warm-up, samples 1 .. order-1 (scalar) ==================
FILTER_LOOP1:
    LDR     r8, [r0]                    @ r8 = *spectrum
    SUB     r5, r4, r7                  @ r5 = i (index of this sample)
    MOV     r5, r5, lsl #2              @ byte offset of tap i
    MOV     r11, #0                     @ acc = 0
    ADD     r14, r12, r5                @ r14 = &state[i]
INNER_LOOP1:
    LDR     r10, [r14, #-4]             @ state[j-1]
    LDR     r9, [r3, r5]                @ lpc[j]
    SUBS    r5, r5, #4
    MOV     r2, #0                      @ low word restarts at 0 for each tap
    SMLAL   r2, r11, r10, r9            @ acc += high32(state[j-1] * lpc[j])
    STR     r10, [r14], #-4             @ state[j] = state[j-1]
    BGT     INNER_LOOP1
    MOV     r8, r8, lsl r1              @ y = *spectrum << scaleSpec
    SUB     r8, r8, r11, lsl #1         @ y -= acc << 1
    MOV     r9, r8, asr r1
    STR     r9, [r0], #4                @ *spectrum++ = y >> scaleSpec
    SUBS    r7, r7, #1
    MOV     r8, r8, lsl r6
    STR     r8, [r12]                   @ state[0] = y << shift_value
    BGT     FILTER_LOOP1

    @ ==== inc == 1 : tail, samples order .. size-1 (NEON) ====================
FILTER_LOOP2:
    LDR     r1, [sp]                    @ size
    ADD     r8, r3, #4                  @ r8 = &lpc[1]
    SUBS    r7, r1, r4                  @ r7 = size - order
    BLE     EXIT                        @ nothing left (also when size < order)
    LDR     r1, [sp, #TNS_ARG_SCALE]    @ r1 = scaleSpec
    MOV     r14, #0
    @ Load only as many lpc/state quads as the selected order needs.
    VLD1.32 {D10, D11}, [r8]!           @ lpc[1..4]
    VLD1.32 {D12, D13}, [r12]!          @ state[0..3]
    CMP     r4, #4
    BEQ     ORDER4
    VLD1.32 {D18, D19}, [r8]!           @ lpc[5..8]
    VLD1.32 {D22, D23}, [r12]!          @ state[4..7]
    CMP     r4, #8
    BEQ     ORDER8
    VLD1.32 {D20, D21}, [r8]!           @ lpc[9..12]
    VLD1.32 {D24, D25}, [r12]!          @ state[8..11]
    CMP     r4, #12
    BEQ     ORDER12
    VLD1.32 {D26, D27}, [r8]!           @ lpc[13..16]
    VLD1.32 {D28, D29}, [r12]!          @ state[12..15]
    CMP     r4, #16
    BEQ     ORDER16
    VLD1.32 {D4, D5}, [r8]!             @ lpc[17..20]
    VLD1.32 {D8, D9}, [r12]!            @ state[16..19]
    CMP     r4, #20
    BEQ     ORDER20
    @ Any other order falls through to the 4-tap loop (unchanged behaviour).

ORDER4:
    LDR     r8, [r0]                    @ r8 = *spectrum
OUTER_LOOP2_4:
    VDUP.32 Q1, r14                     @ 64-bit lane accumulators = 0
    VMLAL.S32 Q1, D10, D12
    VMLAL.S32 Q1, D11, D13
    MOV     r8, r8, lsl r1              @ y = *spectrum << scaleSpec
    SUBS    r7, r7, #1                  @ flags stay live until LDRGT/BGT
    VADD.I64 D6, D2, D3                 @ fold the two lanes
    VSHR.S64 D6, #32                    @ acc = sum >> 32
    VST1.32 D6[0], [SP]                 @ acc goes to r11 through the
    LDR     r11, [SP]                   @ scratch word at [sp]
    SUB     r8, r8, r11, lsl #1         @ y -= acc << 1
    MOV     r2, r8, lsl r6              @ y << shift_value
    MOV     r9, r8, asr r1
    VMOV.32 D15[1], r2                  @ stage new state[0] in Q7 lane 3
    STR     r9, [r0], #4                @ *spectrum++ = y >> scaleSpec
    VEXT.32 Q6, Q7, Q6, #3              @ state[0..3] = {new, old[0..2]}
    LDRGT   r8, [r0]
    BGT     OUTER_LOOP2_4
    B       EXIT

ORDER8:
    LDR     r8, [r0]
OUTER_LOOP2_8:
    VDUP.32 Q1, r14
    VMLAL.S32 Q1, D10, D12
    VMLAL.S32 Q1, D11, D13
    VMLAL.S32 Q1, D22, D18
    VMLAL.S32 Q1, D23, D19
    MOV     r8, r8, lsl r1
    VEXT.32 Q11, Q6, Q11, #3            @ state[4..7] <- state[3..6]
    SUBS    r7, r7, #1
    VADD.I64 D6, D2, D3
    VSHR.S64 D6, #32
    VST1.32 D6[0], [SP]
    LDR     r11, [SP]
    SUB     r8, r8, r11, lsl #1
    MOV     r2, r8, lsl r6
    MOV     r9, r8, asr r1
    VMOV.32 D15[1], r2
    STR     r9, [r0], #4
    VEXT.32 Q6, Q7, Q6, #3
    LDRGT   r8, [r0]
    BGT     OUTER_LOOP2_8
    B       EXIT

ORDER12:
    LDR     r8, [r0]
OUTER_LOOP2_12:
    VDUP.32 Q1, r14
    VMLAL.S32 Q1, D10, D12
    VMLAL.S32 Q1, D11, D13
    VMLAL.S32 Q1, D22, D18
    VMLAL.S32 Q1, D23, D19
    VMLAL.S32 Q1, D24, D20
    VMLAL.S32 Q1, D25, D21
    MOV     r8, r8, lsl r1
    VEXT.32 Q12, Q11, Q12, #3           @ state[8..11] <- state[7..10]
    SUBS    r7, r7, #1
    VADD.I64 D6, D2, D3
    VEXT.32 Q11, Q6, Q11, #3
    VSHR.S64 D6, #32
    VST1.32 D6[0], [SP]
    LDR     r11, [SP]
    SUB     r8, r8, r11, lsl #1
    MOV     r2, r8, lsl r6
    MOV     r9, r8, asr r1
    VMOV.32 D15[1], r2
    STR     r9, [r0], #4
    VEXT.32 Q6, Q7, Q6, #3
    LDRGT   r8, [r0]
    BGT     OUTER_LOOP2_12
    B       EXIT

ORDER16:
    LDR     r8, [r0]
OUTER_LOOP2_16:
    VDUP.32 Q1, r14
    VMLAL.S32 Q1, D10, D12
    VMLAL.S32 Q1, D11, D13
    VMLAL.S32 Q1, D22, D18
    VMLAL.S32 Q1, D23, D19
    VMLAL.S32 Q1, D24, D20
    VMLAL.S32 Q1, D25, D21
    VMLAL.S32 Q1, D28, D26
    VMLAL.S32 Q1, D29, D27
    VEXT.32 Q14, Q12, Q14, #3           @ state[12..15] <- state[11..14]
    MOV     r8, r8, lsl r1
    VEXT.32 Q12, Q11, Q12, #3
    SUBS    r7, r7, #1
    VADD.I64 D6, D2, D3
    VEXT.32 Q11, Q6, Q11, #3
    VSHR.S64 D6, #32
    VST1.32 D6[0], [SP]
    LDR     r11, [SP]
    SUB     r8, r8, r11, lsl #1
    MOV     r2, r8, lsl r6
    MOV     r9, r8, asr r1
    VMOV.32 D15[1], r2
    STR     r9, [r0], #4
    VEXT.32 Q6, Q7, Q6, #3
    LDRGT   r8, [r0]
    BGT     OUTER_LOOP2_16
    B       EXIT

ORDER20:
    LDR     r8, [r0]
OUTER_LOOP2_20:
    VDUP.32 Q1, r14
    VMLAL.S32 Q1, D10, D12
    VMLAL.S32 Q1, D11, D13
    VMLAL.S32 Q1, D22, D18
    VMLAL.S32 Q1, D23, D19
    VMLAL.S32 Q1, D24, D20
    VMLAL.S32 Q1, D25, D21
    VMLAL.S32 Q1, D28, D26
    VMLAL.S32 Q1, D29, D27
    VMLAL.S32 Q1, D8, D4
    VMLAL.S32 Q1, D9, D5
    VEXT.32 Q4, Q14, Q4, #3             @ state[16..19] <- state[15..18]
    VEXT.32 Q14, Q12, Q14, #3
    MOV     r8, r8, lsl r1
    VEXT.32 Q12, Q11, Q12, #3
    SUBS    r7, r7, #1
    VADD.I64 D6, D2, D3
    VEXT.32 Q11, Q6, Q11, #3
    VSHR.S64 D6, #32
    VST1.32 D6[0], [SP]
    LDR     r11, [SP]
    SUB     r8, r8, r11, lsl #1
    MOV     r2, r8, lsl r6
    MOV     r9, r8, asr r1
    VMOV.32 D15[1], r2
    STR     r9, [r0], #4
    VEXT.32 Q6, Q7, Q6, #3
    LDRGT   r8, [r0]
    BGT     OUTER_LOOP2_20
    B       EXIT

    @ ==== inc != 1 : warm-up, sample 0 =======================================
NEG_INC:
    LDR     r8, [r0]
    SUBS    r7, r7, #1
    MOV     r8, r8, lsl r1              @ y = *spectrum << scaleSpec
    MOV     r9, r8, asr r1
    MOV     r8, r8, lsl r6
    STR     r8, [r12]                   @ state[0] = y << shift_value
    STR     r9, [r0], #-4               @ *spectrum-- = y >> scaleSpec
    BEQ     NEGFILTER_LOOP2

    @ ==== inc != 1 : warm-up, samples 1 .. order-1 (scalar) ==================
NEGFILTER_LOOP1:
    LDR     r8, [r0]
    SUB     r5, r4, r7                  @ r5 = i
    MOV     r5, r5, lsl #2
    MOV     r11, #0                     @ acc = 0
    ADD     r14, r12, r5                @ r14 = &state[i]
NEGINNER_LOOP1:
    LDR     r10, [r14, #-4]             @ state[j-1]
    LDR     r9, [r3, r5]                @ lpc[j]
    SUBS    r5, r5, #4
    MOV     r2, #0
    SMLAL   r2, r11, r10, r9            @ acc += high32(state[j-1] * lpc[j])
    STR     r10, [r14], #-4             @ state[j] = state[j-1]
    BGT     NEGINNER_LOOP1
    MOV     r8, r8, lsl r1
    SUB     r8, r8, r11, lsl #1         @ y -= acc << 1
    MOV     r9, r8, asr r1
    STR     r9, [r0], #-4               @ *spectrum-- = y >> scaleSpec
    SUBS    r7, r7, #1
    MOV     r8, r8, lsl r6
    STR     r8, [r12]                   @ state[0] = y << shift_value
    BGT     NEGFILTER_LOOP1

    @ ==== inc != 1 : tail (NEON) =============================================
NEGFILTER_LOOP2:
    LDR     r1, [sp]                    @ size
    SUBS    r7, r1, r4                  @ r7 = size - order
    BLE     EXIT                        @ nothing left (also when size < order)
    ADD     r8, r3, #4                  @ r8 = &lpc[1]
    MOV     r14, #0
    LDR     r1, [sp, #TNS_ARG_SCALE]    @ r1 = scaleSpec
    VLD1.32 {D10, D11}, [r8]!           @ lpc[1..4]
    VLD1.32 {D12, D13}, [r12]!          @ state[0..3]
    CMP     r4, #4
    BEQ     NEGORDER4
    VLD1.32 {D18, D19}, [r8]!           @ lpc[5..8]
    VLD1.32 {D22, D23}, [r12]!          @ state[4..7]
    CMP     r4, #8
    BEQ     NEGORDER8
    VLD1.32 {D20, D21}, [r8]!           @ lpc[9..12]
    VLD1.32 {D24, D25}, [r12]!          @ state[8..11]
    CMP     r4, #12
    BEQ     NEGORDER12
    @ Orders 16 and 20 mirror the inc == 1 path. Without these two cases they
    @ would drop into the 4-tap loop and ignore taps 5..20.
    VLD1.32 {D26, D27}, [r8]!           @ lpc[13..16]
    VLD1.32 {D28, D29}, [r12]!          @ state[12..15]
    CMP     r4, #16
    BEQ     NEGORDER16
    VLD1.32 {D4, D5}, [r8]!             @ lpc[17..20]
    VLD1.32 {D8, D9}, [r12]!            @ state[16..19]
    CMP     r4, #20
    BEQ     NEGORDER20
    @ Any other order falls through to the 4-tap loop (unchanged behaviour).

NEGORDER4:
    LDR     r8, [r0]
NEGOUTER_LOOP2_4:
    VDUP.32 Q1, r14
    VMLAL.S32 Q1, D10, D12
    VMLAL.S32 Q1, D11, D13
    MOV     r8, r8, lsl r1
    SUBS    r7, r7, #1
    VADD.I64 D6, D2, D3
    VSHR.S64 D6, #32
    VST1.32 D6[0], [SP]
    LDR     r11, [SP]
    SUB     r8, r8, r11, lsl #1
    MOV     r2, r8, lsl r6
    VMOV.32 D15[1], r2
    MOV     r9, r8, asr r1
    STR     r9, [r0], #-4
    VEXT.32 Q6, Q7, Q6, #3
    LDRGT   r8, [r0]
    BGT     NEGOUTER_LOOP2_4
    B       EXIT

NEGORDER8:
    LDR     r8, [r0]
NEGOUTER_LOOP2_8:
    VDUP.32 Q1, r14
    VMLAL.S32 Q1, D10, D12
    VMLAL.S32 Q1, D11, D13
    VMLAL.S32 Q1, D22, D18
    VMLAL.S32 Q1, D23, D19
    MOV     r8, r8, lsl r1
    VEXT.32 Q11, Q6, Q11, #3
    SUBS    r7, r7, #1
    VADD.I64 D6, D2, D3
    VSHR.S64 D6, #32
    VST1.32 D6[0], [SP]
    LDR     r11, [SP]
    SUB     r8, r8, r11, lsl #1
    MOV     r2, r8, lsl r6
    VMOV.32 D15[1], r2
    MOV     r9, r8, asr r1
    STR     r9, [r0], #-4
    VEXT.32 Q6, Q7, Q6, #3
    LDRGT   r8, [r0]
    BGT     NEGOUTER_LOOP2_8
    B       EXIT

NEGORDER12:
    LDR     r8, [r0]
NEGOUTER_LOOP2_12:
    VDUP.32 Q1, r14
    VMLAL.S32 Q1, D10, D12
    VMLAL.S32 Q1, D11, D13
    VMLAL.S32 Q1, D22, D18
    VMLAL.S32 Q1, D23, D19
    VMLAL.S32 Q1, D24, D20
    VMLAL.S32 Q1, D25, D21
    MOV     r8, r8, lsl r1
    VEXT.32 Q12, Q11, Q12, #3
    SUBS    r7, r7, #1
    VADD.I64 D6, D2, D3
    VEXT.32 Q11, Q6, Q11, #3
    VSHR.S64 D6, #32
    VST1.32 D6[0], [SP]
    LDR     r11, [SP]
    SUB     r8, r8, r11, lsl #1
    MOV     r2, r8, lsl r6
    VMOV.32 D15[1], r2
    MOV     r9, r8, asr r1
    STR     r9, [r0], #-4
    VEXT.32 Q6, Q7, Q6, #3
    LDRGT   r8, [r0]
    BGT     NEGOUTER_LOOP2_12
    B       EXIT

NEGORDER16:
    LDR     r8, [r0]
NEGOUTER_LOOP2_16:
    VDUP.32 Q1, r14
    VMLAL.S32 Q1, D10, D12
    VMLAL.S32 Q1, D11, D13
    VMLAL.S32 Q1, D22, D18
    VMLAL.S32 Q1, D23, D19
    VMLAL.S32 Q1, D24, D20
    VMLAL.S32 Q1, D25, D21
    VMLAL.S32 Q1, D28, D26
    VMLAL.S32 Q1, D29, D27
    VEXT.32 Q14, Q12, Q14, #3
    MOV     r8, r8, lsl r1
    VEXT.32 Q12, Q11, Q12, #3
    SUBS    r7, r7, #1
    VADD.I64 D6, D2, D3
    VEXT.32 Q11, Q6, Q11, #3
    VSHR.S64 D6, #32
    VST1.32 D6[0], [SP]
    LDR     r11, [SP]
    SUB     r8, r8, r11, lsl #1
    MOV     r2, r8, lsl r6
    VMOV.32 D15[1], r2
    MOV     r9, r8, asr r1
    STR     r9, [r0], #-4
    VEXT.32 Q6, Q7, Q6, #3
    LDRGT   r8, [r0]
    BGT     NEGOUTER_LOOP2_16
    B       EXIT

NEGORDER20:
    LDR     r8, [r0]
NEGOUTER_LOOP2_20:
    VDUP.32 Q1, r14
    VMLAL.S32 Q1, D10, D12
    VMLAL.S32 Q1, D11, D13
    VMLAL.S32 Q1, D22, D18
    VMLAL.S32 Q1, D23, D19
    VMLAL.S32 Q1, D24, D20
    VMLAL.S32 Q1, D25, D21
    VMLAL.S32 Q1, D28, D26
    VMLAL.S32 Q1, D29, D27
    VMLAL.S32 Q1, D8, D4
    VMLAL.S32 Q1, D9, D5
    VEXT.32 Q4, Q14, Q4, #3
    VEXT.32 Q14, Q12, Q14, #3
    MOV     r8, r8, lsl r1
    VEXT.32 Q12, Q11, Q12, #3
    SUBS    r7, r7, #1
    VADD.I64 D6, D2, D3
    VEXT.32 Q11, Q6, Q11, #3
    VSHR.S64 D6, #32
    VST1.32 D6[0], [SP]
    LDR     r11, [SP]
    SUB     r8, r8, r11, lsl #1
    MOV     r2, r8, lsl r6
    VMOV.32 D15[1], r2
    MOV     r9, r8, asr r1
    STR     r9, [r0], #-4
    VEXT.32 Q6, Q7, Q6, #3
    LDRGT   r8, [r0]
    BGT     NEGOUTER_LOOP2_20

EXIT:
    ADD     sp, sp, #TNS_FRAME_BYTES
    VPOP    {d8 - d15}
    LDMFD   r13!, {r4 - r12, r15}       @ restore and return (pc <- saved lr)