diff options
Diffstat (limited to 'decoder/armv7/ixheaacd_complex_ifft_p2.s')
-rw-r--r-- | decoder/armv7/ixheaacd_complex_ifft_p2.s | 275 |
1 files changed, 136 insertions, 139 deletions
diff --git a/decoder/armv7/ixheaacd_complex_ifft_p2.s b/decoder/armv7/ixheaacd_complex_ifft_p2.s index ce89139..3a0ce5a 100644 --- a/decoder/armv7/ixheaacd_complex_ifft_p2.s +++ b/decoder/armv7/ixheaacd_complex_ifft_p2.s @@ -4,19 +4,18 @@ ixheaacd_complex_ifft_p2_asm: STMFD sp!, {r0-r12, lr} - SUB sp, sp, #0x28 - LDR r0, [sp, #0x2c] - @LDR r12,[sp,#0x5c+4] + SUB sp, sp, #0x44 + LDR r0, [sp, #0x48] EOR r0, r0, r0, ASR #31 CLZ r0, r0 SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@ SUB r0, r0, #1 RSB r0, r0, #0x1e AND r1, r0, #1 - STR r1, [sp, #0x14] + STR r1, [sp, #0x30] MOV r1, r0, ASR #1 - LDR r0, [sp, #0x2c] @npoints - STR r1, [sp, #-4]! + LDR r0, [sp, #0x48] @npoints + STR r1, [sp, #0x18] MOV lr, r0, LSL #1 @(npoints >>1) * 4 MOV r0, #0 @@ -33,7 +32,7 @@ FIRST_STAGE_R4: BIC r7, r4, #0x00FF0000 MOV r7, r7, LSR #8 ORR r4, r7, r6, LSL #8 - LDR r5, [sp, #0x18] + LDR r5, [sp, #0x30] MOV r10, r4, LSR r12 CMP r5, #0 ADDNE r10, r10, #1 @@ -70,24 +69,24 @@ FIRST_STAGE_R4: STMIA r3!, {r4-r11} BLT FIRST_STAGE_R4 - LDR r1, [sp], #4 - LDR r0, [sp, #0x2c] + LDR r1, [sp, #0x18] + LDR r0, [sp, #0x48] MOV r12, #0x40 @nodespacing = 64@ - STR r12, [sp, #0x1c] - LDR r12, [sp, #0x2c] + STR r12, [sp, #0x38] + LDR r12, [sp, #0x48] SUB r3, r3, r0, LSL #3 SUBS r1, r1, #1 - STR r3, [sp, #0x34] + STR r3, [sp, #0x50] MOV r4, r12, ASR #4 MOV r0, #4 - STR r4, [sp, #0x18] - STR r1, [sp, #0x20] + STR r4, [sp, #0x34] + STR r1, [sp, #0x3c] BLE RADIX2 OUTER_LOOP: - LDR r1, [sp, #0x28] - LDR r12, [sp, #0x34] @WORD32 *data = ptr_y@ - STR r1, [sp, #0x10] - LDR r1, [sp, #0x18] + LDR r1, [sp, #0x44] + LDR r12, [sp, #0x50] @WORD32 *data = ptr_y@ + STR r1, [sp, #0x2c] + LDR r1, [sp, #0x34] MOV r0, r0, LSL #3 @(del<<1) * 4 LOOP_TRIVIAL_TWIDDLE: @@ -141,11 +140,11 @@ LOOP_TRIVIAL_TWIDDLE: BNE LOOP_TRIVIAL_TWIDDLE MOV r0, r0, ASR #3 - LDR r4, [sp, #0x1c] - LDR r3, [sp, #0x34] + LDR r4, [sp, #0x38] + LDR r3, [sp, #0x50] MUL r1, r0, r4 ADD r12, r3, #8 - STR r1, [sp, #0x24] + STR r1, [sp, #0x40] MOV r3, r1, ASR #2 ADD r3, r3, r1, ASR #3 SUB r3, r3, r1, ASR #4 @@ -153,25 +152,25 @@ LOOP_TRIVIAL_TWIDDLE: SUB r3, r3, r1, ASR #6 ADD r3, r3, r1, ASR #7 SUB r3, r3, r1, ASR #8 - STR r3, [sp, #-4]! + STR r3, [sp, #0x18] SECOND_LOOP: - LDR r3, [sp, #0x10+4] - LDR r14, [sp, #0x18+4] + LDR r3, [sp, #0x2c] + LDR r14, [sp, #0x34] MOV r0, r0, LSL #3 @(del<<1) * 4 LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ - LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ + LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ - LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ + LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ - LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ + LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ - STR r4, [sp, #8+4] - STR r1, [sp, #-4] - STR r2, [sp, #-8] - STR r5, [sp, #-12] - STR r6, [sp, #-16] - STR r7, [sp, #-20] - STR r8, [sp, #-24] + STR r4, [sp, #0x24] + STR r1, [sp, #0x14] + STR r2, [sp, #0x10] + STR r5, [sp, #0x0c] + STR r6, [sp, #0x08] + STR r7, [sp, #0x04] + STR r8, [sp] RADIX4_BFLY: @@ -180,8 +179,8 @@ RADIX4_BFLY: LDRD r10, [r12, r0] @r10=x3r, r11=x3i SUBS r14, r14, #1 - LDR r1, [sp, #-4] - LDR r2, [sp, #-8] + LDR r1, [sp, #0x14] + LDR r2, [sp, #0x10] SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) LSR r3, r3, #31 @@ -198,8 +197,8 @@ RADIX4_BFLY: SUB r7, r7, r6 ADD r6, r4, r5 @ - LDR r1, [sp, #-12] - LDR r2, [sp, #-16] + LDR r1, [sp, #0x0c] + LDR r2, [sp, #0x08] SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) LSR r3, r3, #31 @@ -216,8 +215,8 @@ RADIX4_BFLY: SUB r9, r9, r8 ADD r8, r4, r5 @ - LDR r1, [sp, #-20] - LDR r2, [sp, #-24] + LDR r1, [sp, #0x04] + LDR r2, [sp] SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) LSR r3, r3, #31 @@ -237,7 +236,7 @@ RADIX4_BFLY: @SUB r12,r12,r0,lsl #1 @LDRD r4,[r12] @r4=x0r, r5=x0i LDR r4, [r12, -r0, lsl #1]! @ - LDR r5, [r12, #4] + LDR r5, [r12, #0x04] ADD r4, r8, r4 @x0r = x0r + x2r@ @@ -271,45 +270,45 @@ RADIX4_BFLY: BNE RADIX4_BFLY MOV r0, r0, ASR #3 - LDR r1, [sp, #0x2c+4] - LDR r4, [sp, #8+4] + LDR r1, [sp, #0x48] + LDR r4, [sp, #0x24] SUB r1, r12, r1, LSL #3 - LDR r6, [sp, #0x1c+4] + LDR r6, [sp, #0x38] ADD r12, r1, #8 - LDR r7, [sp, #0] + LDR r7, [sp, #0x18] ADD r4, r4, r6 CMP r4, r7 BLE SECOND_LOOP SECOND_LOOP_2: - LDR r3, [sp, #0x10+4] - LDR r14, [sp, #0x18+4] + LDR r3, [sp, #0x2c] + LDR r14, [sp, #0x34] MOV r0, r0, LSL #3 @(del<<1) * 4 LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ - LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ + LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ - LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ + LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@ SUB r3, r3, #2048 @ 512 *4 LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ - LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ + LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ - STR r4, [sp, #8+4] + STR r4, [sp, #0x24] - STR r1, [sp, #-4] - STR r2, [sp, #-8] - STR r5, [sp, #-12] - STR r6, [sp, #-16] - STR r7, [sp, #-20] - STR r8, [sp, #-24] + STR r1, [sp, #0x14] + STR r2, [sp, #0x10] + STR r5, [sp, #0x0c] + STR r6, [sp, #0x08] + STR r7, [sp, #0x04] + STR r8, [sp] RADIX4_BFLY_2: LDRD r6, [r12, r0]! @r6=x1r, r7=x1i LDRD r8, [r12, r0]! @r8=x2r, r9=x2i LDRD r10, [r12, r0] @r10=x3r, r11=x3i SUBS r14, r14, #1 - LDR r1, [sp, #-4] - LDR r2, [sp, #-8] + LDR r1, [sp, #0x14] + LDR r2, [sp, #0x10] SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) LSR r3, r3, #31 @@ -326,8 +325,8 @@ RADIX4_BFLY_2: SUB r7, r7, r6 ADD r6, r4, r5 @ - LDR r1, [sp, #-12] - LDR r2, [sp, #-16] + LDR r1, [sp, #0x0c] + LDR r2, [sp, #0x08] SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) LSR r3, r3, #31 @@ -344,8 +343,8 @@ RADIX4_BFLY_2: SUB r9, r9, r8 ADD r8, r4, r5 @ - LDR r1, [sp, #-20] - LDR r2, [sp, #-24] + LDR r1, [sp, #0x04] + LDR r2, [sp] SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) LSR r3, r3, #31 @@ -365,7 +364,7 @@ RADIX4_BFLY_2: @SUB r12,r12,r0,lsl #1 @LDRD r4,[r12] @r4=x0r, r5=x0i LDR r4, [r12, -r0, lsl #1]! @ - LDR r5, [r12, #4] + LDR r5, [r12, #0x04] ADD r4, r8, r4 @x0r = x0r + x2r@ @@ -399,39 +398,39 @@ RADIX4_BFLY_2: BNE RADIX4_BFLY_2 MOV r0, r0, ASR #3 - LDR r1, [sp, #0x2c+4] - LDR r4, [sp, #8+4] + LDR r1, [sp, #0x48] + LDR r4, [sp, #0x24] SUB r1, r12, r1, LSL #3 - LDR r6, [sp, #0x1c+4] + LDR r6, [sp, #0x38] ADD r12, r1, #8 - LDR r7, [sp, #0x24+4] + LDR r7, [sp, #0x40] ADD r4, r4, r6 CMP r4, r7, ASR #1 BLE SECOND_LOOP_2 - LDR r7, [sp, #0] + LDR r7, [sp, #0x18] CMP r4, r7, LSL #1 BGT SECOND_LOOP_4 SECOND_LOOP_3: - LDR r3, [sp, #0x10+4] - LDR r14, [sp, #0x18+4] + LDR r3, [sp, #0x2c] + LDR r14, [sp, #0x34] MOV r0, r0, LSL #3 @(del<<1) * 4 LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ - LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ + LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@ SUB r3, r3, #2048 @ 512 *4 LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ - LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ + LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ - LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ + LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ - STR r4, [sp, #8+4] - STR r1, [sp, #-4] - STR r2, [sp, #-8] - STR r5, [sp, #-12] - STR r6, [sp, #-16] - STR r7, [sp, #-20] - STR r8, [sp, #-24] + STR r4, [sp, #0x24] + STR r1, [sp, #0x14] + STR r2, [sp, #0x10] + STR r5, [sp, #0x0c] + STR r6, [sp, #0x08] + STR r7, [sp, #0x04] + STR r8, [sp] RADIX4_BFLY_3: @@ -440,8 +439,8 @@ RADIX4_BFLY_3: LDRD r10, [r12, r0] @r10=x3r, r11=x3i SUBS r14, r14, #1 - LDR r1, [sp, #-4] - LDR r2, [sp, #-8] + LDR r1, [sp, #0x14] + LDR r2, [sp, #0x10] SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) LSR r3, r3, #31 @@ -458,8 +457,8 @@ RADIX4_BFLY_3: SUB r7, r7, r6 ADD r6, r4, r5 @ - LDR r1, [sp, #-12] - LDR r2, [sp, #-16] + LDR r1, [sp, #0x0c] + LDR r2, [sp, #0x08] SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) LSR r3, r3, #31 @@ -476,8 +475,8 @@ RADIX4_BFLY_3: SUB r8, r8, r9 ADD r9, r5, r4 @ - LDR r1, [sp, #-20] - LDR r2, [sp, #-24] + LDR r1, [sp, #0x04] + LDR r2, [sp] SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) LSR r3, r3, #31 @@ -497,7 +496,7 @@ RADIX4_BFLY_3: @SUB r12,r12,r0,lsl #1 @LDRD r4,[r12] @r4=x0r, r5=x0i LDR r4, [r12, -r0, lsl #1]! @ - LDR r5, [r12, #4] + LDR r5, [r12, #0x04] ADD r4, r8, r4 @x0r = x0r + x2r@ @@ -531,38 +530,38 @@ RADIX4_BFLY_3: BNE RADIX4_BFLY_3 MOV r0, r0, ASR #3 - LDR r1, [sp, #0x2c+4] - LDR r4, [sp, #8+4] + LDR r1, [sp, #0x48] + LDR r4, [sp, #0x24] SUB r1, r12, r1, LSL #3 - LDR r6, [sp, #0x1c+4] + LDR r6, [sp, #0x38] ADD r12, r1, #8 - LDR r7, [sp, #0] + LDR r7, [sp, #0x18] ADD r4, r4, r6 CMP r4, r7, LSL #1 BLE SECOND_LOOP_3 SECOND_LOOP_4: - LDR r3, [sp, #0x10+4] - LDR r14, [sp, #0x18+4] + LDR r3, [sp, #0x2c] + LDR r14, [sp, #0x34] MOV r0, r0, LSL #3 @(del<<1) * 4 LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ - LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ + LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@ SUB r3, r3, #2048 @ 512 *4 LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ - LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ + LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@ SUB r3, r3, #2048 @ 512 *4 LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ - LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ + LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ - STR r4, [sp, #8+4] - STR r1, [sp, #-4] - STR r2, [sp, #-8] - STR r5, [sp, #-12] - STR r6, [sp, #-16] - STR r7, [sp, #-20] - STR r8, [sp, #-24] + STR r4, [sp, #0x24] + STR r1, [sp, #0x14] + STR r2, [sp, #0x10] + STR r5, [sp, #0x0c] + STR r6, [sp, #0x08] + STR r7, [sp, #0x04] + STR r8, [sp] RADIX4_BFLY_4: LDRD r6, [r12, r0]! @r6=x1r, r7=x1i @@ -570,8 +569,8 @@ RADIX4_BFLY_4: LDRD r10, [r12, r0] @r10=x3r, r11=x3i SUBS r14, r14, #1 - LDR r1, [sp, #-4] - LDR r2, [sp, #-8] + LDR r1, [sp, #0x14] + LDR r2, [sp, #0x10] SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) LSR r3, r3, #31 @@ -588,8 +587,8 @@ RADIX4_BFLY_4: SUB r7, r7, r6 ADD r6, r4, r5 @ - LDR r1, [sp, #-12] - LDR r2, [sp, #-16] + LDR r1, [sp, #0x0c] + LDR r2, [sp, #0x08] SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) LSR r3, r3, #31 @@ -606,8 +605,8 @@ RADIX4_BFLY_4: SUB r8, r8, r9 ADD r9, r5, r4 @ - LDR r1, [sp, #-20] - LDR r2, [sp, #-24] + LDR r1, [sp, #0x04] + LDR r2, [sp] SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) LSR r3, r3, #31 @@ -628,7 +627,7 @@ RADIX4_BFLY_4: @SUB r12,r12,r0,lsl #1 @LDRD r4,[r12] @r4=x0r, r5=x0i LDR r4, [r12, -r0, lsl #1]! @ - LDR r5, [r12, #4] + LDR r5, [r12, #0x04] ADD r4, r8, r4 @x0r = x0r + x2r@ @@ -662,48 +661,46 @@ RADIX4_BFLY_4: BNE RADIX4_BFLY_4 MOV r0, r0, ASR #3 - LDR r1, [sp, #0x2c+4] - LDR r4, [sp, #8+4] + LDR r1, [sp, #0x48] + LDR r4, [sp, #0x24] SUB r1, r12, r1, LSL #3 - LDR r6, [sp, #0x1c+4] + LDR r6, [sp, #0x38] ADD r12, r1, #8 - LDR r7, [sp, #0x24+4] + LDR r7, [sp, #0x40] ADD r4, r4, r6 CMP r4, r7 BLT SECOND_LOOP_4 - ADD sp, sp, #4 - LDR r1, [sp, #0x1c] + LDR r1, [sp, #0x38] MOV r0, r0, LSL #2 MOV r1, r1, ASR #2 - STR r1, [sp, #0x1c] - LDR r1, [sp, #0x18] + STR r1, [sp, #0x38] + LDR r1, [sp, #0x34] MOV r1, r1, ASR #2 - STR r1, [sp, #0x18] - LDR r1, [sp, #0x20] + STR r1, [sp, #0x34] + LDR r1, [sp, #0x3c] SUBS r1, r1, #1 - STR r1, [sp, #0x20] + STR r1, [sp, #0x3c] BGT OUTER_LOOP RADIX2: - LDR r1, [sp, #0x14] + LDR r1, [sp, #0x30] CMP r1, #0 BEQ EXIT - LDR r12, [sp, #0x1c] - LDR r1, [sp, #0x28] + LDR r12, [sp, #0x38] + LDR r1, [sp, #0x44] CMP r12, #0 - LDRNE r12, [sp, #0x1c] MOVEQ r4, #1 MOVNE r4, r12, LSL #1 MOVS r3, r0 BEQ EXIT MOV r3, r3, ASR #1 - LDR r5, [sp, #0x34] + LDR r5, [sp, #0x50] MOV r0, r0, LSL #3 @(del<<1) * 4 - STR r1, [sp, #-4] + STR r1, [sp, #0x18] RADIX2_BFLY: - LDR r1, [sp, #-4] + LDR r1, [sp, #0x18] LDRD r6, [r5] @r6 = x0r ADD r5, r5, r0 LDRD r8, [r5] @r8 = x1r @@ -720,10 +717,10 @@ RADIX2_BFLY: ORR r10, r1, r10, LSL#1 - LDR r1, [sp, #-4] - LDR r2, [r1, #4] + LDR r1, [sp, #0x18] + LDR r2, [r1, #0x04] ADD r1, r1, r4, LSL #3 - STR r1, [sp, #-4] + STR r1, [sp, #0x18] SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l) LSR r1, r1, #31 @@ -750,11 +747,11 @@ RADIX2_BFLY: BNE RADIX2_BFLY - LDR r1, [sp, #0x28] + LDR r1, [sp, #0x44] MOV r3, r0, ASR #4 - STR r1, [sp, #-4] + STR r1, [sp, #0x18] RADIX2_BFLY_2: - LDR r1, [sp, #-4] + LDR r1, [sp, #0x18] LDRD r6, [r5] @r6 = x0r ADD r5, r5, r0 LDRD r8, [r5] @r8 = x1r @@ -772,10 +769,10 @@ RADIX2_BFLY_2: ORR r10, r1, r10, LSL#1 - LDR r1, [sp, #-4] - LDR r2, [r1, #4] + LDR r1, [sp, #0x18] + LDR r2, [r1, #0x04] ADD r1, r1, r4, LSL #3 - STR r1, [sp, #-4] + STR r1, [sp, #0x18] SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l) LSR r1, r1, #31 @@ -804,6 +801,6 @@ RADIX2_BFLY_2: BNE RADIX2_BFLY_2 EXIT: - ADD sp, sp, #0x38 + ADD sp, sp, #0x54 LDMFD sp!, {r4-r12, pc} |