.text
.p2align 2
.global ixheaacd_complex_fft_p2_asm
.type ixheaacd_complex_fft_p2_asm, %function

ixheaacd_complex_fft_p2_asm:
    STMFD sp!, {r0-r12, lr}
    SUB sp, sp, #0x44
    LDR r0, [sp, #0x48]
    EOR r0, r0, r0, ASR #31
    CLZ r0, r0
    SUB r12, r0, #16            @dig_rev_shift = norm32(npoints) + 1 -16@
    SUB r0, r0, #1
    RSB r0, r0, #0x1e
    AND r1, r0, #1
    STR r1, [sp, #0x30]
    MOV r1, r0, ASR #1
    LDR r0, [sp, #0x48]         @npoints
    STR r1, [sp, #0x18]
    MOV lr, r0, LSL #1          @(npoints >>1) * 4
    MOV r0, #0

FIRST_STAGE_R4:
    MOVW r4, #0x3333
    MOVT r4, #0x3333
    MOVW r5, #0x0F0F
    MOVT r5, #0x0F0F
    AND r6, r4, r0
    AND r7, r4, r0, LSR #2
    ORR r4, r7, r6, LSL #2
    AND r6, r5, r4
    AND r7, r5, r4, LSR #4
    ORR r4, r7, r6, LSL #4
    BIC r6, r4, #0x0000FF00
    BIC r7, r4, #0x00FF0000
    MOV r7, r7, LSR #8
    ORR r4, r7, r6, LSL #8
    LDR r5, [sp, #0x30]
    MOV r10, r4, LSR r12
    CMP r5, #0
    ADDNE r10, r10, #1
    BICNE r10, r10, #1
    ADD r1, r2, r10, LSL #2
    LDRD r4, [r1]               @r4=x0r, r5=x0i
    ADD r1, r1, lr
    LDRD r8, [r1]               @r8=x1r, r9=x1i
    ADD r1, r1, lr
    LDRD r6, [r1]               @r6=x2r, r7=x2i
    ADD r1, r1, lr
    LDRD r10, [r1]              @r10=x3r, r11=x3i
    ADD r0, r0, #4
    CMP r0, lr, ASR #1

    ADD r4, r4, r6              @x0r = x0r + x2r@
    ADD r5, r5, r7              @x0i = x0i + x2i@
    SUB r6, r4, r6, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r7, r5, r7, lsl#1       @x2i = x0i - (x2i << 1)@

    ADD r8, r8, r10             @x1r = x1r + x3r@
    ADD r9, r9, r11             @x1i = x1i + x3i@
    SUB r1, r8, r10, lsl#1      @x3r = x1r - (x3r << 1)@
    SUB r11, r9, r11, lsl#1     @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r8              @x0r = x0r + x1r@
    ADD r5, r5, r9              @x0i = x0i + x1i@
    SUB r8, r4, r8, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r9, r5, r9, lsl#1       @x1i = x0i - (x1i << 1)@

    ADD r6, r6, r11             @x2r = x2r + x3i@
    SUB r7, r7, r1              @x2i = x2i - x3r@
    SUB r10, r6, r11, lsl#1     @x3i = x2r - (x3i << 1)@
    ADD r11, r7, r1, lsl#1      @x3r = x2i + (x3r << 1)@

    STMIA r3!, {r4-r11}
    BLT FIRST_STAGE_R4

    LDR r1, [sp, #0x18]
    LDR r0, [sp, #0x48]
    MOV r12, #0x40              @nodespacing = 64@
    STR r12, [sp, #0x38]
    LDR r12, [sp, #0x48]
    SUB r3, r3, r0, LSL #3
    SUBS r1, r1, #1
    STR r3, [sp, #0x50]
    MOV r4, r12, ASR #4
    MOV r0, #4
    STR r4, [sp, #0x34]
    STR r1, [sp, #0x3c]
    BLE RADIX2

OUTER_LOOP:
    LDR r1, [sp, #0x44]
    LDR r12, [sp, #0x50]        @WORD32 *data = ptr_y@
    STR r1, [sp, #0x2c]
    LDR r1, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del<<1) * 4

LOOP_TRIVIAL_TWIDDLE:
    LDRD r4, [r12]              @r4=x0r, r5=x0i
    ADD r12, r12, r0
    LDRD r6, [r12]              @r6=x1r, r7=x1i
    ADD r12, r12, r0
    LDRD r8, [r12]              @r8=x2r, r9=x2i
    ADD r12, r12, r0
    LDRD r10, [r12]             @r10=x3r, r11=x3i

    @MOV r4,r4,ASR #1
    @MOV r5,r5,ASR #1
    @MOV r6,r6,ASR #1
    @MOV r7,r7,ASR #1
    @MOV r8,r8,ASR #1
    @MOV r9,r9,ASR #1
    @MOV r10,r10,ASR #1
    @MOV r11,r11,ASR #1

    ADD r4, r4, r8              @x0r = x0r + x2r@
    ADD r5, r5, r9              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl #1      @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl #1      @x2i = x0i - (x2i << 1)@

    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r2, r6, r10, lsl #1     @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl #1    @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    @MOV r4,r4,ASR #1
    @MOV r5,r5,ASR #1
    SUB r6, r4, r6, lsl #1      @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl #1      @x1i = x0i - (x1i << 1)@

    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r2              @x2i = x2i - x3r@
    SUB r10, r8, r11, lsl#1     @x3i = x2r - (x3i << 1)@
    ADD r11, r9, r2, lsl#1      @x3r = x2i + (x3r << 1)@

    STRD r10, [r12]             @r10=x3r, r11=x3i
    SUB r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i
    SUB r12, r12, r0
    STRD r8, [r12]              @r8=x2r, r9=x2i
    SUB r12, r12, r0
    STRD r4, [r12]              @r4=x0r, r5=x0i
    ADD r12, r12, r0, lsl #2
    SUBS r1, r1, #1
    BNE LOOP_TRIVIAL_TWIDDLE
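
@ The block below rebuilds del from the byte stride (r0 >> 3), forms
@ nodespacing * del in r1 (kept at [sp, #0x40]) and approximates r1/3 with
@ the shift/add series 1/4 + 1/8 - 1/16 + ... (-> 1/3); the result at
@ [sp, #0x18] is the j limit for the first SECOND_LOOP range. The twiddle
@ loops that follow all use the SMULL / LSR #31 / ORR ..,LSL#1 triplet,
@ which forms (a*b) >> 31, i.e. a Q31 fractional multiply.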
    MOV r0, r0, ASR #3
    LDR r4, [sp, #0x38]
    LDR r3, [sp, #0x50]
    MUL r1, r0, r4
    ADD r12, r3, #8
    STR r1, [sp, #0x40]
    MOV r3, r1, ASR #2
    ADD r3, r3, r1, ASR #3
    SUB r3, r3, r1, ASR #4
    ADD r3, r3, r1, ASR #5
    SUB r3, r3, r1, ASR #6
    ADD r3, r3, r1, ASR #7
    SUB r3, r3, r1, ASR #8
    STR r3, [sp, #0x18]

SECOND_LOOP:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del<<1) * 4
    LDR r1, [r3, r4, LSL #3]!   @ w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04]         @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04]         @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04]         @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]
    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]
    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    ADD r9, r9, r8
    SUB r8, r4, r5

    LDR r1, [sp, #0x04]
    LDR r2, [sp]
    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    ADD r11, r11, r10
    SUB r10, r4, r5

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12]              @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1       @x2i = x0i - (x2i << 1)@

    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1     @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1     @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1       @x1i = x0i - (x1i << 1)@

    STRD r4, [r12]              @r4=x0r, r5=x0i
    ADD r12, r12, r0

    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r10             @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl#1      @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl#1      @x3r = x2i + (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r
    ADD r12, r12, r0
    BNE RADIX4_BFLY

    MOV r0, r0, ASR #3
    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x18]
    ADD r4, r4, r6
    CMP r4, r7
    BLE SECOND_LOOP
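
@ SECOND_LOOP_2: j range in which the w3 offset (3*j) runs past the stored
@ twiddle table, so r3 is stepped back by #2048 (512 entries * 4) before the
@ w3 fetch and the x3 combine after the multiplies is sign-adjusted to match.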
SECOND_LOOP_2:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del<<1) * 4
    LDR r1, [r3, r4, LSL #3]!   @ w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04]         @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04]         @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048           @ 512 *4
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04]         @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_2:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]
    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]
    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    ADD r9, r9, r8
    SUB r8, r4, r5

    LDR r1, [sp, #0x04]
    LDR r2, [sp]
    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    ADD r10, r11, r10
    SUB r11, r5, r4

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12]              @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1       @x2i = x0i - (x2i << 1)@

    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1     @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1     @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1       @x1i = x0i - (x1i << 1)@

    STRD r4, [r12]              @r4=x0r, r5=x0i
    ADD r12, r12, r0

    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r10             @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl#1      @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl#1      @x3r = x2i + (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r
    ADD r12, r12, r0
    BNE RADIX4_BFLY_2

    MOV r0, r0, ASR #3
    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x40]
    ADD r4, r4, r6
    CMP r4, r7, ASR #1
    BLE SECOND_LOOP_2

    LDR r7, [sp, #0x18]
    CMP r4, r7, LSL #1
    BGT SECOND_LOOP_4
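
@ SECOND_LOOP_3: j range in which both the w2 (2*j) and w3 (3*j) offsets have
@ passed the table fold; the #2048 step-back is applied before the w2 fetch,
@ and the x2/x3 combines are sign-adjusted to match.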
SECOND_LOOP_3:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del<<1) * 4
    LDR r1, [r3, r4, LSL #3]!   @ w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04]         @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048           @ 512 *4
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04]         @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04]         @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_3:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]
    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]
    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    ADD r8, r9, r8
    SUB r9, r5, r4

    LDR r1, [sp, #0x04]
    LDR r2, [sp]
    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    ADD r10, r11, r10
    SUB r11, r5, r4

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12]              @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1       @x2i = x0i - (x2i << 1)@

    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1     @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1     @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1       @x1i = x0i - (x1i << 1)@

    STRD r4, [r12]              @r4=x0r, r5=x0i
    ADD r12, r12, r0

    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r10             @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl#1      @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl#1      @x3r = x2i + (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r
    ADD r12, r12, r0
    BNE RADIX4_BFLY_3

    MOV r0, r0, ASR #3
    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x18]
    ADD r4, r4, r6
    CMP r4, r7, LSL #1
    BLE SECOND_LOOP_3
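
@ SECOND_LOOP_4: final j range; the twiddle pointer is stepped back by #2048
@ before both the w2 and the w3 fetch, and the x2/x3 combines plus the x1/x3
@ half of the butterfly are sign-adjusted to match.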
SECOND_LOOP_4:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del<<1) * 4
    LDR r1, [r3, r4, LSL #3]!   @ w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04]         @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048           @ 512 *4
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04]         @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048           @ 512 *4
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04]         @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_4:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]
    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]
    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    ADD r8, r9, r8
    SUB r9, r5, r4

    LDR r1, [sp, #0x04]
    LDR r2, [sp]
    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    ADD r11, r11, r10
    SUB r10, r5, r4

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12]              @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1       @x2i = x0i - (x2i << 1)@

    ADD r6, r6, r10             @x1r = x1r + x3r@
    SUB r7, r7, r11             @x1i = x1i - x3i@
    SUB r10, r6, r10, lsl#1     @x3r = x1r - (x3r << 1)@
    ADD r11, r7, r11, lsl#1     @x3i = x1i + (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1       @x1i = x0i - (x1i << 1)@

    STRD r4, [r12]              @r4=x0r, r5=x0i
    ADD r12, r12, r0

    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r10             @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl#1      @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl#1      @x3r = x2i + (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r
    ADD r12, r12, r0
    BNE RADIX4_BFLY_4

    MOV r0, r0, ASR #3
    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x40]
    ADD r4, r4, r6
    CMP r4, r7
    BLT SECOND_LOOP_4

    LDR r1, [sp, #0x38]
    MOV r0, r0, LSL #2
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x38]
    LDR r1, [sp, #0x34]
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x34]
    LDR r1, [sp, #0x3c]
    SUBS r1, r1, #1
    STR r1, [sp, #0x3c]
    BGT OUTER_LOOP
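
@ RADIX2: optional last radix-2 stage, taken only when log2(npoints) is odd
@ (parity flag saved at [sp, #0x30]). The two loops cover the two halves of
@ the twiddle range with opposite sign combines; outputs are halved (ASR #1).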
RADIX2:
    LDR r1, [sp, #0x30]
    CMP r1, #0
    BEQ EXIT
    LDR r12, [sp, #0x38]
    LDR r1, [sp, #0x44]
    CMP r12, #0
    MOVEQ r4, #1
    MOVNE r4, r12, LSL #1
    MOVS r3, r0
    BEQ EXIT
    MOV r3, r3, ASR #1
    LDR r5, [sp, #0x50]
    MOV r0, r0, LSL #3          @(del<<1) * 4
    STR r1, [sp, #0x18]

RADIX2_BFLY:
    LDR r1, [sp, #0x18]
    LDRD r6, [r5]               @r6 = x0r
    ADD r5, r5, r0
    LDRD r8, [r5]               @r8 = x1r
    LDR r2, [r1]
    SUBS r3, r3, #1
    SMULL r1, r11, r8, r2       @mult32x16hin32(x1r,W1h)
    LSR r1, r1, #31
    ORR r11, r1, r11, LSL#1
    SMULL r1, r10, r9, r2       @mult32x16hin32(x1i,W1h)
    LSR r1, r1, #31
    ORR r10, r1, r10, LSL#1
    LDR r1, [sp, #0x18]
    LDR r2, [r1, #0x04]
    ADD r1, r1, r4, LSL #3
    STR r1, [sp, #0x18]
    SMULL r1, r8, r8, r2        @ixheaacd_mult32(x1r,w1l)
    LSR r1, r1, #31
    ORR r8, r1, r8, LSL#1
    SMULL r1, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r1, r1, #31
    ORR r9, r1, r9, LSL#1
    SUB r8, r8, r10
    ADD r9, r9, r11

    ADD r10, r8, r6             @(x0r/2) + (x1r/2)
    ASR r10, r10, #1
    ADD r11, r9, r7             @(x0i/2) + (x1i/2)@
    ASR r11, r11, #1
    SUB r8, r6, r8              @(x0r/2) - (x1r/2)
    ASR r8, r8, #1
    SUB r9, r7, r9              @(x0i/2) - (x1i/2)@
    ASR r9, r9, #1
    STRD r8, [r5]
    SUB r5, r5, r0
    STRD r10, [r5], #8
    BNE RADIX2_BFLY

    LDR r1, [sp, #0x44]
    MOV r3, r0, ASR #4
    STR r1, [sp, #0x18]

RADIX2_BFLY_2:
    LDR r1, [sp, #0x18]
    LDRD r6, [r5]               @r6 = x0r
    ADD r5, r5, r0
    LDRD r8, [r5]               @r8 = x1r
    LDR r2, [r1]
    SUBS r3, r3, #1
    SMULL r1, r11, r8, r2       @mult32x16hin32(x1r,W1h)
    LSR r1, r1, #31
    ORR r11, r1, r11, LSL#1
    SMULL r1, r10, r9, r2       @mult32x16hin32(x1i,W1h)
    LSR r1, r1, #31
    ORR r10, r1, r10, LSL#1
    LDR r1, [sp, #0x18]
    LDR r2, [r1, #0x04]
    ADD r1, r1, r4, LSL #3
    STR r1, [sp, #0x18]
    SMULL r1, r8, r8, r2        @ixheaacd_mult32(x1r,w1l)
    LSR r1, r1, #31
    ORR r8, r1, r8, LSL#1
    SMULL r1, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r1, r1, #31
    ORR r9, r1, r9, LSL#1
    ADD r11, r11, r9
    SUB r9, r10, r8
    MOV r8, r11

    ADD r10, r8, r6             @(x0r>>1) + (x1r)
    ASR r10, r10, #1
    ADD r11, r9, r7             @(x0i>>1) + (x1i)@
    ASR r11, r11, #1
    SUB r8, r6, r8              @(x0r>>1) - (x1r)
    ASR r8, r8, #1
    SUB r9, r7, r9              @(x0i>>1) - (x1i)@
    ASR r9, r9, #1
    STRD r8, [r5]
    SUB r5, r5, r0
    STRD r10, [r5], #8
    BNE RADIX2_BFLY_2

EXIT:
    ADD sp, sp, #0x54
    LDMFD sp!, {r4-r12, pc}