diff options
author | Rakesh Kumar <rakesh.kumar@ittiam.com> | 2017-11-07 22:51:30 +0000 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2017-11-07 22:51:30 +0000 |
commit | 1d0fe6aaf99c3ef1e2d9c4c15ad49ad9180da5b5 (patch) | |
tree | 1654eb72f94c15fcda13fc52bf7cc8e1b05db214 /common/arm/ihevc_inter_pred_filters_luma_vert.s | |
parent | 4c7f3d573692c09ccbb56bb2fd51527686d109f5 (diff) | |
parent | 68f18ba505a4c7fb39ec1ca1f6888e95acc1ff51 (diff) | |
download | libhevc-1d0fe6aaf99c3ef1e2d9c4c15ad49ad9180da5b5.tar.gz |
Add PUSH-POP of D registers in Arm Neon 32 bit functions am: a47cb8865a am: 9525ebc765 am: 0671e4cda7 am: ff7a95abd4 am: 6acf9167da am: 85ae219fca
am: 68f18ba505
Change-Id: I3f172309ba2c249d587987bd94d5b5b0937affd3
Diffstat (limited to 'common/arm/ihevc_inter_pred_filters_luma_vert.s')
-rw-r--r-- | common/arm/ihevc_inter_pred_filters_luma_vert.s | 30 |
1 files changed, 21 insertions, 9 deletions
diff --git a/common/arm/ihevc_inter_pred_filters_luma_vert.s b/common/arm/ihevc_inter_pred_filters_luma_vert.s index f51d68c..3d9ab1c 100644 --- a/common/arm/ihevc_inter_pred_filters_luma_vert.s +++ b/common/arm/ihevc_inter_pred_filters_luma_vert.s @@ -103,6 +103,11 @@ @ r12 => *pi1_coeff @ r5 => ht @ r3 => wd + +.equ coeff_offset, 104 +.equ ht_offset, 108 +.equ wd_offset, 112 + .text .align 4 .syntax unified @@ -116,15 +121,16 @@ ihevc_inter_pred_luma_vert_a9q: stmfd sp!, {r4-r12, r14} @stack stores the values of the arguments + vpush {d8 - d15} - ldr r12,[sp,#40] @load pi1_coeff + ldr r12,[sp,#coeff_offset] @load pi1_coeff mov r6,r3 - ldr r5,[sp,#48] @load wd + ldr r5,[sp,#wd_offset] @load wd vld1.u8 {d0},[r12] @coeff = vld1_s8(pi1_coeff) sub r12,r2,r2,lsl #2 @src_ctrd & pi1_coeff vabs.s8 d0,d0 @vabs_s8(coeff) add r0,r0,r12 @r0->pu1_src r12->pi1_coeff - ldr r3,[sp,#44] @load ht + ldr r3,[sp,#ht_offset] @load ht subs r7,r3,#0 @r3->ht @ble end_loops @end loop jump vdup.u8 d22,d0[0] @coeffabs_0 = vdup_lane_u8(coeffabs, 0)@ @@ -407,7 +413,8 @@ end_loops: ldr r1, [sp], #4 ldr r0, [sp], #4 - ldmfdeq sp!,{r4-r12,r15} @reload the registers from sp + beq end1 + mov r5, #4 add r0, r0, #8 add r1, r1, #8 @@ -491,6 +498,8 @@ end_inner_loop_wd_4: add r0,r0,r8 bgt outer_loop_wd_4 +end1: + vpop {d8 - d15} ldmfd sp!, {r4-r12, r15} @reload the registers from sp @@ -564,15 +573,16 @@ end_inner_loop_wd_4: ihevc_inter_pred_luma_vert_w16out_a9q: stmfd sp!, {r4-r12, r14} @stack stores the values of the arguments + vpush {d8 - d15} - ldr r12,[sp,#40] @load pi1_coeff + ldr r12,[sp,#coeff_offset] @load pi1_coeff mov r6,r3 - ldr r5,[sp,#48] @load wd + ldr r5,[sp,#wd_offset] @load wd vld1.u8 {d0},[r12] @coeff = vld1_s8(pi1_coeff) sub r12,r2,r2,lsl #2 @src_ctrd & pi1_coeff vabs.s8 d0,d0 @vabs_s8(coeff) add r0,r0,r12 @r0->pu1_src r12->pi1_coeff - ldr r3,[sp,#44] @load ht + ldr r3,[sp,#ht_offset] @load ht subs r7,r3,#0 @r3->ht @ble end_loops_16out @end loop jump vdup.u8 d22,d0[0] @coeffabs_0 = vdup_lane_u8(coeffabs, 0)@ @@ -848,7 +858,8 @@ end_loops_16out: ldr r1, [sp], #4 ldr r0, [sp], #4 - ldmfdeq sp!,{r4-r12,r15} @reload the registers from sp + beq end2 + mov r5, #4 add r0, r0, #8 add r1, r1, #16 @@ -934,7 +945,8 @@ end_inner_loop_wd_4_16out: add r1,r1,r9,lsl #1 add r0,r0,r8 bgt outer_loop_wd_4_16out - +end2: + vpop {d8 - d15} ldmfd sp!, {r4-r12, r15} @reload the registers from sp |