about summary refs log tree commit diff
path: root/common/arm/ihevc_inter_pred_filters_luma_vert.s
diff options
context:
space:
mode:
author Rakesh Kumar <rakesh.kumar@ittiam.com> 2017-11-07 22:51:30 +0000
committer android-build-merger <android-build-merger@google.com> 2017-11-07 22:51:30 +0000
commit 1d0fe6aaf99c3ef1e2d9c4c15ad49ad9180da5b5 (patch)
tree 1654eb72f94c15fcda13fc52bf7cc8e1b05db214 /common/arm/ihevc_inter_pred_filters_luma_vert.s
parent 4c7f3d573692c09ccbb56bb2fd51527686d109f5 (diff)
parent 68f18ba505a4c7fb39ec1ca1f6888e95acc1ff51 (diff)
download libhevc-1d0fe6aaf99c3ef1e2d9c4c15ad49ad9180da5b5.tar.gz
Add PUSH-POP of D registers in Arm Neon 32 bit functions am: a47cb8865a am: 9525ebc765 am: 0671e4cda7 am: ff7a95abd4 am: 6acf9167da am: 85ae219fca
am: 68f18ba505 Change-Id: I3f172309ba2c249d587987bd94d5b5b0937affd3
Diffstat (limited to 'common/arm/ihevc_inter_pred_filters_luma_vert.s')
-rw-r--r-- common/arm/ihevc_inter_pred_filters_luma_vert.s 30
1 files changed, 21 insertions, 9 deletions
diff --git a/common/arm/ihevc_inter_pred_filters_luma_vert.s b/common/arm/ihevc_inter_pred_filters_luma_vert.s
index f51d68c..3d9ab1c 100644
--- a/common/arm/ihevc_inter_pred_filters_luma_vert.s
+++ b/common/arm/ihevc_inter_pred_filters_luma_vert.s
@@ -103,6 +103,11 @@
@ r12 => *pi1_coeff
@ r5 => ht
@ r3 => wd
+
+.equ coeff_offset, 104
+.equ ht_offset, 108
+.equ wd_offset, 112
+
.text
.align 4
.syntax unified
@@ -116,15 +121,16 @@
ihevc_inter_pred_luma_vert_a9q:
stmfd sp!, {r4-r12, r14} @stack stores the values of the arguments
+ vpush {d8 - d15}
- ldr r12,[sp,#40] @load pi1_coeff
+ ldr r12,[sp,#coeff_offset] @load pi1_coeff
mov r6,r3
- ldr r5,[sp,#48] @load wd
+ ldr r5,[sp,#wd_offset] @load wd
vld1.u8 {d0},[r12] @coeff = vld1_s8(pi1_coeff)
sub r12,r2,r2,lsl #2 @src_ctrd & pi1_coeff
vabs.s8 d0,d0 @vabs_s8(coeff)
add r0,r0,r12 @r0->pu1_src r12->pi1_coeff
- ldr r3,[sp,#44] @load ht
+ ldr r3,[sp,#ht_offset] @load ht
subs r7,r3,#0 @r3->ht
@ble end_loops @end loop jump
vdup.u8 d22,d0[0] @coeffabs_0 = vdup_lane_u8(coeffabs, 0)@
@@ -407,7 +413,8 @@ end_loops:
ldr r1, [sp], #4
ldr r0, [sp], #4
- ldmfdeq sp!,{r4-r12,r15} @reload the registers from sp
+ beq end1
+
mov r5, #4
add r0, r0, #8
add r1, r1, #8
@@ -491,6 +498,8 @@ end_inner_loop_wd_4:
add r0,r0,r8
bgt outer_loop_wd_4
+end1:
+ vpop {d8 - d15}
ldmfd sp!, {r4-r12, r15} @reload the registers from sp
@@ -564,15 +573,16 @@ end_inner_loop_wd_4:
ihevc_inter_pred_luma_vert_w16out_a9q:
stmfd sp!, {r4-r12, r14} @stack stores the values of the arguments
+ vpush {d8 - d15}
- ldr r12,[sp,#40] @load pi1_coeff
+ ldr r12,[sp,#coeff_offset] @load pi1_coeff
mov r6,r3
- ldr r5,[sp,#48] @load wd
+ ldr r5,[sp,#wd_offset] @load wd
vld1.u8 {d0},[r12] @coeff = vld1_s8(pi1_coeff)
sub r12,r2,r2,lsl #2 @src_ctrd & pi1_coeff
vabs.s8 d0,d0 @vabs_s8(coeff)
add r0,r0,r12 @r0->pu1_src r12->pi1_coeff
- ldr r3,[sp,#44] @load ht
+ ldr r3,[sp,#ht_offset] @load ht
subs r7,r3,#0 @r3->ht
@ble end_loops_16out @end loop jump
vdup.u8 d22,d0[0] @coeffabs_0 = vdup_lane_u8(coeffabs, 0)@
@@ -848,7 +858,8 @@ end_loops_16out:
ldr r1, [sp], #4
ldr r0, [sp], #4
- ldmfdeq sp!,{r4-r12,r15} @reload the registers from sp
+ beq end2
+
mov r5, #4
add r0, r0, #8
add r1, r1, #16
@@ -934,7 +945,8 @@ end_inner_loop_wd_4_16out:
add r1,r1,r9,lsl #1
add r0,r0,r8
bgt outer_loop_wd_4_16out
-
+end2:
+ vpop {d8 - d15}
ldmfd sp!, {r4-r12, r15} @reload the registers from sp