Add PUSH-POP of D registers in Arm Neon 32 bit functions am: a47cb8865a am: 9525ebc765 am: 0671e4cda7 am: ff7a95abd4 am: 6acf9167da am: 85ae219fca

am: 68f18ba505

Change-Id: I3f172309ba2c249d587987bd94d5b5b0937affd3
author: Rakesh Kumar <rakesh.kumar@ittiam.com> 2017-11-07 22:51:30 +0000
committer: android-build-merger <android-build-merger@google.com> 2017-11-07 22:51:30 +0000
commit: 1d0fe6aaf99c3ef1e2d9c4c15ad49ad9180da5b5 (patch)
tree: 1654eb72f94c15fcda13fc52bf7cc8e1b05db214 /common/arm/ihevc_inter_pred_filters_luma_vert.s
parent: 4c7f3d573692c09ccbb56bb2fd51527686d109f5 (diff)
parent: 68f18ba505a4c7fb39ec1ca1f6888e95acc1ff51 (diff)
download: libhevc-1d0fe6aaf99c3ef1e2d9c4c15ad49ad9180da5b5.tar.gz
1 files changed, 21 insertions, 9 deletions
diff --git a/common/arm/ihevc_inter_pred_filters_luma_vert.s b/common/arm/ihevc_inter_pred_filters_luma_vert.s
index f51d68c..3d9ab1c 100644
--- a/common/arm/ihevc_inter_pred_filters_luma_vert.s
+++ b/common/arm/ihevc_inter_pred_filters_luma_vert.s
@@ -103,6 +103,11 @@
 @   r12 => *pi1_coeff
 @   r5 =>  ht
 @   r3 =>  wd
+
+.equ    coeff_offset,   104
+.equ    ht_offset,      108
+.equ    wd_offset,      112
+
 .text
 .align 4
 .syntax unified
@@ -116,15 +121,16 @@
 ihevc_inter_pred_luma_vert_a9q:
 
     stmfd       sp!, {r4-r12, r14}          @stack stores the values of the arguments
+    vpush        {d8 - d15}
 
-    ldr         r12,[sp,#40]                @load pi1_coeff
+    ldr         r12,[sp,#coeff_offset]                @load pi1_coeff
     mov         r6,r3
-    ldr         r5,[sp,#48]                 @load wd
+    ldr         r5,[sp,#wd_offset]                 @load wd
     vld1.u8     {d0},[r12]                  @coeff = vld1_s8(pi1_coeff)
     sub         r12,r2,r2,lsl #2            @src_ctrd & pi1_coeff
     vabs.s8     d0,d0                       @vabs_s8(coeff)
     add         r0,r0,r12                   @r0->pu1_src    r12->pi1_coeff
-    ldr         r3,[sp,#44]                 @load ht
+    ldr         r3,[sp,#ht_offset]                 @load ht
     subs        r7,r3,#0                    @r3->ht
     @ble        end_loops           @end loop jump
     vdup.u8     d22,d0[0]                   @coeffabs_0 = vdup_lane_u8(coeffabs, 0)@
@@ -407,7 +413,8 @@ end_loops:
     ldr         r1, [sp], #4
     ldr         r0, [sp], #4
 
-    ldmfdeq     sp!,{r4-r12,r15}            @reload the registers from sp
+    beq         end1
+
     mov         r5, #4
     add         r0, r0, #8
     add         r1, r1, #8
@@ -491,6 +498,8 @@ end_inner_loop_wd_4:
     add         r0,r0,r8
     bgt         outer_loop_wd_4
 
+end1:
+    vpop         {d8 - d15}
     ldmfd       sp!, {r4-r12, r15}          @reload the registers from sp
 
 
@@ -564,15 +573,16 @@ end_inner_loop_wd_4:
 ihevc_inter_pred_luma_vert_w16out_a9q:
 
     stmfd       sp!, {r4-r12, r14}          @stack stores the values of the arguments
+    vpush        {d8 - d15}
 
-    ldr         r12,[sp,#40]                @load pi1_coeff
+    ldr         r12,[sp,#coeff_offset]                @load pi1_coeff
     mov         r6,r3
-    ldr         r5,[sp,#48]                 @load wd
+    ldr         r5,[sp,#wd_offset]                 @load wd
     vld1.u8     {d0},[r12]                  @coeff = vld1_s8(pi1_coeff)
     sub         r12,r2,r2,lsl #2            @src_ctrd & pi1_coeff
     vabs.s8     d0,d0                       @vabs_s8(coeff)
     add         r0,r0,r12                   @r0->pu1_src    r12->pi1_coeff
-    ldr         r3,[sp,#44]                 @load ht
+    ldr         r3,[sp,#ht_offset]                 @load ht
     subs        r7,r3,#0                    @r3->ht
     @ble        end_loops_16out         @end loop jump
     vdup.u8     d22,d0[0]                   @coeffabs_0 = vdup_lane_u8(coeffabs, 0)@
@@ -848,7 +858,8 @@ end_loops_16out:
     ldr         r1, [sp], #4
     ldr         r0, [sp], #4
 
-    ldmfdeq     sp!,{r4-r12,r15}            @reload the registers from sp
+    beq         end2
+
     mov         r5, #4
     add         r0, r0, #8
     add         r1, r1, #16
@@ -934,7 +945,8 @@ end_inner_loop_wd_4_16out:
     add         r1,r1,r9,lsl #1
     add         r0,r0,r8
     bgt         outer_loop_wd_4_16out
-
+end2:
+    vpop         {d8 - d15}
     ldmfd       sp!, {r4-r12, r15}          @reload the registers from sp
author	Rakesh Kumar <rakesh.kumar@ittiam.com>	2017-11-07 22:51:30 +0000
committer	android-build-merger <android-build-merger@google.com>	2017-11-07 22:51:30 +0000
commit	1d0fe6aaf99c3ef1e2d9c4c15ad49ad9180da5b5 (patch)
tree	1654eb72f94c15fcda13fc52bf7cc8e1b05db214 /common/arm/ihevc_inter_pred_filters_luma_vert.s
parent	4c7f3d573692c09ccbb56bb2fd51527686d109f5 (diff)
parent	68f18ba505a4c7fb39ec1ca1f6888e95acc1ff51 (diff)
download	libhevc-1d0fe6aaf99c3ef1e2d9c4c15ad49ad9180da5b5.tar.gz