aboutsummaryrefslogtreecommitdiff
path: root/config/arm-neon/vpx_dsp/arm/loopfilter_4_neon.asm.S
diff options
context:
space:
mode:
Diffstat (limited to 'config/arm-neon/vpx_dsp/arm/loopfilter_4_neon.asm.S')
-rw-r--r-- config/arm-neon/vpx_dsp/arm/loopfilter_4_neon.asm.S | 56
1 files changed, 25 insertions, 31 deletions
diff --git a/config/arm-neon/vpx_dsp/arm/loopfilter_4_neon.asm.S b/config/arm-neon/vpx_dsp/arm/loopfilter_4_neon.asm.S
index 712a534cf..6a308cb74 100644
--- a/config/arm-neon/vpx_dsp/arm/loopfilter_4_neon.asm.S
+++ b/config/arm-neon/vpx_dsp/arm/loopfilter_4_neon.asm.S
@@ -1,6 +1,6 @@
@ This file was created from a .asm file
@ using the ads2gas.pl script.
- .syntax unified
+.syntax unified
@
@ Copyright (c) 2013 The WebM project authors. All Rights Reserved.
@
@@ -11,18 +11,18 @@
@ be found in the AUTHORS file in the root of the source tree.
@
- .global vpx_lpf_horizontal_4_neon
- .type vpx_lpf_horizontal_4_neon, function
- .global vpx_lpf_vertical_4_neon
- .type vpx_lpf_vertical_4_neon, function
- .global vpx_lpf_horizontal_4_dual_neon
- .type vpx_lpf_horizontal_4_dual_neon, function
- .global vpx_lpf_vertical_4_dual_neon
- .type vpx_lpf_vertical_4_dual_neon, function
- .arm
+ .global vpx_lpf_horizontal_4_neon
+ .type vpx_lpf_horizontal_4_neon, function
+ .global vpx_lpf_vertical_4_neon
+ .type vpx_lpf_vertical_4_neon, function
+ .global vpx_lpf_horizontal_4_dual_neon
+ .type vpx_lpf_horizontal_4_dual_neon, function
+ .global vpx_lpf_vertical_4_dual_neon
+ .type vpx_lpf_vertical_4_dual_neon, function
+ .arm
-.text
-.p2align 2
+ .text
+ .p2align 2
@ Currently vpx only works on iterations 8 at a time. The vp8 loop filter
@ works on 16 iterations at a time.
@@ -38,8 +38,7 @@
@ r2 const uint8_t *blimit,
@ r3 const uint8_t *limit,
@ sp const uint8_t *thresh,
-_vpx_lpf_horizontal_4_neon:
- vpx_lpf_horizontal_4_neon: @ PROC
+vpx_lpf_horizontal_4_neon: @ PROC
push {lr}
vld1.8 {d0[]}, [r2] @ duplicate *blimit
@@ -72,7 +71,7 @@ _vpx_lpf_horizontal_4_neon:
vst1.u8 {d7}, [r3,:64], r1 @ store oq1
pop {pc}
- .size vpx_lpf_horizontal_4_neon, .-vpx_lpf_horizontal_4_neon @ ENDP @ |vpx_lpf_horizontal_4_neon|
+.size vpx_lpf_horizontal_4_neon, .-vpx_lpf_horizontal_4_neon @ ENDP @ |vpx_lpf_horizontal_4_neon|
@ Currently vpx only works on iterations 8 at a time. The vp8 loop filter
@ works on 16 iterations at a time.
@@ -88,8 +87,7 @@ _vpx_lpf_horizontal_4_neon:
@ r2 const uint8_t *blimit,
@ r3 const uint8_t *limit,
@ sp const uint8_t *thresh,
-_vpx_lpf_vertical_4_neon:
- vpx_lpf_vertical_4_neon: @ PROC
+vpx_lpf_vertical_4_neon: @ PROC
push {lr}
vld1.8 {d0[]}, [r2] @ duplicate *blimit
@@ -140,7 +138,7 @@ _vpx_lpf_vertical_4_neon:
vst4.8 {d4[7], d5[7], d6[7], d7[7]}, [r0]
pop {pc}
- .size vpx_lpf_vertical_4_neon, .-vpx_lpf_vertical_4_neon @ ENDP @ |vpx_lpf_vertical_4_neon|
+.size vpx_lpf_vertical_4_neon, .-vpx_lpf_vertical_4_neon @ ENDP @ |vpx_lpf_vertical_4_neon|
@ void filter4_8();
@ This is a helper function for the loopfilters. The individual functions do the
@@ -166,8 +164,7 @@ _vpx_lpf_vertical_4_neon:
@ d5 op0
@ d6 oq0
@ d7 oq1
-_filter4_8:
- filter4_8: @ PROC
+filter4_8: @ PROC
@ filter_mask
vabd.u8 d19, d3, d4 @ m1 = abs(p3 - p2)
vabd.u8 d20, d4, d5 @ m2 = abs(p2 - p1)
@@ -257,7 +254,7 @@ _filter4_8:
veor d7, d20, d18 @ *oq1 = u^0x80
bx lr
- .size filter4_8, .-filter4_8 @ ENDP @ |filter4_8|
+.size filter4_8, .-filter4_8 @ ENDP @ |filter4_8|
@void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p,
@ const uint8_t *blimit0,
@@ -275,8 +272,7 @@ _filter4_8:
@ sp+8 const uint8_t *limit1,
@ sp+12 const uint8_t *thresh1,
-_vpx_lpf_horizontal_4_dual_neon:
- vpx_lpf_horizontal_4_dual_neon: @ PROC
+vpx_lpf_horizontal_4_dual_neon: @ PROC
push {lr}
ldr r12, [sp, #4] @ load thresh0
@@ -323,7 +319,7 @@ _vpx_lpf_horizontal_4_dual_neon:
vpop {d8-d15} @ restore neon registers
pop {pc}
- .size vpx_lpf_horizontal_4_dual_neon, .-vpx_lpf_horizontal_4_dual_neon @ ENDP @ |vpx_lpf_horizontal_4_dual_neon|
+.size vpx_lpf_horizontal_4_dual_neon, .-vpx_lpf_horizontal_4_dual_neon @ ENDP @ |vpx_lpf_horizontal_4_dual_neon|
@void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p,
@ const uint8_t *blimit0,
@@ -341,8 +337,7 @@ _vpx_lpf_horizontal_4_dual_neon:
@ sp+8 const uint8_t *limit1,
@ sp+12 const uint8_t *thresh1,
-_vpx_lpf_vertical_4_dual_neon:
- vpx_lpf_vertical_4_dual_neon: @ PROC
+vpx_lpf_vertical_4_dual_neon: @ PROC
push {lr}
ldr r12, [sp, #4] @ load thresh0
@@ -439,7 +434,7 @@ _vpx_lpf_vertical_4_dual_neon:
vpop {d8-d15} @ restore neon registers
pop {pc}
- .size vpx_lpf_vertical_4_dual_neon, .-vpx_lpf_vertical_4_dual_neon @ ENDP @ |vpx_lpf_vertical_4_dual_neon|
+.size vpx_lpf_vertical_4_dual_neon, .-vpx_lpf_vertical_4_dual_neon @ ENDP @ |vpx_lpf_vertical_4_dual_neon|
@ void filter4_16();
@ This is a helper function for the loopfilters. The individual functions do the
@@ -464,8 +459,7 @@ _vpx_lpf_vertical_4_dual_neon:
@ q6 op0
@ q7 oq0
@ q8 oq1
-_filter4_16:
- filter4_16: @ PROC
+filter4_16: @ PROC
@ filter_mask
vabd.u8 q11, q3, q4 @ m1 = abs(p3 - p2)
@@ -558,6 +552,6 @@ _filter4_16:
veor q8, q12, q10 @ *oq1 = u^0x80
bx lr
- .size filter4_16, .-filter4_16 @ ENDP @ |filter4_16|
+.size filter4_16, .-filter4_16 @ ENDP @ |filter4_16|
- .section .note.GNU-stack,"",%progbits
+ .section .note.GNU-stack,"",%progbits