author     Wilco Dijkstra <wdijkstr@arm.com>      2020-01-02 09:14:42 +0000
committer  Szabolcs Nagy <szabolcs.nagy@arm.com>  2020-01-02 09:16:24 +0000
commit     31b560bc3b82ae45044e6455493ce6783aa94d98 (patch)
tree       fa12e74261b0d256c5eda0f0404dbb7f53356b80
parent     3377796fe24ff1d5396609205426402678208eb1 (diff)
download   arm-optimized-routines-31b560bc3b82ae45044e6455493ce6783aa94d98.tar.gz
string: Use asmdefs.h, ENTRY and END
Clean up the string functions to use asmdefs.h, ENTRY and END instead of defining the macros in each file.
-rw-r--r--  string/aarch64/memchr.S       | 14
-rw-r--r--  string/aarch64/memcmp.S       | 14
-rw-r--r--  string/aarch64/memset.S       | 15
-rw-r--r--  string/aarch64/strchr.S       | 14
-rw-r--r--  string/aarch64/strchrnul.S    | 14
-rw-r--r--  string/aarch64/strcmp.S       | 15
-rw-r--r--  string/aarch64/strcpy.S       | 14
-rw-r--r--  string/aarch64/strlen.S       | 16
-rw-r--r--  string/aarch64/strncmp.S      | 13
-rw-r--r--  string/aarch64/strnlen.S      | 15
-rw-r--r--  string/arm/memcpy.S           | 14
-rw-r--r--  string/arm/strcmp-armv6m.S    | 11
-rw-r--r--  string/arm/strcmp.S           | 17
-rw-r--r--  string/arm/strlen-armv6t2.S   | 13
14 files changed, 55 insertions(+), 144 deletions(-)
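The hunks below swap each file's local def_fn macro and trailing .size directive for ENTRY/ENTRY_ALIGN and END from asmdefs.h. The header itself is not shown in this patch; as a rough sketch only, the macros could look something like the following (the names ENTRY, ENTRY_ALIGN, END and the L() label helper are taken from their uses in the diff, while the macro bodies and the default alignment of 6 are assumptions):

    /* Sketch of asmdefs.h (assumed contents, not part of this commit).
       ENTRY_ALIGN emits the section, alignment, symbol type and CFI
       prologue that def_fn used to open-code; END closes the CFI region
       and sets the symbol size.  */

    #define L(l) .L ## l

    #define ENTRY_ALIGN(name, alignment)  \
            .text;                        \
            .p2align alignment;           \
            .global name;                 \
            .type name, %function;        \
            name:                         \
            .cfi_startproc;

    #define ENTRY(name) ENTRY_ALIGN (name, 6)

    #define END(name)                     \
            .cfi_endproc;                 \
            .size name, . - name;

Under that assumption, ENTRY (__memcmp_aarch64) keeps the 64-byte alignment the old "def_fn __memcmp_aarch64 p2align=6" requested, ENTRY_ALIGN (__strncmp_aarch64, 0) preserves the unaligned entry needed by the pre-padded loop, and the .cfi_startproc/.cfi_endproc pairs removed from the Arm strcmp files are supplied by the macros instead.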
diff --git a/string/aarch64/memchr.S b/string/aarch64/memchr.S
index aff6e3d..6ffade1 100644
--- a/string/aarch64/memchr.S
+++ b/string/aarch64/memchr.S
@@ -11,6 +11,8 @@
* Neon Available.
*/
+#include "../asmdefs.h"
+
/* Arguments and results. */
#define srcin x0
#define chrin w1
@@ -44,15 +46,7 @@
* identify exactly which byte has matched.
*/
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn __memchr_aarch64
+ENTRY (__memchr_aarch64)
/* Do not dereference srcin if no bytes to compare. */
cbz cntin, .Lzero_length
/*
@@ -146,4 +140,4 @@ def_fn __memchr_aarch64
mov result, #0
ret
- .size __memchr_aarch64, . - __memchr_aarch64
+END (__memchr_aarch64)
diff --git a/string/aarch64/memcmp.S b/string/aarch64/memcmp.S
index 72a66bc..6722516 100644
--- a/string/aarch64/memcmp.S
+++ b/string/aarch64/memcmp.S
@@ -9,7 +9,7 @@
* ARMv8-a, AArch64, unaligned accesses.
*/
-#define L(l) .L ## l
+#include "../asmdefs.h"
/* Parameters and result. */
#define src1 x0
@@ -27,15 +27,7 @@
#define tmp1 x7
#define tmp2 x8
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn __memcmp_aarch64 p2align=6
+ENTRY (__memcmp_aarch64)
subs limit, limit, 8
b.lo L(less8)
@@ -138,4 +130,4 @@ L(byte_loop):
sub result, data1w, data2w
ret
- .size __memcmp_aarch64, . - __memcmp_aarch64
+END (__memcmp_aarch64)
diff --git a/string/aarch64/memset.S b/string/aarch64/memset.S
index aef22e9..3868141 100644
--- a/string/aarch64/memset.S
+++ b/string/aarch64/memset.S
@@ -11,6 +11,7 @@
*
*/
+#include "../asmdefs.h"
#define dstin x0
#define val x1
@@ -25,17 +26,7 @@
#define zva_len x7
#define zva_lenw w7
-#define L(l) .L ## l
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn __memset_aarch64 p2align=6
+ENTRY (__memset_aarch64)
dup v0.16B, valw
add dstend, dstin, count
@@ -185,4 +176,4 @@ L(zva_other):
4: add count, count, zva_len
b L(tail64)
- .size __memset_aarch64, . - __memset_aarch64
+END (__memset_aarch64)
diff --git a/string/aarch64/strchr.S b/string/aarch64/strchr.S
index cdb38aa..66a1fdd 100644
--- a/string/aarch64/strchr.S
+++ b/string/aarch64/strchr.S
@@ -11,6 +11,8 @@
* Neon Available.
*/
+#include "../asmdefs.h"
+
/* Arguments and results. */
#define srcin x0
#define chrin w1
@@ -48,15 +50,7 @@
/* Locals and temporaries. */
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn __strchr_aarch64
+ENTRY (__strchr_aarch64)
/* Magic constant 0x40100401 to allow us to identify which lane
matches the requested byte. Magic constant 0x80200802 used
similarly for NUL termination. */
@@ -134,4 +128,4 @@ def_fn __strchr_aarch64
csel result, result, xzr, eq
ret
- .size __strchr_aarch64, . - __strchr_aarch64
+END (__strchr_aarch64)
diff --git a/string/aarch64/strchrnul.S b/string/aarch64/strchrnul.S
index 4aee293..697dbf4 100644
--- a/string/aarch64/strchrnul.S
+++ b/string/aarch64/strchrnul.S
@@ -11,6 +11,8 @@
* Neon Available.
*/
+#include "../asmdefs.h"
+
/* Arguments and results. */
#define srcin x0
#define chrin w1
@@ -44,15 +46,7 @@
/* Locals and temporaries. */
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn __strchrnul_aarch64
+ENTRY (__strchrnul_aarch64)
/* Magic constant 0x40100401 to allow us to identify which lane
matches the termination condition. */
mov wtmp2, #0x0401
@@ -119,4 +113,4 @@ def_fn __strchrnul_aarch64
add result, src, tmp1, lsr #1
ret
- .size __strchrnul_aarch64, . - __strchrnul_aarch64
+END (__strchrnul_aarch64)
diff --git a/string/aarch64/strcmp.S b/string/aarch64/strcmp.S
index 2aa367c..65af5ce 100644
--- a/string/aarch64/strcmp.S
+++ b/string/aarch64/strcmp.S
@@ -10,15 +10,7 @@
* ARMv8-a, AArch64
*/
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-#define L(label) .L ## label
+#include "../asmdefs.h"
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
@@ -44,7 +36,7 @@
#define pos x11
/* Start of performance-critical section -- one 64B cache line. */
-def_fn __strcmp_aarch64 p2align=6
+ENTRY (__strcmp_aarch64)
eor tmp1, src1, src2
mov zeroones, #REP8_01
tst tmp1, #7
@@ -174,4 +166,5 @@ L(loop_misaligned):
L(done):
sub result, data1, data2
ret
- .size __strcmp_aarch64, .-__strcmp_aarch64
+
+END (__strcmp_aarch64)
diff --git a/string/aarch64/strcpy.S b/string/aarch64/strcpy.S
index 4e10b4d..766e71b 100644
--- a/string/aarch64/strcpy.S
+++ b/string/aarch64/strcpy.S
@@ -10,6 +10,8 @@
* ARMv8-a, AArch64, unaligned accesses, min page size 4k.
*/
+#include "../asmdefs.h"
+
/* To build as stpcpy, define BUILD_STPCPY before compiling this file.
To test the page crossing code path more thoroughly, compile with
@@ -46,14 +48,6 @@
#define STRCPY __strcpy_aarch64
#endif
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
can be done in parallel across the entire word. */
@@ -85,7 +79,7 @@
#define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
-def_fn STRCPY p2align=6
+ENTRY (STRCPY)
/* For moderately short strings, the fastest way to do the copy is to
calculate the length of the string in the same way as strlen, then
essentially do a memcpy of the result. This avoids the need for
@@ -311,4 +305,4 @@ def_fn STRCPY p2align=6
bic has_nul2, tmp3, tmp4
b .Lfp_gt8
- .size STRCPY, . - STRCPY
+END (STRCPY)
diff --git a/string/aarch64/strlen.S b/string/aarch64/strlen.S
index 26388d7..8a7d753 100644
--- a/string/aarch64/strlen.S
+++ b/string/aarch64/strlen.S
@@ -10,6 +10,8 @@
* ARMv8-a, AArch64, unaligned accesses, min page size 4k.
*/
+#include "../asmdefs.h"
+
/* To test the page crossing code path more thoroughly, compile with
-DTEST_PAGE_CROSS - this will force all calls through the slower
entry path. This option is not intended for production use. */
@@ -30,16 +32,6 @@
#define tmp4 x7
#define zeroones x8
-#define L(l) .L ## l
-
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
can be done in parallel across the entire word. A faster check
@@ -81,7 +73,7 @@
whether the first fetch, which may be misaligned, crosses a page
boundary. */
-def_fn __strlen_aarch64 p2align=6
+ENTRY (__strlen_aarch64)
and tmp1, srcin, MIN_PAGE_SIZE - 1
mov zeroones, REP8_01
cmp tmp1, MIN_PAGE_SIZE - 16
@@ -211,4 +203,4 @@ L(page_cross):
csel data2, data2, tmp2, eq
b L(page_cross_entry)
- .size __strlen_aarch64, . - __strlen_aarch64
+END (__strlen_aarch64)
diff --git a/string/aarch64/strncmp.S b/string/aarch64/strncmp.S
index ced72b9..e29fb7d 100644
--- a/string/aarch64/strncmp.S
+++ b/string/aarch64/strncmp.S
@@ -10,13 +10,7 @@
* ARMv8-a, AArch64
*/
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
+#include "../asmdefs.h"
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
@@ -51,7 +45,7 @@
.rep 7
nop /* Pad so that the loop below fits a cache line. */
.endr
-def_fn __strncmp_aarch64
+ENTRY_ALIGN (__strncmp_aarch64, 0)
cbz limit, .Lret0
eor tmp1, src1, src2
mov zeroones, #REP8_01
@@ -263,4 +257,5 @@ def_fn __strncmp_aarch64
.Lret0:
mov result, #0
ret
- .size __strncmp_aarch64, . - __strncmp_aarch64
+
+END (__strncmp_aarch64)
diff --git a/string/aarch64/strnlen.S b/string/aarch64/strnlen.S
index b02c846..bf72686 100644
--- a/string/aarch64/strnlen.S
+++ b/string/aarch64/strnlen.S
@@ -10,6 +10,8 @@
* ARMv8-a, AArch64
*/
+#include "../asmdefs.h"
+
/* Arguments and results. */
#define srcin x0
#define len x0
@@ -30,14 +32,6 @@
#define pos x13
#define limit_wd x14
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080
@@ -54,7 +48,7 @@
mov len, limit
ret
-def_fn __strnlen_aarch64
+ENTRY_ALIGN (__strnlen_aarch64, 0)
cbz limit, .Lhit_limit
mov zeroones, #REP8_01
bic src, srcin, #15
@@ -157,4 +151,5 @@ def_fn __strnlen_aarch64
csinv data1, data1, xzr, le
csel data2, data2, data2a, le
b .Lrealigned
- .size __strnlen_aarch64, . - .Lstart /* Include pre-padding in size. */
+
+END (__strnlen_aarch64)
diff --git a/string/arm/memcpy.S b/string/arm/memcpy.S
index 3346e4f..e8f5843 100644
--- a/string/arm/memcpy.S
+++ b/string/arm/memcpy.S
@@ -17,6 +17,8 @@
*/
+#include "../asmdefs.h"
+
.syntax unified
/* This implementation requires ARM state. */
.arm
@@ -118,15 +120,7 @@
.endm
#endif
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
-def_fn __memcpy_arm p2align=6
+ENTRY (__memcpy_arm)
mov dst, dstin /* Preserve dstin, we need to return it. */
cmp count, #64
@@ -590,4 +584,4 @@ def_fn __memcpy_arm p2align=6
bne .Ltail63unaligned
bx lr
- .size __memcpy_arm, . - __memcpy_arm
+END (__memcpy_arm)
diff --git a/string/arm/strcmp-armv6m.S b/string/arm/strcmp-armv6m.S
index 5ea06c9..d615231 100644
--- a/string/arm/strcmp-armv6m.S
+++ b/string/arm/strcmp-armv6m.S
@@ -26,12 +26,7 @@
DoSub \n, \label
.endm
- .text
- .p2align 0
- .global __strcmp_armv6m
- .type __strcmp_armv6m, %function
-__strcmp_armv6m:
- .cfi_startproc
+ENTRY_ALIGN (__strcmp_armv6m, 4)
mov r2, r0
push {r4, r5, r6, lr}
orrs r2, r1
@@ -114,5 +109,5 @@ __strcmp_armv6m:
7:
subs r0, r2, r3
pop {r4, r5, r6, pc}
- .cfi_endproc
- .size __strcmp_armv6m, . - __strcmp_armv6m
+
+END (__strcmp_armv6m)
diff --git a/string/arm/strcmp.S b/string/arm/strcmp.S
index fb9cae3..b46bc6d 100644
--- a/string/arm/strcmp.S
+++ b/string/arm/strcmp.S
@@ -10,6 +10,8 @@
is sufficiently aligned. Use saturating arithmetic to optimize
the compares. */
+#include "../asmdefs.h"
+
/* Build Options:
STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
byte in the string. If comparing completely random strings
@@ -48,14 +50,6 @@
#define LSB 0x000000ff
#endif /* not __ARM_BIG_ENDIAN */
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
-
/* Parameters and result. */
#define src1 r0
#define src2 r1
@@ -138,7 +132,7 @@
bx lr
nop
#endif
-def_fn __strcmp_arm
+ENTRY_ALIGN (__strcmp_arm, 0)
#if STRCMP_NO_PRECHECK == 0
ldrb r2, [src1]
ldrb r3, [src2]
@@ -147,7 +141,6 @@ def_fn __strcmp_arm
cmpcs r2, r3
bne .Lfastpath_exit
#endif
- .cfi_startproc
strd r4, r5, [sp, #-16]!
.cfi_def_cfa_offset 16
.cfi_offset 4, -16
@@ -475,5 +468,5 @@ def_fn __strcmp_arm
.cfi_restore 7
sub result, result, data2, lsr #24
bx lr
- .cfi_endproc
- .size __strcmp, . - .Lstrcmp_start_addr
+
+END (__strcmp_arm)
diff --git a/string/arm/strlen-armv6t2.S b/string/arm/strlen-armv6t2.S
index 279ec87..7245440 100644
--- a/string/arm/strlen-armv6t2.S
+++ b/string/arm/strlen-armv6t2.S
@@ -11,13 +11,7 @@
*/
- .macro def_fn f p2align=0
- .text
- .p2align \p2align
- .global \f
- .type \f, %function
-\f:
- .endm
+#include "../asmdefs.h"
#ifdef __ARMEB__
#define S2LO lsl
@@ -44,7 +38,7 @@
#define tmp1 r4 /* Overlaps const_0 */
#define tmp2 r5
-def_fn __strlen_armv6t2 p2align=6
+ENTRY (__strlen_armv6t2)
pld [srcin, #0]
strd r4, r5, [sp, #-8]!
bic src, srcin, #7
@@ -122,4 +116,5 @@ def_fn __strlen_armv6t2 p2align=6
movne data1a, const_m1
mov const_0, #0
b .Lstart_realigned
- .size __strlen_armv6t2, . - __strlen_armv6t2
+
+END (__strlen_armv6t2)