diff options
author | Wilco Dijkstra <wdijkstr@arm.com> | 2020-01-02 09:14:42 +0000 |
---|---|---|
committer | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2020-01-02 09:16:24 +0000 |
commit | 31b560bc3b82ae45044e6455493ce6783aa94d98 (patch) | |
tree | fa12e74261b0d256c5eda0f0404dbb7f53356b80 | |
parent | 3377796fe24ff1d5396609205426402678208eb1 (diff) | |
download | arm-optimized-routines-31b560bc3b82ae45044e6455493ce6783aa94d98.tar.gz |
string: Use asmdefs.h, ENTRY and END
Cleanup string functions to use asmdefs.h, ENTRY and END instead of
defining macros in each file.
-rw-r--r-- | string/aarch64/memchr.S | 14 | ||||
-rw-r--r-- | string/aarch64/memcmp.S | 14 | ||||
-rw-r--r-- | string/aarch64/memset.S | 15 | ||||
-rw-r--r-- | string/aarch64/strchr.S | 14 | ||||
-rw-r--r-- | string/aarch64/strchrnul.S | 14 | ||||
-rw-r--r-- | string/aarch64/strcmp.S | 15 | ||||
-rw-r--r-- | string/aarch64/strcpy.S | 14 | ||||
-rw-r--r-- | string/aarch64/strlen.S | 16 | ||||
-rw-r--r-- | string/aarch64/strncmp.S | 13 | ||||
-rw-r--r-- | string/aarch64/strnlen.S | 15 | ||||
-rw-r--r-- | string/arm/memcpy.S | 14 | ||||
-rw-r--r-- | string/arm/strcmp-armv6m.S | 11 | ||||
-rw-r--r-- | string/arm/strcmp.S | 17 | ||||
-rw-r--r-- | string/arm/strlen-armv6t2.S | 13 |
14 files changed, 55 insertions, 144 deletions
diff --git a/string/aarch64/memchr.S b/string/aarch64/memchr.S index aff6e3d..6ffade1 100644 --- a/string/aarch64/memchr.S +++ b/string/aarch64/memchr.S @@ -11,6 +11,8 @@ * Neon Available. */ +#include "../asmdefs.h" + /* Arguments and results. */ #define srcin x0 #define chrin w1 @@ -44,15 +46,7 @@ * identify exactly which byte has matched. */ - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -def_fn __memchr_aarch64 +ENTRY (__memchr_aarch64) /* Do not dereference srcin if no bytes to compare. */ cbz cntin, .Lzero_length /* @@ -146,4 +140,4 @@ def_fn __memchr_aarch64 mov result, #0 ret - .size __memchr_aarch64, . - __memchr_aarch64 +END (__memchr_aarch64)
diff --git a/string/aarch64/memcmp.S b/string/aarch64/memcmp.S index 72a66bc..6722516 100644 --- a/string/aarch64/memcmp.S +++ b/string/aarch64/memcmp.S @@ -9,7 +9,7 @@ * ARMv8-a, AArch64, unaligned accesses. */ -#define L(l) .L ## l +#include "../asmdefs.h" /* Parameters and result. */ #define src1 x0 @@ -27,15 +27,7 @@ #define tmp1 x7 #define tmp2 x8 - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -def_fn __memcmp_aarch64 p2align=6 +ENTRY (__memcmp_aarch64) subs limit, limit, 8 b.lo L(less8) @@ -138,4 +130,4 @@ L(byte_loop): sub result, data1w, data2w ret - .size __memcmp_aarch64, . - __memcmp_aarch64 +END (__memcmp_aarch64)
diff --git a/string/aarch64/memset.S b/string/aarch64/memset.S index aef22e9..3868141 100644 --- a/string/aarch64/memset.S +++ b/string/aarch64/memset.S @@ -11,6 +11,7 @@ * */ +#include "../asmdefs.h" #define dstin x0 #define val x1 @@ -25,17 +26,7 @@ #define zva_len x7 #define zva_lenw w7 -#define L(l) .L ## l - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -def_fn __memset_aarch64 p2align=6 +ENTRY (__memset_aarch64) dup v0.16B, valw add dstend, dstin, count @@ -185,4 +176,4 @@ L(zva_other): 4: add count, count, zva_len b L(tail64) - .size __memset_aarch64, . - __memset_aarch64 +END (__memset_aarch64)
diff --git a/string/aarch64/strchr.S b/string/aarch64/strchr.S index cdb38aa..66a1fdd 100644 --- a/string/aarch64/strchr.S +++ b/string/aarch64/strchr.S @@ -11,6 +11,8 @@ * Neon Available. */ +#include "../asmdefs.h" + /* Arguments and results. */ #define srcin x0 #define chrin w1 @@ -48,15 +50,7 @@ /* Locals and temporaries. */ - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -def_fn __strchr_aarch64 +ENTRY (__strchr_aarch64) /* Magic constant 0x40100401 to allow us to identify which lane matches the requested byte. Magic constant 0x80200802 used similarly for NUL termination. */ @@ -134,4 +128,4 @@ def_fn __strchr_aarch64 csel result, result, xzr, eq ret - .size __strchr_aarch64, . - __strchr_aarch64 +END (__strchr_aarch64)
diff --git a/string/aarch64/strchrnul.S b/string/aarch64/strchrnul.S index 4aee293..697dbf4 100644 --- a/string/aarch64/strchrnul.S +++ b/string/aarch64/strchrnul.S @@ -11,6 +11,8 @@ * Neon Available. */ +#include "../asmdefs.h" + /* Arguments and results. */ #define srcin x0 #define chrin w1 @@ -44,15 +46,7 @@ /* Locals and temporaries. */ - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -def_fn __strchrnul_aarch64 +ENTRY (__strchrnul_aarch64) /* Magic constant 0x40100401 to allow us to identify which lane matches the termination condition. */ mov wtmp2, #0x0401 @@ -119,4 +113,4 @@ def_fn __strchrnul_aarch64 add result, src, tmp1, lsr #1 ret - .size __strchrnul_aarch64, . - __strchrnul_aarch64 +END (__strchrnul_aarch64)
diff --git a/string/aarch64/strcmp.S b/string/aarch64/strcmp.S index 2aa367c..65af5ce 100644 --- a/string/aarch64/strcmp.S +++ b/string/aarch64/strcmp.S @@ -10,15 +10,7 @@ * ARMv8-a, AArch64 */ - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -#define L(label) .L ## label +#include "../asmdefs.h" #define REP8_01 0x0101010101010101 #define REP8_7f 0x7f7f7f7f7f7f7f7f @@ -44,7 +36,7 @@ #define pos x11 /* Start of performance-critical section -- one 64B cache line. */ -def_fn __strcmp_aarch64 p2align=6 +ENTRY (__strcmp_aarch64) eor tmp1, src1, src2 mov zeroones, #REP8_01 tst tmp1, #7 @@ -174,4 +166,5 @@ L(loop_misaligned): L(done): sub result, data1, data2 ret - .size __strcmp_aarch64, .-__strcmp_aarch64 + +END (__strcmp_aarch64)
diff --git a/string/aarch64/strcpy.S b/string/aarch64/strcpy.S index 4e10b4d..766e71b 100644 --- a/string/aarch64/strcpy.S +++ b/string/aarch64/strcpy.S @@ -10,6 +10,8 @@ * ARMv8-a, AArch64, unaligned accesses, min page size 4k. */ +#include "../asmdefs.h" + /* To build as stpcpy, define BUILD_STPCPY before compiling this file. To test the page crossing code path more thoroughly, compile with @@ -46,14 +48,6 @@ #define STRCPY __strcpy_aarch64 #endif - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and can be done in parallel across the entire word. */ @@ -85,7 +79,7 @@ #define MIN_PAGE_SIZE (1 << MIN_PAGE_P2) -def_fn STRCPY p2align=6 +ENTRY (STRCPY) /* For moderately short strings, the fastest way to do the copy is to calculate the length of the string in the same way as strlen, then essentially do a memcpy of the result. This avoids the need for @@ -311,4 +305,4 @@ def_fn STRCPY p2align=6 bic has_nul2, tmp3, tmp4 b .Lfp_gt8 - .size STRCPY, . - STRCPY +END (STRCPY)
diff --git a/string/aarch64/strlen.S b/string/aarch64/strlen.S index 26388d7..8a7d753 100644 --- a/string/aarch64/strlen.S +++ b/string/aarch64/strlen.S @@ -10,6 +10,8 @@ * ARMv8-a, AArch64, unaligned accesses, min page size 4k. */ +#include "../asmdefs.h" + /* To test the page crossing code path more thoroughly, compile with -DTEST_PAGE_CROSS - this will force all calls through the slower entry path. This option is not intended for production use. */ @@ -30,16 +32,6 @@ #define tmp4 x7 #define zeroones x8 -#define L(l) .L ## l - - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and can be done in parallel across the entire word. A faster check @@ -81,7 +73,7 @@ whether the first fetch, which may be misaligned, crosses a page boundary. */ -def_fn __strlen_aarch64 p2align=6 +ENTRY (__strlen_aarch64) and tmp1, srcin, MIN_PAGE_SIZE - 1 mov zeroones, REP8_01 cmp tmp1, MIN_PAGE_SIZE - 16 @@ -211,4 +203,4 @@ L(page_cross): csel data2, data2, tmp2, eq b L(page_cross_entry) - .size __strlen_aarch64, . - __strlen_aarch64 +END (__strlen_aarch64)
diff --git a/string/aarch64/strncmp.S b/string/aarch64/strncmp.S index ced72b9..e29fb7d 100644 --- a/string/aarch64/strncmp.S +++ b/string/aarch64/strncmp.S @@ -10,13 +10,7 @@ * ARMv8-a, AArch64 */ - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm +#include "../asmdefs.h" #define REP8_01 0x0101010101010101 #define REP8_7f 0x7f7f7f7f7f7f7f7f @@ -51,7 +45,7 @@ .rep 7 nop /* Pad so that the loop below fits a cache line. */ .endr -def_fn __strncmp_aarch64 +ENTRY_ALIGN (__strncmp_aarch64, 0) cbz limit, .Lret0 eor tmp1, src1, src2 mov zeroones, #REP8_01 @@ -263,4 +257,5 @@ def_fn __strncmp_aarch64 .Lret0: mov result, #0 ret - .size __strncmp_aarch64, . - __strncmp_aarch64 + +END ( __strncmp_aarch64)
diff --git a/string/aarch64/strnlen.S b/string/aarch64/strnlen.S index b02c846..bf72686 100644 --- a/string/aarch64/strnlen.S +++ b/string/aarch64/strnlen.S @@ -10,6 +10,8 @@ * ARMv8-a, AArch64 */ +#include "../asmdefs.h" + /* Arguments and results. */ #define srcin x0 #define len x0 @@ -30,14 +32,6 @@ #define pos x13 #define limit_wd x14 - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - #define REP8_01 0x0101010101010101 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 @@ -54,7 +48,7 @@ mov len, limit ret -def_fn __strnlen_aarch64 +ENTRY_ALIGN (__strnlen_aarch64, 0) cbz limit, .Lhit_limit mov zeroones, #REP8_01 bic src, srcin, #15 @@ -157,4 +151,5 @@ def_fn __strnlen_aarch64 csinv data1, data1, xzr, le csel data2, data2, data2a, le b .Lrealigned - .size __strnlen_aarch64, . - .Lstart /* Include pre-padding in size. */ + +END (__strnlen_aarch64)
diff --git a/string/arm/memcpy.S b/string/arm/memcpy.S index 3346e4f..e8f5843 100644 --- a/string/arm/memcpy.S +++ b/string/arm/memcpy.S @@ -17,6 +17,8 @@ */ +#include "../asmdefs.h" + .syntax unified /* This implementation requires ARM state. */ .arm @@ -118,15 +120,7 @@ .endm #endif - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - -def_fn __memcpy_arm p2align=6 +ENTRY (__memcpy_arm) mov dst, dstin /* Preserve dstin, we need to return it. */ cmp count, #64 @@ -590,4 +584,4 @@ def_fn __memcpy_arm p2align=6 bne .Ltail63unaligned bx lr - .size __memcpy_arm, . - __memcpy_arm +END (__memcpy_arm)
diff --git a/string/arm/strcmp-armv6m.S b/string/arm/strcmp-armv6m.S index 5ea06c9..d615231 100644 --- a/string/arm/strcmp-armv6m.S +++ b/string/arm/strcmp-armv6m.S @@ -26,12 +26,7 @@ DoSub \n, \label .endm - .text - .p2align 0 - .global __strcmp_armv6m - .type __strcmp_armv6m, %function -__strcmp_armv6m: - .cfi_startproc +ENTRY_ALIGN (__strcmp_armv6m, 4) mov r2, r0 push {r4, r5, r6, lr} orrs r2, r1 @@ -114,5 +109,5 @@ __strcmp_armv6m: 7: subs r0, r2, r3 pop {r4, r5, r6, pc} - .cfi_endproc - .size __strcmp_armv6m, . - __strcmp_armv6m + +END (__strcmp_armv6m)
diff --git a/string/arm/strcmp.S b/string/arm/strcmp.S index fb9cae3..b46bc6d 100644 --- a/string/arm/strcmp.S +++ b/string/arm/strcmp.S @@ -10,6 +10,8 @@ is sufficiently aligned. Use saturating arithmetic to optimize the compares. */ +#include "../asmdefs.h" + /* Build Options: STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first byte in the string. If comparing completely random strings @@ -48,14 +50,6 @@ #define LSB 0x000000ff #endif /* not __ARM_BIG_ENDIAN */ - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm - /* Parameters and result. */ #define src1 r0 #define src2 r1 @@ -138,7 +132,7 @@ bx lr nop #endif -def_fn __strcmp_arm +ENTRY_ALIGN (__strcmp_arm, 0) #if STRCMP_NO_PRECHECK == 0 ldrb r2, [src1] ldrb r3, [src2] @@ -147,7 +141,6 @@ def_fn __strcmp_arm cmpcs r2, r3 bne .Lfastpath_exit #endif - .cfi_startproc strd r4, r5, [sp, #-16]! .cfi_def_cfa_offset 16 .cfi_offset 4, -16 @@ -475,5 +468,5 @@ def_fn __strcmp_arm .cfi_restore 7 sub result, result, data2, lsr #24 bx lr - .cfi_endproc - .size __strcmp, . - .Lstrcmp_start_addr + +END (__strcmp_arm)
diff --git a/string/arm/strlen-armv6t2.S b/string/arm/strlen-armv6t2.S index 279ec87..7245440 100644 --- a/string/arm/strlen-armv6t2.S +++ b/string/arm/strlen-armv6t2.S @@ -11,13 +11,7 @@ */ - .macro def_fn f p2align=0 - .text - .p2align \p2align - .global \f - .type \f, %function -\f: - .endm +#include "../asmdefs.h" #ifdef __ARMEB__ #define S2LO lsl @@ -44,7 +38,7 @@ #define tmp1 r4 /* Overlaps const_0 */ #define tmp2 r5 -def_fn __strlen_armv6t2 p2align=6 +ENTRY (__strlen_armv6t2) pld [srcin, #0] strd r4, r5, [sp, #-8]! bic src, srcin, #7 @@ -122,4 +116,5 @@ def_fn __strlen_armv6t2 p2align=6 movne data1a, const_m1 mov const_0, #0 b .Lstart_realigned - .size __strlen_armv6t2, . - __strlen_armv6t2 + +END (__strlen_armv6t2)