diff options
Diffstat (limited to 'string/aarch64/strlen.S')
-rw-r--r-- | string/aarch64/strlen.S | 18 |
1 file changed, 5 insertions, 13 deletions
diff --git a/string/aarch64/strlen.S b/string/aarch64/strlen.S
index 26388d7..2293f73 100644
--- a/string/aarch64/strlen.S
+++ b/string/aarch64/strlen.S
@@ -10,6 +10,8 @@
  * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
  */
 
+#include "../asmdefs.h"
+
 /* To test the page crossing code path more thoroughly, compile with
    -DTEST_PAGE_CROSS - this will force all calls through the slower
    entry path. This option is not intended for production use. */
@@ -30,16 +32,6 @@
 #define tmp4 x7
 #define zeroones x8
 
-#define L(l) .L ## l
-
-	.macro def_fn f p2align=0
-	.text
-	.p2align \p2align
-	.global \f
-	.type \f, %function
-\f:
-	.endm
-
 	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
 	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
 	   can be done in parallel across the entire word. A faster check
@@ -81,7 +73,7 @@
 	   whether the first fetch, which may be misaligned, crosses a page
 	   boundary. */
 
-def_fn __strlen_aarch64 p2align=6
+ENTRY (__strlen_aarch64)
 	and tmp1, srcin, MIN_PAGE_SIZE - 1
 	mov zeroones, REP8_01
 	cmp tmp1, MIN_PAGE_SIZE - 16
@@ -122,7 +114,7 @@ L(main_loop_entry):
 	sub src, src, 16
 L(main_loop):
 	ldp data1, data2, [src, 32]!
-.Lpage_cross_entry:
+L(page_cross_entry):
 	sub tmp1, data1, zeroones
 	sub tmp3, data2, zeroones
 	orr tmp2, tmp1, tmp3
@@ -211,4 +203,4 @@ L(page_cross):
 	csel data2, data2, tmp2, eq
 	b L(page_cross_entry)
 
-	.size __strlen_aarch64, . - __strlen_aarch64
+END (__strlen_aarch64)