diff options
Diffstat (limited to 'string/aarch64/strchr.S')
-rw-r--r-- | string/aarch64/strchr.S | 24 |
1 files changed, 9 insertions, 15 deletions
diff --git a/string/aarch64/strchr.S b/string/aarch64/strchr.S
index cdb38aa..00d9be3 100644
--- a/string/aarch64/strchr.S
+++ b/string/aarch64/strchr.S
@@ -11,6 +11,8 @@
  * Neon Available.
  */
 
+#include "../asmdefs.h"
+
 /* Arguments and results.  */
 #define srcin		x0
 #define chrin		w1
@@ -48,15 +50,7 @@
 
 /* Locals and temporaries.  */
 
-	.macro def_fn f p2align=0
-	.text
-	.p2align \p2align
-	.global \f
-	.type \f, %function
-\f:
-	.endm
-
-def_fn __strchr_aarch64
+ENTRY (__strchr_aarch64)
 	/* Magic constant 0x40100401 to allow us to identify which lane
 	   matches the requested byte.  Magic constant 0x80200802 used
 	   similarly for NUL termination.  */
@@ -67,7 +61,7 @@ def_fn __strchr_aarch64
 	dup	vrepmask_c.4s, wtmp2
 	ands	tmp1, srcin, #31
 	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s	/* equiv: lsl #1 */
-	b.eq	.Lloop
+	b.eq	L(loop)
 
 	/* Input string is not 32-byte aligned.  Rather than forcing
 	   the padding bytes to a safe value, we calculate the syndrome
@@ -93,9 +87,9 @@ def_fn __strchr_aarch64
 
 	mov	tmp3, vend1.d[0]
 	bic	tmp1, tmp3, tmp1	// Mask padding bits.
-	cbnz	tmp1, .Ltail
+	cbnz	tmp1, L(tail)
 
-.Lloop:
+L(loop):
 	ld1	{vdata1.16b, vdata2.16b}, [src], #32
 	cmeq	vhas_nul1.16b, vdata1.16b, #0
 	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
@@ -107,7 +101,7 @@ def_fn __strchr_aarch64
 	orr	vend1.16b, vend1.16b, vend2.16b
 	addp	vend1.2d, vend1.2d, vend1.2d
 	mov	tmp1, vend1.d[0]
-	cbz	tmp1, .Lloop
+	cbz	tmp1, L(loop)
 
 	/* Termination condition found.  Now need to establish exactly why
 	   we terminated.  */
@@ -121,7 +115,7 @@ def_fn __strchr_aarch64
 	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
 	mov	tmp1, vend1.d[0]
 
-.Ltail:
+L(tail):
 	/* Count the trailing zeros, by bit reversing...  */
 	rbit	tmp1, tmp1
 	/* Re-bias source.  */
@@ -134,4 +128,4 @@ def_fn __strchr_aarch64
 	csel	result, result, xzr, eq
 	ret
 
-	.size	__strchr_aarch64, . - __strchr_aarch64
+END (__strchr_aarch64)