diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 00:57:30 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 00:57:30 +0000 |
commit | 0edd6499aaed16bf45de92bb0ad1c729486ce6f4 (patch) | |
tree | b6182e391304fb3a42c51d482dcf671f540f2363 /string/aarch64/strchr-mte.S | |
parent | f2e7d2de0fe4c2bddb59992ba401391f38627a1e (diff) | |
parent | 172d24a7ae67ee7bae413d5a8618f1b5edc002be (diff) | |
download | arm-optimized-routines-0edd6499aaed16bf45de92bb0ad1c729486ce6f4.tar.gz |
Snap for 10447354 from 172d24a7ae67ee7bae413d5a8618f1b5edc002be to mainline-cellbroadcast-release (refs: aml_cbr_341710000, aml_cbr_341610000, aml_cbr_341510010, aml_cbr_341410010, aml_cbr_341311010, aml_cbr_341110000, aml_cbr_341011000, aml_cbr_340914000, android14-mainline-cellbroadcast-release)
Change-Id: I8753ae14d61308952964b5f87c7e48044f60727c
Diffstat (limited to 'string/aarch64/strchr-mte.S')
-rw-r--r-- | string/aarch64/strchr-mte.S | 58 |
1 files changed, 27 insertions, 31 deletions
diff --git a/string/aarch64/strchr-mte.S b/string/aarch64/strchr-mte.S index dcb0e46..6ec08f7 100644 --- a/string/aarch64/strchr-mte.S +++ b/string/aarch64/strchr-mte.S @@ -1,8 +1,8 @@ /* * strchr - find a character in a string * - * Copyright (c) 2020, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2020-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: @@ -11,7 +11,7 @@ * MTE compatible. */ -#include "../asmdefs.h" +#include "asmdefs.h" #define srcin x0 #define chrin w1 @@ -19,8 +19,7 @@ #define src x2 #define tmp1 x1 -#define wtmp2 w3 -#define tmp3 x3 +#define tmp2 x3 #define vrepchr v0 #define vdata v1 @@ -28,39 +27,30 @@ #define vhas_nul v2 #define vhas_chr v3 #define vrepmask v4 -#define vrepmask2 v5 -#define vend v6 -#define dend d6 +#define vend v5 +#define dend d5 /* Core algorithm. For each 16-byte chunk we calculate a 64-bit syndrome value with four bits - per byte. For even bytes, bits 0-1 are set if the relevant byte matched the - requested character, bits 2-3 are set if the byte is NUL (or matched), and - bits 4-7 are not used and must be zero if none of bits 0-3 are set). Odd - bytes set bits 4-7 so that adjacent bytes can be merged. Since the bits - in the syndrome reflect the order in which things occur in the original - string, counting trailing zeros identifies exactly which byte matched. */ + per byte. Bits 0-1 are set if the relevant byte matched the requested + character, bits 2-3 are set if the byte is NUL or matched. Count trailing + zeroes gives the position of the matching byte if it is a multiple of 4. + If it is not a multiple of 4, there was no match. 
*/ ENTRY (__strchr_aarch64_mte) PTR_ARG (0) bic src, srcin, 15 dup vrepchr.16b, chrin ld1 {vdata.16b}, [src] - mov wtmp2, 0x3003 - dup vrepmask.8h, wtmp2 + movi vrepmask.16b, 0x33 cmeq vhas_nul.16b, vdata.16b, 0 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b - mov wtmp2, 0xf00f - dup vrepmask2.8h, wtmp2 - bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b - and vhas_nul.16b, vhas_nul.16b, vrepmask2.16b - lsl tmp3, srcin, 2 - addp vend.16b, vhas_nul.16b, vhas_nul.16b /* 128->64 */ - + lsl tmp2, srcin, 2 + shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ fmov tmp1, dend - lsr tmp1, tmp1, tmp3 + lsr tmp1, tmp1, tmp2 cbz tmp1, L(loop) rbit tmp1, tmp1 @@ -74,28 +64,34 @@ ENTRY (__strchr_aarch64_mte) .p2align 4 L(loop): - ldr qdata, [src, 16]! + ldr qdata, [src, 16] + cmeq vhas_chr.16b, vdata.16b, vrepchr.16b + cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b + umaxp vend.16b, vhas_nul.16b, vhas_nul.16b + fmov tmp1, dend + cbnz tmp1, L(end) + ldr qdata, [src, 32]! cmeq vhas_chr.16b, vdata.16b, vrepchr.16b cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b umaxp vend.16b, vhas_nul.16b, vhas_nul.16b fmov tmp1, dend cbz tmp1, L(loop) + sub src, src, 16 +L(end): #ifdef __AARCH64EB__ bif vhas_nul.16b, vhas_chr.16b, vrepmask.16b - and vhas_nul.16b, vhas_nul.16b, vrepmask2.16b - addp vend.16b, vhas_nul.16b, vhas_nul.16b /* 128->64 */ + shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ fmov tmp1, dend #else bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b - and vhas_nul.16b, vhas_nul.16b, vrepmask2.16b - addp vend.16b, vhas_nul.16b, vhas_nul.16b /* 128->64 */ + shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */ fmov tmp1, dend rbit tmp1, tmp1 #endif + add src, src, 16 clz tmp1, tmp1 - /* Tmp1 is an even multiple of 2 if the target character was - found first. Otherwise we've found the end of string. */ + /* Tmp1 is a multiple of 4 if the target character was found. */ tst tmp1, 2 add result, src, tmp1, lsr 2 csel result, result, xzr, eq |