diff options
author | Adhemerval Zanella <adhemerval.zanella@linaro.org> | 2019-08-26 16:47:57 -0300 |
---|---|---|
committer | Szabolcs Nagy <30925343+nsz-arm@users.noreply.github.com> | 2019-08-28 12:05:23 +0100 |
commit | 9ecd13a59a6a32800658b334a076ca33fdae5be8 (patch) | |
tree | 15e05461124439b829464d532044b127c1d280f4 /string/aarch64/strlen-sve.S | |
parent | 59b123bccae9e5373e01db8273a779bc87733efe (diff) | |
download | arm-optimized-routines-9ecd13a59a6a32800658b334a076ca33fdae5be8.tar.gz |
Import aarch64 sve strlen
The only difference is changing the symbol name from strlen
to __strlen_aarch64_sve.
Diffstat (limited to 'string/aarch64/strlen-sve.S')
-rw-r--r-- | string/aarch64/strlen-sve.S | 55 |
1 files changed, 55 insertions, 0 deletions
/*
 * __strlen_aarch64_sve - compute the length of a string
 *
 * Copyright (c) 2018, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * SVE Available.
 */

/* File: string/aarch64/strlen-sve.S
 *
 * Interface (GNU as syntax, AArch64):
 *   In:   x0 = address of the first byte of the string
 *   Out:  x0 = number of bytes preceding the first zero byte
 *   Presumably callable from C as
 *     size_t __strlen_aarch64_sve (const char *)  -- TODO confirm prototype.
 *
 * Register roles inside the routine:
 *   x1  running byte count; also the offset added to x0 for each load
 *   z0  bytes fetched by the most recent first-faulting load
 *   p2  all-true byte predicate, never modified after setup
 *   p0  FFR contents after the load; later the "bytes before the zero" mask
 *   p1  per-byte "equals zero" comparison result
 *
 * Also writes the condition flags and the FFR.  No stack use, no calls.
 */

	.arch	armv8-a+sve
	.text

	.globl	__strlen_aarch64_sve
	.type	__strlen_aarch64_sve, %function
	.p2align 4
__strlen_aarch64_sve:
	setffr				/* FFR := all true before the first load */
	ptrue	p2.b			/* p2 := every byte lane active */
	mov	x1, #0			/* count := 0 */
	nop				/* 4th insn: loop head sits 16 bytes after
					   the 16-byte-aligned entry */

	/* Main loop: first-faulting load of one vector of bytes at
	   x0 + x1.  Lanes that could not be read are cleared in FFR.  */
.Lscan:
	ldff1b	z0.b, p2/z, [x0, x1]
	nop				/* presumably alignment padding; with it
					   .Lfound is 48 bytes past the entry */
	rdffrs	p0.b, p2/z		/* p0 := FFR, and set flags from it */
	b.nlast	.Lpartial		/* last lane not set => short read */

	/* Every lane was read.  From here on use p2 rather than p0 so the
	   code past the branch does not depend on the FFR contents.  */
	incb	x1, all			/* count += vector length, optimistically */
	cmpeq	p1.b, p2/z, z0.b, #0	/* p1 := lanes holding a zero byte */
	b.none	.Lscan			/* no terminator here: next vector */
	decb	x1, all			/* terminator is in this vector: take the
					   optimistic increment back */

	/* p1 marks at least one zero byte.  Build a mask of the lanes that
	   come before the first one and add their number to the count.  */
.Lfound:
	brkb	p0.b, p2/z, p1.b
	incp	x1, p0.b
	mov	x0, x1			/* result goes back in x0 */
	ret

	/* Short read: p0 flags exactly the lanes that were loaded, so
	   restrict the zero-byte comparison to those lanes.  */
.Lpartial:
	cmpeq	p1.b, p0/z, z0.b, #0
	b.any	.Lfound

	/* None of the loaded lanes is zero.  Reset FFR to all true, step
	   the count past the lanes that were read, and go round again.  */
	setffr
	incp	x1, p0.b
	b	.Lscan

	.size	__strlen_aarch64_sve, . - __strlen_aarch64_sve