aboutsummaryrefslogtreecommitdiff
path: root/string/aarch64/strlen-sve.S
diff options
context:
space:
mode:
authorAdhemerval Zanella <adhemerval.zanella@linaro.org>2019-08-26 16:47:57 -0300
committerSzabolcs Nagy <30925343+nsz-arm@users.noreply.github.com>2019-08-28 12:05:23 +0100
commit9ecd13a59a6a32800658b334a076ca33fdae5be8 (patch)
tree15e05461124439b829464d532044b127c1d280f4 /string/aarch64/strlen-sve.S
parent59b123bccae9e5373e01db8273a779bc87733efe (diff)
downloadarm-optimized-routines-9ecd13a59a6a32800658b334a076ca33fdae5be8.tar.gz
Import aarch64 sve strlen
The only difference is changing the symbol name from strlen to __strlen_aarch64_sve.
Diffstat (limited to 'string/aarch64/strlen-sve.S')
-rw-r--r--string/aarch64/strlen-sve.S55
1 files changed, 55 insertions, 0 deletions
diff --git a/string/aarch64/strlen-sve.S b/string/aarch64/strlen-sve.S
new file mode 100644
index 0000000..64ede85
--- /dev/null
+++ b/string/aarch64/strlen-sve.S
@@ -0,0 +1,55 @@
+/*
+ * __strlen_aarch64_sve - compute the length of a string
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * SVE Available.
+ */
+
+ .arch armv8-a+sve /* let the assembler accept the SVE instructions used below */
+ .text
+
+ .globl __strlen_aarch64_sve
+ .type __strlen_aarch64_sve, %function
+ .p2align 4 /* 16-byte align the entry point */
+__strlen_aarch64_sve: /* in: x0 = string address; out: x0 = length; also writes x1, z0, p0-p2, FFR and the flags */
+ setffr /* initialize FFR */
+ ptrue p2.b /* all ones; loop invariant */
+ mov x1, 0 /* initialize length: x1 = byte offset scanned so far */
+ nop /* fourth 4-byte instruction after the aligned entry, so label 0 below is 16-byte aligned as well */
+
+ /* Read a vector's worth of bytes, stopping on first fault. */
+0: ldff1b z0.b, p2/z, [x0, x1] /* z0 = bytes at x0 + x1; FFR is cleared from the first lane that could not be loaded */
+ nop /* NOTE(review): purpose not evident from this file; presumably scheduling/alignment padding - confirm */
+ rdffrs p0.b, p2/z /* p0 = lanes that were actually loaded; also sets the flags tested next */
+ b.nlast 2f /* last lane not loaded: handle a partial vector at 2 */
+
+ /* First fault did not fail: the whole vector is valid.
+ Avoid depending on the contents of FFR beyond the branch. */
+ incb x1, all /* speculate increment: x1 += vector length in bytes */
+ cmpeq p1.b, p2/z, z0.b, 0 /* loop if no zeros; p1 = lanes whose byte is 0 */
+ b.none 0b /* no zero byte in this vector: keep the increment and read the next one */
+ decb x1, all /* undo speculate: x1 is the offset of the current vector again */
+
+ /* Zero found. Select the bytes before the first and count them. */
+1: brkb p0.b, p2/z, p1.b /* p0 = lanes strictly before the first set lane of p1 */
+ incp x1, p0.b /* x1 += number of bytes that precede the zero byte */
+ mov x0, x1 /* return the length in x0 */
+ ret
+
+ /* First fault failed: only some of the vector is valid.
+ Perform the comparison only on the valid bytes. */
+2: cmpeq p1.b, p0/z, z0.b, 0 /* p1 = zero bytes among the loaded lanes (p0) only */
+ b.any 1b /* zero byte among the valid lanes: finish at 1 */
+
+ /* No zero found. Re-init FFR, increment, and loop. */
+ setffr /* set every FFR bit again before the next first-fault load */
+ incp x1, p0.b /* advance x1 by the number of bytes that were loaded */
+ b 0b /* retry from the first byte that was not loaded */
+
+ .size __strlen_aarch64_sve, . - __strlen_aarch64_sve /* symbol size = bytes from the label to this point */