author      Kinsey Moore <wkmoore@gmail.com>        2020-12-11 14:24:26 -0600
committer   Szabolcs Nagy <szabolcs.nagy@arm.com>   2020-12-17 13:15:41 +0000
commit      afd6244a1f8d92299967d3d50e6f6a1466da413f (patch)
tree        dcdff0861f1ec286a702901de0914d748c4c2f56
parent      58af293330a412b593375b84c57e5f8968425db7 (diff)
download    arm-optimized-routines-afd6244a1f8d92299967d3d50e6f6a1466da413f.tar.gz
string: Add support for ILP32 ABI
This adds sanitization of padding bits for pointers and size_t types as required by ARM aapcs64 for the AArch64 ILP32 ABI.
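Background for the change (not part of the commit message): under the ILP32 variant of AAPCS64, pointer and size_t arguments are 32-bit values passed in the low half of the 64-bit X registers, and the upper 32 padding bits are not guaranteed to be zero. Since these routines address memory through the full X registers, the padding must be cleared on entry. Writing a W register zero-extends into the corresponding X register, so a single mov w<n>, w<n> per argument is enough. A minimal sketch of what an ILP32 build (where __ILP32__ is defined) would see after preprocessing, using the memchr-mte entry point from this patch as the example; the expansion is illustrative, with register numbers following the argument index passed to the macro:

    ENTRY (__memchr_aarch64_mte)
        mov     w0, w0          /* PTR_ARG (0): clears bits [63:32] of x0, the src pointer */
        mov     w2, w2          /* SIZE_ARG (2): clears bits [63:32] of x2, the byte count */
        bic     src, srcin, 15
        ...

In an LP64 build both macros expand to nothing, so the existing code paths are unaffected.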
-rw-r--r--  string/aarch64/memchr-mte.S      2
-rw-r--r--  string/aarch64/memchr-sve.S      2
-rw-r--r--  string/aarch64/memchr.S          2
-rw-r--r--  string/aarch64/memcmp-sve.S      3
-rw-r--r--  string/aarch64/memcmp.S          3
-rw-r--r--  string/aarch64/memcpy-advsimd.S  3
-rw-r--r--  string/aarch64/memcpy.S          3
-rw-r--r--  string/aarch64/memrchr.S         1
-rw-r--r--  string/aarch64/memset.S          2
-rw-r--r--  string/aarch64/strchr-mte.S      1
-rw-r--r--  string/aarch64/strchr-sve.S      1
-rw-r--r--  string/aarch64/strchr.S          1
-rw-r--r--  string/aarch64/strchrnul-mte.S   1
-rw-r--r--  string/aarch64/strchrnul.S       1
-rw-r--r--  string/aarch64/strcmp-mte.S      2
-rw-r--r--  string/aarch64/strcmp-sve.S      2
-rw-r--r--  string/aarch64/strcmp.S          2
-rw-r--r--  string/aarch64/strcpy-mte.S      2
-rw-r--r--  string/aarch64/strcpy-sve.S      2
-rw-r--r--  string/aarch64/strcpy.S          2
-rw-r--r--  string/aarch64/strlen-mte.S      1
-rw-r--r--  string/aarch64/strlen-sve.S      1
-rw-r--r--  string/aarch64/strlen.S          1
-rw-r--r--  string/aarch64/strncmp-mte.S     3
-rw-r--r--  string/aarch64/strncmp-sve.S     3
-rw-r--r--  string/aarch64/strncmp.S         3
-rw-r--r--  string/aarch64/strnlen-sve.S     2
-rw-r--r--  string/aarch64/strnlen.S         2
-rw-r--r--  string/aarch64/strrchr-mte.S     1
-rw-r--r--  string/aarch64/strrchr-sve.S     1
-rw-r--r--  string/aarch64/strrchr.S         1
-rw-r--r--  string/asmdefs.h                 14
32 files changed, 71 insertions, 0 deletions
diff --git a/string/aarch64/memchr-mte.S b/string/aarch64/memchr-mte.S
index 31ad050..c2e967d 100644
--- a/string/aarch64/memchr-mte.S
+++ b/string/aarch64/memchr-mte.S
@@ -44,6 +44,8 @@
string, counting trailing zeros identifies exactly which byte matched. */
ENTRY (__memchr_aarch64_mte)
+ PTR_ARG (0)
+ SIZE_ARG (2)
bic src, srcin, 15
cbz cntin, L(nomatch)
ld1 {vdata.16b}, [src]
diff --git a/string/aarch64/memchr-sve.S b/string/aarch64/memchr-sve.S
index 4a5c726..2414b61 100644
--- a/string/aarch64/memchr-sve.S
+++ b/string/aarch64/memchr-sve.S
@@ -18,6 +18,8 @@
.text
ENTRY_ALIGN(__memchr_aarch64_sve, 4)
+ PTR_ARG (0)
+ SIZE_ARG (2)
dup z1.b, w1 /* duplicate c to a vector */
setffr /* initialize FFR */
mov x3, 0 /* initialize off */
diff --git a/string/aarch64/memchr.S b/string/aarch64/memchr.S
index dfba79f..4197d19 100644
--- a/string/aarch64/memchr.S
+++ b/string/aarch64/memchr.S
@@ -47,6 +47,8 @@
*/
ENTRY (__memchr_aarch64)
+ PTR_ARG (0)
+ SIZE_ARG (2)
/* Do not dereference srcin if no bytes to compare. */
cbz cntin, L(zero_length)
/*
diff --git a/string/aarch64/memcmp-sve.S b/string/aarch64/memcmp-sve.S
index 8a0a2ea..b6b2ae2 100644
--- a/string/aarch64/memcmp-sve.S
+++ b/string/aarch64/memcmp-sve.S
@@ -18,6 +18,9 @@
.text
ENTRY_ALIGN (__memcmp_aarch64_sve, 4)
+ PTR_ARG (0)
+ PTR_ARG (1)
+ SIZE_ARG (2)
mov x3, 0 /* initialize off */
0: whilelo p0.b, x3, x2 /* while off < max */
diff --git a/string/aarch64/memcmp.S b/string/aarch64/memcmp.S
index dac9147..84545e9 100644
--- a/string/aarch64/memcmp.S
+++ b/string/aarch64/memcmp.S
@@ -28,6 +28,9 @@
#define tmp2 x8
ENTRY (__memcmp_aarch64)
+ PTR_ARG (0)
+ PTR_ARG (1)
+ SIZE_ARG (2)
subs limit, limit, 8
b.lo L(less8)
diff --git a/string/aarch64/memcpy-advsimd.S b/string/aarch64/memcpy-advsimd.S
index 23545a3..f97f2c3 100644
--- a/string/aarch64/memcpy-advsimd.S
+++ b/string/aarch64/memcpy-advsimd.S
@@ -52,6 +52,9 @@
ENTRY_ALIAS (__memmove_aarch64_simd)
ENTRY (__memcpy_aarch64_simd)
+ PTR_ARG (0)
+ PTR_ARG (1)
+ SIZE_ARG (2)
add srcend, src, count
add dstend, dstin, count
cmp count, 128
diff --git a/string/aarch64/memcpy.S b/string/aarch64/memcpy.S
index 157bb0d..dd254f6 100644
--- a/string/aarch64/memcpy.S
+++ b/string/aarch64/memcpy.S
@@ -55,6 +55,9 @@
ENTRY_ALIAS (__memmove_aarch64)
ENTRY (__memcpy_aarch64)
+ PTR_ARG (0)
+ PTR_ARG (1)
+ SIZE_ARG (2)
add srcend, src, count
add dstend, dstin, count
cmp count, 128
diff --git a/string/aarch64/memrchr.S b/string/aarch64/memrchr.S
index ad42b49..7b4be84 100644
--- a/string/aarch64/memrchr.S
+++ b/string/aarch64/memrchr.S
@@ -46,6 +46,7 @@
string, counting trailing zeros identifies exactly which byte matched. */
ENTRY (__memrchr_aarch64)
+ PTR_ARG (0)
add end, srcin, cntin
sub endm1, end, 1
bic src, endm1, 15
diff --git a/string/aarch64/memset.S b/string/aarch64/memset.S
index 27743f1..dc53d60 100644
--- a/string/aarch64/memset.S
+++ b/string/aarch64/memset.S
@@ -22,6 +22,8 @@
#define zva_val x5
ENTRY (__memset_aarch64)
+ PTR_ARG (0)
+ SIZE_ARG (2)
dup v0.16B, valw
add dstend, dstin, count
diff --git a/string/aarch64/strchr-mte.S b/string/aarch64/strchr-mte.S
index 577752e..dcb0e46 100644
--- a/string/aarch64/strchr-mte.S
+++ b/string/aarch64/strchr-mte.S
@@ -43,6 +43,7 @@
string, counting trailing zeros identifies exactly which byte matched. */
ENTRY (__strchr_aarch64_mte)
+ PTR_ARG (0)
bic src, srcin, 15
dup vrepchr.16b, chrin
ld1 {vdata.16b}, [src]
diff --git a/string/aarch64/strchr-sve.S b/string/aarch64/strchr-sve.S
index 495beda..ce23282 100644
--- a/string/aarch64/strchr-sve.S
+++ b/string/aarch64/strchr-sve.S
@@ -25,6 +25,7 @@
#endif
ENTRY_ALIGN (FUNC, 4)
+ PTR_ARG (0)
dup z1.b, w1 /* replicate byte across vector */
setffr /* initialize FFR */
ptrue p1.b /* all ones; loop invariant */
diff --git a/string/aarch64/strchr.S b/string/aarch64/strchr.S
index 8d8e3fc..aaba30d 100644
--- a/string/aarch64/strchr.S
+++ b/string/aarch64/strchr.S
@@ -51,6 +51,7 @@
/* Locals and temporaries. */
ENTRY (__strchr_aarch64)
+ PTR_ARG (0)
/* Magic constant 0xc0300c03 to allow us to identify which lane
matches the requested byte. Even bits are set if the character
matches, odd bits if either the char is NUL or matches. */
diff --git a/string/aarch64/strchrnul-mte.S b/string/aarch64/strchrnul-mte.S
index 0dbf0dc..1b0d0a6 100644
--- a/string/aarch64/strchrnul-mte.S
+++ b/string/aarch64/strchrnul-mte.S
@@ -41,6 +41,7 @@
string, counting trailing zeros identifies exactly which byte matched. */
ENTRY (__strchrnul_aarch64_mte)
+ PTR_ARG (0)
bic src, srcin, 15
dup vrepchr.16b, chrin
ld1 {vdata.16b}, [src]
diff --git a/string/aarch64/strchrnul.S b/string/aarch64/strchrnul.S
index 45be15c..d459e7c 100644
--- a/string/aarch64/strchrnul.S
+++ b/string/aarch64/strchrnul.S
@@ -47,6 +47,7 @@
/* Locals and temporaries. */
ENTRY (__strchrnul_aarch64)
+ PTR_ARG (0)
/* Magic constant 0x40100401 to allow us to identify which lane
matches the termination condition. */
mov wtmp2, #0x0401
diff --git a/string/aarch64/strcmp-mte.S b/string/aarch64/strcmp-mte.S
index 1b6db42..12d1a6b 100644
--- a/string/aarch64/strcmp-mte.S
+++ b/string/aarch64/strcmp-mte.S
@@ -51,6 +51,8 @@
ENTRY (__strcmp_aarch64_mte)
+ PTR_ARG (0)
+ PTR_ARG (1)
sub off2, src2, src1
mov zeroones, REP8_01
and tmp, src1, 7
diff --git a/string/aarch64/strcmp-sve.S b/string/aarch64/strcmp-sve.S
index dc5b769..cac03c0 100644
--- a/string/aarch64/strcmp-sve.S
+++ b/string/aarch64/strcmp-sve.S
@@ -18,6 +18,8 @@
.text
ENTRY_ALIGN (__strcmp_aarch64_sve, 4)
+ PTR_ARG (0)
+ PTR_ARG (1)
setffr /* initialize FFR */
ptrue p1.b, all /* all ones; loop invariant */
mov x2, 0 /* initialize offset */
diff --git a/string/aarch64/strcmp.S b/string/aarch64/strcmp.S
index ee95958..7714ebf 100644
--- a/string/aarch64/strcmp.S
+++ b/string/aarch64/strcmp.S
@@ -37,6 +37,8 @@
/* Start of performance-critical section -- one 64B cache line. */
ENTRY (__strcmp_aarch64)
+ PTR_ARG (0)
+ PTR_ARG (1)
eor tmp1, src1, src2
mov zeroones, #REP8_01
tst tmp1, #7
diff --git a/string/aarch64/strcpy-mte.S b/string/aarch64/strcpy-mte.S
index 7c8629e..88c222d 100644
--- a/string/aarch64/strcpy-mte.S
+++ b/string/aarch64/strcpy-mte.S
@@ -55,6 +55,8 @@
string, counting trailing zeros identifies exactly which byte matched. */
ENTRY (STRCPY)
+ PTR_ARG (0)
+ PTR_ARG (1)
bic src, srcin, 15
mov wtmp, 0xf00f
ld1 {vdata.16b}, [src]
diff --git a/string/aarch64/strcpy-sve.S b/string/aarch64/strcpy-sve.S
index a785d45..550132b 100644
--- a/string/aarch64/strcpy-sve.S
+++ b/string/aarch64/strcpy-sve.S
@@ -25,6 +25,8 @@
#endif
ENTRY_ALIGN (FUNC, 4)
+ PTR_ARG (0)
+ PTR_ARG (1)
setffr /* initialize FFR */
ptrue p2.b, all /* all ones; loop invariant */
mov x2, 0 /* initialize offset */
diff --git a/string/aarch64/strcpy.S b/string/aarch64/strcpy.S
index a6090c8..2668b67 100644
--- a/string/aarch64/strcpy.S
+++ b/string/aarch64/strcpy.S
@@ -80,6 +80,8 @@
#define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
ENTRY (STRCPY)
+ PTR_ARG (0)
+ PTR_ARG (1)
/* For moderately short strings, the fastest way to do the copy is to
calculate the length of the string in the same way as strlen, then
essentially do a memcpy of the result. This avoids the need for
diff --git a/string/aarch64/strlen-mte.S b/string/aarch64/strlen-mte.S
index 6a99340..7cf41d5 100644
--- a/string/aarch64/strlen-mte.S
+++ b/string/aarch64/strlen-mte.S
@@ -39,6 +39,7 @@
string, counting trailing zeros identifies exactly which byte matched. */
ENTRY (__strlen_aarch64_mte)
+ PTR_ARG (0)
bic src, srcin, 15
mov wtmp, 0xf00f
ld1 {vdata.16b}, [src]
diff --git a/string/aarch64/strlen-sve.S b/string/aarch64/strlen-sve.S
index 9a9a359..844353a 100644
--- a/string/aarch64/strlen-sve.S
+++ b/string/aarch64/strlen-sve.S
@@ -18,6 +18,7 @@
.text
ENTRY_ALIGN (__strlen_aarch64_sve, 4)
+ PTR_ARG (0)
setffr /* initialize FFR */
ptrue p2.b /* all ones; loop invariant */
mov x1, 0 /* initialize length */
diff --git a/string/aarch64/strlen.S b/string/aarch64/strlen.S
index b20eaeb..a1b164a 100644
--- a/string/aarch64/strlen.S
+++ b/string/aarch64/strlen.S
@@ -74,6 +74,7 @@
character, return the length, if not, continue in the main loop. */
ENTRY (__strlen_aarch64)
+ PTR_ARG (0)
and tmp1, srcin, MIN_PAGE_SIZE - 1
cmp tmp1, MIN_PAGE_SIZE - 32
b.hi L(page_cross)
diff --git a/string/aarch64/strncmp-mte.S b/string/aarch64/strncmp-mte.S
index 46765d6..42f1a3b 100644
--- a/string/aarch64/strncmp-mte.S
+++ b/string/aarch64/strncmp-mte.S
@@ -59,6 +59,9 @@
nop /* Pad so that the loop below fits a cache line. */
.endr
ENTRY_ALIGN (__strncmp_aarch64_mte, 0)
+ PTR_ARG (0)
+ PTR_ARG (1)
+ SIZE_ARG (2)
cbz limit, L(ret0)
eor tmp1, src1, src2
mov zeroones, #REP8_01
diff --git a/string/aarch64/strncmp-sve.S b/string/aarch64/strncmp-sve.S
index fdbe7ae..663d93f 100644
--- a/string/aarch64/strncmp-sve.S
+++ b/string/aarch64/strncmp-sve.S
@@ -18,6 +18,9 @@
.text
ENTRY_ALIGN (__strncmp_aarch64_sve, 4)
+ PTR_ARG (0)
+ PTR_ARG (1)
+ SIZE_ARG (2)
setffr /* initialize FFR */
mov x3, 0 /* initialize off */
diff --git a/string/aarch64/strncmp.S b/string/aarch64/strncmp.S
index 584c54a..b8824b8 100644
--- a/string/aarch64/strncmp.S
+++ b/string/aarch64/strncmp.S
@@ -46,6 +46,9 @@
nop /* Pad so that the loop below fits a cache line. */
.endr
ENTRY_ALIGN (__strncmp_aarch64, 0)
+ PTR_ARG (0)
+ PTR_ARG (1)
+ SIZE_ARG (2)
cbz limit, L(ret0)
eor tmp1, src1, src2
mov zeroones, #REP8_01
diff --git a/string/aarch64/strnlen-sve.S b/string/aarch64/strnlen-sve.S
index 5ad40d3..ede0a17 100644
--- a/string/aarch64/strnlen-sve.S
+++ b/string/aarch64/strnlen-sve.S
@@ -18,6 +18,8 @@
.text
ENTRY_ALIGN (__strnlen_aarch64_sve, 4)
+ PTR_ARG (0)
+ SIZE_ARG (1)
setffr /* initialize FFR */
mov x2, 0 /* initialize len */
b 1f
diff --git a/string/aarch64/strnlen.S b/string/aarch64/strnlen.S
index 4852edc..48d2495 100644
--- a/string/aarch64/strnlen.S
+++ b/string/aarch64/strnlen.S
@@ -42,6 +42,8 @@
string, counting trailing zeros identifies exactly which byte matched. */
ENTRY (__strnlen_aarch64)
+ PTR_ARG (0)
+ SIZE_ARG (1)
bic src, srcin, 15
mov wtmp, 0xf00f
cbz cntin, L(nomatch)
diff --git a/string/aarch64/strrchr-mte.S b/string/aarch64/strrchr-mte.S
index 5a409b9..1e4fb1a 100644
--- a/string/aarch64/strrchr-mte.S
+++ b/string/aarch64/strrchr-mte.S
@@ -44,6 +44,7 @@
if the relevant byte matched the NUL end of string. */
ENTRY (__strrchr_aarch64_mte)
+ PTR_ARG (0)
bic src, srcin, 15
dup vrepchr.16b, chrin
mov wtmp, 0x3003
diff --git a/string/aarch64/strrchr-sve.S b/string/aarch64/strrchr-sve.S
index dbb9bfd..6e3f352 100644
--- a/string/aarch64/strrchr-sve.S
+++ b/string/aarch64/strrchr-sve.S
@@ -18,6 +18,7 @@
.text
ENTRY_ALIGN (__strrchr_aarch64_sve, 4)
+ PTR_ARG (0)
dup z1.b, w1 /* replicate byte across vector */
setffr /* initialize FFR */
ptrue p1.b /* all ones; loop invariant */
diff --git a/string/aarch64/strrchr.S b/string/aarch64/strrchr.S
index f3d22d4..56185ff 100644
--- a/string/aarch64/strrchr.S
+++ b/string/aarch64/strrchr.S
@@ -55,6 +55,7 @@
identify exactly which byte is causing the termination, and why. */
ENTRY (__strrchr_aarch64)
+ PTR_ARG (0)
/* Magic constant 0x40100401 to allow us to identify which lane
matches the requested byte. Magic constant 0x80200802 used
similarly for NUL termination. */
diff --git a/string/asmdefs.h b/string/asmdefs.h
index 31c0f9d..35874e9 100644
--- a/string/asmdefs.h
+++ b/string/asmdefs.h
@@ -81,4 +81,18 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC)
#define L(l) .L ## l
+#ifdef __ILP32__
+ /* Sanitize padding bits of pointer arguments as per aapcs64 */
+#define PTR_ARG(n) mov w##n, w##n
+#else
+#define PTR_ARG(n)
+#endif
+
+#ifdef __ILP32__
+ /* Sanitize padding bits of size arguments as per aapcs64 */
+#define SIZE_ARG(n) mov w##n, w##n
+#else
+#define SIZE_ARG(n)
+#endif
+
#endif