about | summary | refs | log | tree | commit | diff
path: root/string/include
diff options
context:
space:
mode:
author: Krzysztof Koch <krzysztof.koch@arm.com> 2019-12-09 10:26:12 +0000
committer: Szabolcs Nagy <szabolcs.nagy@arm.com> 2019-12-10 15:33:19 +0000
commit: 3377796fe24ff1d5396609205426402678208eb1 (patch)
tree: c350db0a1be8f6fababea05a8abaa4a896d59bcc /string/include
parent: 709020eddcede574f3786debe5b3361595f80c77 (diff)
download: arm-optimized-routines-3377796fe24ff1d5396609205426402678208eb1.tar.gz
aarch64: Combine memcpy and memmove implementations
Modify integer and SIMD versions of memcpy to handle overlaps correctly. Make __memmove_aarch64 and __memmove_aarch64_simd alias to __memcpy_aarch64 and __memcpy_aarch64_simd respectively. Complete sharing of code between memcpy and memmove implementations is possible without noticeable performance penalty. This is thanks to moving the source and destination buffer overlap detection after the code for handling small and medium copies which are overlap-safe anyway. Benchmarking shows that keeping two versions of memcpy is necessary because newer platforms favor aligning src over destination for large copies. Using NEON registers also gives a small speedup. However, aligning dst and using general-purpose registers works best for older platforms. Consequently, memcpy.S and memcpy_simd.S contain memcpy code which is identical except for the registers used and src vs dst alignment.
Diffstat (limited to 'string/include')
-rw-r--r-- string/include/stringlib.h 3
1 files changed, 2 insertions, 1 deletions
diff --git a/string/include/stringlib.h b/string/include/stringlib.h
index baa9383..3f60220 100644
--- a/string/include/stringlib.h
+++ b/string/include/stringlib.h
@@ -15,7 +15,7 @@
#if __aarch64__
void *__memcpy_bytewise (void *__restrict, const void *__restrict, size_t);
void *__memcpy_aarch64 (void *__restrict, const void *__restrict, size_t);
-void *__memmove_aarch64 (void *__restrict, const void *__restrict, size_t);
+void *__memmove_aarch64 (void *, const void *, size_t);
void *__memset_aarch64 (void *, int, size_t);
void *__memchr_aarch64 (const void *, int, size_t);
int __memcmp_aarch64 (const void *, const void *, size_t);
@@ -28,6 +28,7 @@ size_t __strnlen_aarch64 (const char *, size_t);
int __strncmp_aarch64 (const char *, const char *, size_t);
#if __ARM_NEON
void *__memcpy_aarch64_simd (void *__restrict, const void *__restrict, size_t);
+void *__memmove_aarch64_simd (void *, const void *, size_t);
#endif
# if __ARM_FEATURE_SVE
void *__memchr_aarch64_sve (const void *, int, size_t);