about | summary | refs | log | tree | commit | diff
path: root/string/arm
diff options
context:
space:
mode:
author: Victor Do Nascimento <Victor.DoNascimento@arm.com> 2022-10-21 17:21:11 +0100
committer: Szabolcs Nagy <szabolcs.nagy@arm.com> 2022-10-21 17:27:12 +0100
commit: 7780a64c543c43360f603ce81173ac530389fac6 (patch)
tree: a12b36f609c739caafb4c5996d1a56b582ef3f34 /string/arm
parent: 84c28b9ac99a48a310fa459df534c09f33e304f7 (diff)
download: arm-optimized-routines-7780a64c543c43360f603ce81173ac530389fac6.tar.gz
string: arm: Add new functionality to prologue/epilogue assembler macros.
This patch adds options for automatic alignment enforcement and for pushing/popping the lr register to prologue and epilogue assembler macros, while making the pushing of the ip register optional for PACBTI. Furthermore, as the use of these macros is independent of PACBTI and may be used on architectures without the feature, the macros are moved to a common header. Improvements are also made to cfi handling. Where absolute cfi offset calculation is complicated by optional function prologue parameters (e.g. the pushing of pac-codes to the stack for M-profile pacbti on function entry and the pushing of a dummy register when alignment is required), replace the use of .cfi_offset with .cfi_rel_offset, simplifying cfi calculations by basing offsets on SP rather than the cfa. Finally, extensive in-source documentation is added to these macros to facilitate their use and further development. Built w/ arm-none-linux-gnueabihf, ran make check-string w/ qemu-arm-static.
Diffstat (limited to 'string/arm')
-rw-r--r-- string/arm/memchr.S 22
-rw-r--r-- string/arm/strcmp.S 30
-rw-r--r-- string/arm/strlen-armv6t2.S 7
3 files changed, 32 insertions, 27 deletions
diff --git a/string/arm/memchr.S b/string/arm/memchr.S
index 83a96ca..125618d 100644
--- a/string/arm/memchr.S
+++ b/string/arm/memchr.S
@@ -36,8 +36,8 @@
#define CHARTSTMASK(c) 1<<(c*8)
#endif
.thumb
+#include "../asmdefs.h"
-#include "../pacbti.h"
@ ---------------------------------------------------------------------------
.thumb_func
@@ -74,10 +74,10 @@ __memchr_arm:
@ At this point, we are aligned, we know we have at least 8 bytes to work with
push {r4,r5,r6,r7}
.cfi_adjust_cfa_offset 16
- .cfi_offset 4, -(16+PAC_CFI_ADJ)
- .cfi_offset 5, -(12+PAC_CFI_ADJ)
- .cfi_offset 6, -(8+PAC_CFI_ADJ)
- .cfi_offset 7, -(4+PAC_CFI_ADJ)
+ .cfi_rel_offset 4, 0
+ .cfi_rel_offset 5, 4
+ .cfi_rel_offset 6, 8
+ .cfi_rel_offset 7, 12
orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes
orr r1, r1, r1, lsl #16
bic r4, r2, #7 @ Number of double words to work with
@@ -116,16 +116,20 @@ __memchr_arm:
bne 21b @ on r2 flags
40:
+ .cfi_remember_state
movs r0,#0 @ not found
epilogue
50:
+ .cfi_restore_state
+ .cfi_remember_state
subs r0,r0,#1 @ found
epilogue
60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
@ r0 points to the start of the double word after the one that was tested
@ r5 has the 00/ff pattern for the first word, r6 has the chained value
+ .cfi_restore_state
cmp r5, #0
itte eq
moveq r5, r6 @ the end is in the 2nd word
@@ -144,8 +148,14 @@ __memchr_arm:
addeq r0,r0,#1
61:
+ pop {r4,r5,r6,r7}
+ .cfi_restore 7
+ .cfi_restore 6
+ .cfi_restore 5
+ .cfi_restore 4
+ .cfi_adjust_cfa_offset -16
subs r0,r0,#1
- epilogue 4 7
+ epilogue
.cfi_endproc
.cantunwind
.fnend
diff --git a/string/arm/strcmp.S b/string/arm/strcmp.S
index eafb9f6..b01c02e 100644
--- a/string/arm/strcmp.S
+++ b/string/arm/strcmp.S
@@ -13,7 +13,6 @@
the compares. */
#include "../asmdefs.h"
-#include "../pacbti.h"
/* Build Options:
STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
@@ -106,7 +105,7 @@
.cfi_restore 5
.cfi_adjust_cfa_offset -16
sub result, result, r1, lsr #24
- epilogue savepac=HAVE_PAC_LEAF
+ epilogue push_ip=HAVE_PAC_LEAF
#else
/* To use the big-endian trick we'd have to reverse all three words.
that's slower than this approach. */
@@ -129,7 +128,7 @@
.cfi_adjust_cfa_offset -16
sub result, result, r1
- epilogue savepac=HAVE_PAC_LEAF
+ epilogue push_ip=HAVE_PAC_LEAF
#endif
.endm
@@ -140,14 +139,14 @@ L(strcmp_start_addr):
#if STRCMP_NO_PRECHECK == 0
L(fastpath_exit):
sub r0, r2, r3
- epilogue savepac=HAVE_PAC_LEAF
+ epilogue push_ip=HAVE_PAC_LEAF
nop
#endif
.global __strcmp_arm
.type __strcmp_arm,%function
.align 0
__strcmp_arm:
- prologue savepac=HAVE_PAC_LEAF
+ prologue push_ip=HAVE_PAC_LEAF
#if STRCMP_NO_PRECHECK == 0
ldrb r2, [src1]
ldrb r3, [src2]
@@ -158,17 +157,12 @@ __strcmp_arm:
#endif
strd r4, r5, [sp, #-16]!
.cfi_adjust_cfa_offset 16
- .cfi_offset 5, -(12+PAC_CFI_ADJ_DEFAULT)
- .cfi_offset 4, -(16+PAC_CFI_ADJ_DEFAULT)
+ .cfi_rel_offset 4, 0
+ .cfi_rel_offset 5, 4
orr tmp1, src1, src2
strd r6, r7, [sp, #8]
-#if HAVE_PAC_LEAF
- .cfi_offset 6, -12
- .cfi_offset 7, -8
-#else
- .cfi_offset 6, -8
- .cfi_offset 7, -4
-#endif /* HAVE_PAC_LEAF */
+ .cfi_rel_offset 6, 8
+ .cfi_rel_offset 7, 12
mvn const_m1, #0
lsl r2, tmp1, #29
cbz r2, L(loop_aligned8)
@@ -339,7 +333,7 @@ L(misaligned_exit):
.cfi_restore 4
.cfi_adjust_cfa_offset -16
- epilogue savepac=HAVE_PAC_LEAF
+ epilogue push_ip=HAVE_PAC_LEAF
#if STRCMP_NO_PRECHECK == 0
L(aligned_m1):
@@ -391,7 +385,7 @@ L(overlap3):
.cfi_restore 7
.cfi_adjust_cfa_offset -16
neg result, result
- epilogue savepac=HAVE_PAC_LEAF
+ epilogue push_ip=HAVE_PAC_LEAF
6:
.cfi_restore_state
S2LO data1, data1, #24
@@ -467,7 +461,7 @@ L(strcmp_done_equal):
.cfi_restore 6
.cfi_restore 7
.cfi_adjust_cfa_offset -16
- epilogue savepac=HAVE_PAC_LEAF
+ epilogue push_ip=HAVE_PAC_LEAF
L(strcmp_tail):
.cfi_restore_state
@@ -491,7 +485,7 @@ L(strcmp_tail):
.cfi_restore 7
.cfi_adjust_cfa_offset -16
sub result, result, data2, lsr #24
- epilogue savepac=HAVE_PAC_LEAF
+ epilogue push_ip=HAVE_PAC_LEAF
END (__strcmp_arm)
diff --git a/string/arm/strlen-armv6t2.S b/string/arm/strlen-armv6t2.S
index 6e0352d..f06b238 100644
--- a/string/arm/strlen-armv6t2.S
+++ b/string/arm/strlen-armv6t2.S
@@ -14,7 +14,6 @@
*/
#include "../asmdefs.h"
-#include "../pacbti.h"
#ifdef __ARMEB__
#define S2LO lsl
@@ -47,7 +46,7 @@
#define tmp2 r5
ENTRY (__strlen_armv6t2)
- prologue 4 5 savepac=HAVE_PAC_LEAF
+ prologue 4 5 push_ip=HAVE_PAC_LEAF
pld [srcin, #0]
bic src, srcin, #7
mvn const_m1, #0
@@ -98,6 +97,7 @@ L(start_realigned):
beq L(loop_aligned)
L(null_found):
+ .cfi_remember_state
cmp data1a, #0
itt eq
addeq result, result, #4
@@ -107,9 +107,10 @@ L(null_found):
#endif
clz data1a, data1a
add result, result, data1a, lsr #3 /* Bits -> Bytes. */
- epilogue 4 5 savepac=HAVE_PAC_LEAF
+ epilogue 4 5 push_ip=HAVE_PAC_LEAF
L(misaligned8):
+ .cfi_restore_state
ldrd data1a, data1b, [src]
and tmp2, tmp1, #3
rsb result, tmp1, #0