aboutsummaryrefslogtreecommitdiff
path: root/string
diff options
context:
space:
mode:
authorVictor Do Nascimento <Victor.DoNascimento@arm.com>2022-08-22 12:44:49 +0100
committerSzabolcs Nagy <szabolcs.nagy@arm.com>2022-08-22 12:48:56 +0100
commit5c72615c203f9a2a39c04b23640ebaac26294bcb (patch)
tree862d17f33ac11c87d7ba8e35f74d2e738f854a5b /string
parentffb6461143e29d34687aca489d9fd8d297dc9920 (diff)
downloadarm-optimized-routines-5c72615c203f9a2a39c04b23640ebaac26294bcb.tar.gz
string: arm: Augment M-profile PACBTI-enablement macros
Modify previously defined PACBTI macros to allow for more flexible push/pop expressions at function prologues/epilogues, allowing further simplification of code predicated on the use of M-profile PACBTI hardware features. This patch also allows for the specification of whether generated pac keys are pushed onto the stack for leaf functions where this may not be necessary. It defines the following preprocessor macros: * HAVE_PAC_LEAF: Indicates whether pac-signing has been requested for leaf functions. * PAC_LEAF_PUSH_IP: Whether leaf functions should push the pac code to the stack irrespective of whether the ip register is clobbered in the function or not. * PAC_CFI_ADJ: Given values for the above two parameters, this holds the calculated offset applied to default CFI address/offset values as a consequence of potentially pushing the pac-code to the stack. It also defines the following assembler macros: * prologue: In addition to pushing any callee-saved registers onto the stack, it generates any requested pacbti instructions. Pushed registers are specified via the optional `first', `last' and `savepac' macro argument parameters. When a single register number is provided, it pushes that register. When two register numbers are provided, they specify a range to save. If savepac is non-zero, the ip register is also saved. For example: prologue savepac=1 -> push {ip} prologue 1 -> push {r1} prologue 1 savepac=1 -> push {r1, ip} prologue 1 4 -> push {r1-r4} prologue 1 4 savepac=1 -> push {r1-r4, ip} * epilogue: pops registers off the stack and emits the pac authentication (aut) instruction if requested. The optional `first', `last' and `savepac' function as per the prologue macro, generating a pop instead of push instruction. * cfisavelist - prologue macro helper function, generating necessary .cfi_offset directives associated with push instruction.
Therefore, the net effect of calling `prologue 1 2 savepac=1' is to generate the following: push {r1-r2, ip} .cfi_adjust_cfa_offset 12 .cfi_offset 143, -4 .cfi_offset 2, -8 .cfi_offset 1, -12 * cfirestorelist - epilogue macro helper function, emitting .cfi_restore directives prior to resetting the cfa offset. As such, calling `epilogue 1 2 savepac=1' will produce: pop {r1-r2, ip} .cfi_restore 143 .cfi_restore 2 .cfi_restore 1 .cfi_def_cfa_offset 0
Diffstat (limited to 'string')
-rw-r--r--string/arm/memchr.S37
-rw-r--r--string/arm/strcmp.S25
-rw-r--r--string/arm/strlen-armv6t2.S39
-rw-r--r--string/pacbti.h131
4 files changed, 131 insertions, 101 deletions
diff --git a/string/arm/memchr.S b/string/arm/memchr.S
index ddc808b..83a96ca 100644
--- a/string/arm/memchr.S
+++ b/string/arm/memchr.S
@@ -52,7 +52,7 @@ __memchr_arm:
@ r1 = character to look for
@ r2 = length
@ returns r0 = pointer to character or NULL if not found
- pacbti_prologue
+ prologue
and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
cmp r2,#16 @ If it's short don't bother with anything clever
@@ -74,17 +74,10 @@ __memchr_arm:
@ At this point, we are aligned, we know we have at least 8 bytes to work with
push {r4,r5,r6,r7}
.cfi_adjust_cfa_offset 16
-#if HAVE_PAC_LEAF
- .cfi_offset 4, -20
- .cfi_offset 5, -16
- .cfi_offset 6, -12
- .cfi_offset 7, -8
-#else
- .cfi_offset 4, -16
- .cfi_offset 5, -12
- .cfi_offset 6, -8
- .cfi_offset 7, -4
-#endif /* HAVE_PAC_LEAF */
+ .cfi_offset 4, -(16+PAC_CFI_ADJ)
+ .cfi_offset 5, -(12+PAC_CFI_ADJ)
+ .cfi_offset 6, -(8+PAC_CFI_ADJ)
+ .cfi_offset 7, -(4+PAC_CFI_ADJ)
orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes
orr r1, r1, r1, lsl #16
bic r4, r2, #7 @ Number of double words to work with
@@ -124,11 +117,11 @@ __memchr_arm:
40:
movs r0,#0 @ not found
- pacbti_epilogue
+ epilogue
50:
subs r0,r0,#1 @ found
- pacbti_epilogue
+ epilogue
60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
@ r0 points to the start of the double word after the one that was tested
@@ -152,21 +145,7 @@ __memchr_arm:
61:
subs r0,r0,#1
-#if HAVE_PAC_LEAF
- pop {r4,r5,r6,r7,ip}
- .cfi_restore 143
-#else
- pop {r4,r5,r6,r7}
-#endif /* HAVE_PAC_LEAF */
- .cfi_restore 7
- .cfi_restore 6
- .cfi_restore 5
- .cfi_restore 4
- .cfi_def_cfa_offset 0
-#if HAVE_PAC_LEAF
- aut ip, lr, sp
-#endif /* HAVE_PAC_LEAF */
- bx lr
+ epilogue 4 7
.cfi_endproc
.cantunwind
.fnend
diff --git a/string/arm/strcmp.S b/string/arm/strcmp.S
index 2eb560f..a408f3f 100644
--- a/string/arm/strcmp.S
+++ b/string/arm/strcmp.S
@@ -106,7 +106,7 @@
.cfi_restore 5
.cfi_adjust_cfa_offset -16
sub result, result, r1, lsr #24
- pacbti_epilogue
+ epilogue savepac=HAVE_PAC_LEAF
#else
/* To use the big-endian trick we'd have to reverse all three words.
that's slower than this approach. */
@@ -129,7 +129,7 @@
.cfi_adjust_cfa_offset -16
sub result, result, r1
- pacbti_epilogue
+ epilogue savepac=HAVE_PAC_LEAF
#endif
.endm
@@ -140,14 +140,14 @@ L(strcmp_start_addr):
#if STRCMP_NO_PRECHECK == 0
L(fastpath_exit):
sub r0, r2, r3
- pacbti_epilogue
+ epilogue savepac=HAVE_PAC_LEAF
nop
#endif
.global __strcmp_arm
.type __strcmp_arm,%function
.align 0
__strcmp_arm:
- pacbti_prologue
+ prologue savepac=HAVE_PAC_LEAF
#if STRCMP_NO_PRECHECK == 0
ldrb r2, [src1]
ldrb r3, [src2]
@@ -158,13 +158,8 @@ __strcmp_arm:
#endif
strd r4, r5, [sp, #-16]!
.cfi_adjust_cfa_offset 16
-#if HAVE_PAC_LEAF
- .cfi_offset 4, -20
- .cfi_offset 5, -16
-#else
- .cfi_offset 4, -16
- .cfi_offset 5, -12
-#endif /* HAVE_PAC_LEAF */
+ .cfi_offset 5, -(12+PAC_CFI_ADJ)
+ .cfi_offset 4, -(16+PAC_CFI_ADJ)
orr tmp1, src1, src2
strd r6, r7, [sp, #8]
#if HAVE_PAC_LEAF
@@ -344,7 +339,7 @@ L(misaligned_exit):
.cfi_restore 4
.cfi_adjust_cfa_offset -16
- pacbti_epilogue
+ epilogue savepac=HAVE_PAC_LEAF
#if STRCMP_NO_PRECHECK == 0
L(aligned_m1):
@@ -396,7 +391,7 @@ L(overlap3):
.cfi_restore 7
.cfi_adjust_cfa_offset -16
neg result, result
- pacbti_epilogue
+ epilogue savepac=HAVE_PAC_LEAF
6:
.cfi_restore_state
S2LO data1, data1, #24
@@ -472,7 +467,7 @@ L(strcmp_done_equal):
.cfi_restore 6
.cfi_restore 7
.cfi_adjust_cfa_offset -16
- pacbti_epilogue
+ epilogue savepac=HAVE_PAC_LEAF
L(strcmp_tail):
.cfi_restore_state
@@ -496,7 +491,7 @@ L(strcmp_tail):
.cfi_restore 7
.cfi_adjust_cfa_offset -16
sub result, result, data2, lsr #24
- pacbti_epilogue
+ epilogue savepac=HAVE_PAC_LEAF
END (__strcmp_arm)
diff --git a/string/arm/strlen-armv6t2.S b/string/arm/strlen-armv6t2.S
index 49ba928..6e0352d 100644
--- a/string/arm/strlen-armv6t2.S
+++ b/string/arm/strlen-armv6t2.S
@@ -47,29 +47,7 @@
#define tmp2 r5
ENTRY (__strlen_armv6t2)
- /* common pacbti_prologue macro from pacbti.h not used.
- handwritten prologue saves one push instruction. */
-#if HAVE_PAC_LEAF
-#if __ARM_FEATURE_BTI_DEFAULT
- pacbti ip, lr, sp
-#else
- pac ip, lr, sp
-#endif /* __ARM_FEATURE_BTI_DEFAULT */
- .cfi_register 143, 12
- push {r4, r5, ip}
- .cfi_def_cfa_offset 12
- .cfi_offset 143, -4
- .cfi_offset 5, -8
- .cfi_offset 4, -12
-#else
-#if __ARM_FEATURE_BTI_DEFAULT
- bti
-#endif /* __ARM_FEATURE_BTI_DEFAULT */
- push {r4, r5}
- .cfi_def_cfa_offset 8
- .cfi_offset 4, -8
- .cfi_offset 5, -4
-#endif /* HAVE_PAC_LEAF */
+ prologue 4 5 savepac=HAVE_PAC_LEAF
pld [srcin, #0]
bic src, srcin, #7
mvn const_m1, #0
@@ -129,20 +107,7 @@ L(null_found):
#endif
clz data1a, data1a
add result, result, data1a, lsr #3 /* Bits -> Bytes. */
-#if HAVE_PAC_LEAF
- pop {r4, r5, ip}
- .cfi_restore 4
- .cfi_restore 5
- .cfi_restore 143
- .cfi_def_cfa_offset 0
- aut ip, lr, sp
-#else
- ldrd r4, r5, [sp], #8
- .cfi_restore 4
- .cfi_restore 5
- .cfi_def_cfa_offset 0
-#endif /* HAVE_PAC_LEAF */
- bx lr
+ epilogue 4 5 savepac=HAVE_PAC_LEAF
L(misaligned8):
ldrd data1a, data1b, [src]
diff --git a/string/pacbti.h b/string/pacbti.h
index 9162b27..0745233 100644
--- a/string/pacbti.h
+++ b/string/pacbti.h
@@ -5,39 +5,130 @@
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
-/* Checki whether leaf function PAC signing has been requested
- in the -mbranch-protect compile-time option */
+/* Check whether leaf function PAC signing has been requested in the
+ -mbranch-protect compile-time option. */
#define LEAF_PROTECT_BIT 2
-#define HAVE_PAC_LEAF \
+
+#ifdef __ARM_FEATURE_PAC_DEFAULT
+# define HAVE_PAC_LEAF \
__ARM_FEATURE_PAC_DEFAULT & (1 << LEAF_PROTECT_BIT)
+#else
+# define HAVE_PAC_LEAF 0
+#endif
+
+/* Provide default parameters for PAC-code handling in leaf-functions. */
+#ifndef PAC_LEAF_PUSH_IP
+# define PAC_LEAF_PUSH_IP 1
+#endif
+
+#if HAVE_PAC_LEAF
+# if PAC_LEAF_PUSH_IP
+# define PAC_CFI_ADJ 4
+# else
+# define PAC_CFI_ADJ 0
+# endif /* PAC_LEAF_PUSH_IP*/
+#else
+# undef PAC_LEAF_PUSH_IP
+# define PAC_LEAF_PUSH_IP 0
+# define PAC_CFI_ADJ 0
+#endif /* HAVE_PAC_LEAF */
+
+/* Emit .cfi_restore directives for a consecutive sequence of registers. */
+ .macro cfirestorelist first, last
+ .cfi_restore \last
+ .if \last-\first
+ cfirestorelist \first, \last-1
+ .endif
+ .endm
-/* Macro to handle function entry depending on branch-protection
- schemes */
- .macro pacbti_prologue
+/* Emit .cfi_offset directives for a consecutive sequence of registers. */
+ .macro cfisavelist first, last, index=1
+ .cfi_offset \last, -4*(\index) - PAC_CFI_ADJ
+ .if \last-\first
+ cfisavelist \first, \last-1, \index+1
+ .endif
+ .endm
+
+/* Create a prologue entry sequence handling PAC/BTI, if required and emitting
+ CFI directives for generated PAC code and any pushed registers. */
+ .macro prologue first=-1, last=-1, savepac=PAC_LEAF_PUSH_IP
#if HAVE_PAC_LEAF
#if __ARM_FEATURE_BTI_DEFAULT
- pacbti ip, lr, sp
+ pacbti ip, lr, sp
#else
- pac ip, lr, sp
+ pac ip, lr, sp
#endif /* __ARM_FEATURE_BTI_DEFAULT */
.cfi_register 143, 12
- str ip, [sp, #-4]!
- .save {ra_auth_code}
- .cfi_def_cfa_offset 4
- .cfi_offset 143, -4
-#elif __ARM_FEATURE_BTI_DEFAULT
+#else
+#if __ARM_FEATURE_BTI_DEFAULT
bti
+#endif /* __ARM_FEATURE_BTI_DEFAULT */
#endif /* HAVE_PAC_LEAF */
+ .if \first != -1
+ .if \last != -1
+ .if \savepac
+ push {r\first-r\last, ip}
+ .cfi_adjust_cfa_offset ((\last-\first)+1)*4 + PAC_CFI_ADJ
+ .cfi_offset 143, -PAC_CFI_ADJ
+ cfisavelist \first, \last
+ .else
+ push {r\first-r\last}
+ .cfi_adjust_cfa_offset ((\last-\first)+1)*4
+ cfisavelist \first, \last
+ .endif
+ .else
+ .if \savepac
+ push {r\first, ip}
+ .cfi_adjust_cfa_offset 4 + PAC_CFI_ADJ
+ .cfi_offset 143, -PAC_CFI_ADJ
+ cfisavelist \first, \first
+ .else // !\savepac
+ push {r\first}
+ .cfi_adjust_cfa_offset PAC_CFI_ADJ
+ cfisavelist \first, \first
+ .endif
+ .endif
+ .else // \first == -1
+ .if \savepac
+ push {ip}
+ .cfi_adjust_cfa_offset PAC_CFI_ADJ
+ .cfi_offset 143, -PAC_CFI_ADJ
+ .endif
+ .endif
.endm
-/* Macro to handle different branch exchange cases depending on
- branch-protection schemes */
- .macro pacbti_epilogue
-#if HAVE_PAC_LEAF
- ldr ip, [sp], #4
+/* Create an epilogue exit sequence handling PAC/BTI, if required and emitting
+ CFI directives for all restored registers. */
+ .macro epilogue first=-1, last=-1, savepac=PAC_LEAF_PUSH_IP
+ .if \first != -1
+ .if \last != -1
+ .if \savepac
+ pop {r\first-r\last, ip}
+ .cfi_restore 143
+ cfirestorelist \first, \last
+ .else
+ pop {r\first-r\last}
+ cfirestorelist \first, \last
+ .endif
+ .else
+ .if \savepac
+ pop {r\first, ip}
.cfi_restore 143
+ cfirestorelist \first, \first
+ .else
+ pop {r\first}
+ cfirestorelist \first, \first
+ .endif
+ .endif
+ .else
+ .if \savepac
+ pop {ip}
+ .cfi_restore 143
+ .endif
+ .endif
.cfi_def_cfa_offset 0
- aut ip, lr, sp
+#if HAVE_PAC_LEAF
+ aut ip, lr, sp
#endif /* HAVE_PAC_LEAF */
- bx lr
+ bx lr
.endm