diff options
author | Victor Do Nascimento <Victor.DoNascimento@arm.com> | 2022-10-21 17:21:11 +0100 |
---|---|---|
committer | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2022-10-21 17:27:12 +0100 |
commit | 7780a64c543c43360f603ce81173ac530389fac6 (patch) | |
tree | a12b36f609c739caafb4c5996d1a56b582ef3f34 /string/arm | |
parent | 84c28b9ac99a48a310fa459df534c09f33e304f7 (diff) | |
download | arm-optimized-routines-7780a64c543c43360f603ce81173ac530389fac6.tar.gz |
string: arm: Add new functionality to prologue/epilogue assembler macros.
This patch adds options for automatic alignment enforcement and for
pushing/popping the lr register to prologue and epilogue assembler macros,
while making the pushing of the ip register optional for PACBTI.
Furthermore, as these macros are independent of PACBTI and
may be used on architectures without the feature, the macros are moved
to a common header.
Improvements are also made to cfi handling. Where absolute cfi offset
calculation is complicated by optional function prologue
parameters (e.g. the pushing of pac-codes to the stack for M-profile
pacbti on function entry and the pushing of a dummy register when alignment
is required), replace the use of .cfi_offset with .cfi_rel_offset,
simplifying cfi calculations by basing offsets on SP rather than the
cfa.
Finally, extensive in-source documentation is added to these macros to
facilitate their use and further development.
Built w/ arm-none-linux-gnueabihf, ran make check-string w/ qemu-arm-static.
Diffstat (limited to 'string/arm')
-rw-r--r-- | string/arm/memchr.S | 22 | ||||
-rw-r--r-- | string/arm/strcmp.S | 30 | ||||
-rw-r--r-- | string/arm/strlen-armv6t2.S | 7 |
3 files changed, 32 insertions, 27 deletions
diff --git a/string/arm/memchr.S b/string/arm/memchr.S index 83a96ca..125618d 100644 --- a/string/arm/memchr.S +++ b/string/arm/memchr.S @@ -36,8 +36,8 @@ #define CHARTSTMASK(c) 1<<(c*8) #endif .thumb +#include "../asmdefs.h" -#include "../pacbti.h" @ --------------------------------------------------------------------------- .thumb_func @@ -74,10 +74,10 @@ __memchr_arm: @ At this point, we are aligned, we know we have at least 8 bytes to work with push {r4,r5,r6,r7} .cfi_adjust_cfa_offset 16 - .cfi_offset 4, -(16+PAC_CFI_ADJ) - .cfi_offset 5, -(12+PAC_CFI_ADJ) - .cfi_offset 6, -(8+PAC_CFI_ADJ) - .cfi_offset 7, -(4+PAC_CFI_ADJ) + .cfi_rel_offset 4, 0 + .cfi_rel_offset 5, 4 + .cfi_rel_offset 6, 8 + .cfi_rel_offset 7, 12 orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes orr r1, r1, r1, lsl #16 bic r4, r2, #7 @ Number of double words to work with @@ -116,16 +116,20 @@ __memchr_arm: bne 21b @ on r2 flags 40: + .cfi_remember_state movs r0,#0 @ not found epilogue 50: + .cfi_restore_state + .cfi_remember_state subs r0,r0,#1 @ found epilogue 60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was @ r0 points to the start of the double word after the one that was tested @ r5 has the 00/ff pattern for the first word, r6 has the chained value + .cfi_restore_state cmp r5, #0 itte eq moveq r5, r6 @ the end is in the 2nd word @@ -144,8 +148,14 @@ __memchr_arm: addeq r0,r0,#1 61: + pop {r4,r5,r6,r7} + .cfi_restore 7 + .cfi_restore 6 + .cfi_restore 5 + .cfi_restore 4 + .cfi_adjust_cfa_offset -16 subs r0,r0,#1 - epilogue 4 7 + epilogue .cfi_endproc .cantunwind .fnend diff --git a/string/arm/strcmp.S b/string/arm/strcmp.S index eafb9f6..b01c02e 100644 --- a/string/arm/strcmp.S +++ b/string/arm/strcmp.S @@ -13,7 +13,6 @@ the compares. 
*/ #include "../asmdefs.h" -#include "../pacbti.h" /* Build Options: STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first @@ -106,7 +105,7 @@ .cfi_restore 5 .cfi_adjust_cfa_offset -16 sub result, result, r1, lsr #24 - epilogue savepac=HAVE_PAC_LEAF + epilogue push_ip=HAVE_PAC_LEAF #else /* To use the big-endian trick we'd have to reverse all three words. that's slower than this approach. */ @@ -129,7 +128,7 @@ .cfi_adjust_cfa_offset -16 sub result, result, r1 - epilogue savepac=HAVE_PAC_LEAF + epilogue push_ip=HAVE_PAC_LEAF #endif .endm @@ -140,14 +139,14 @@ L(strcmp_start_addr): #if STRCMP_NO_PRECHECK == 0 L(fastpath_exit): sub r0, r2, r3 - epilogue savepac=HAVE_PAC_LEAF + epilogue push_ip=HAVE_PAC_LEAF nop #endif .global __strcmp_arm .type __strcmp_arm,%function .align 0 __strcmp_arm: - prologue savepac=HAVE_PAC_LEAF + prologue push_ip=HAVE_PAC_LEAF #if STRCMP_NO_PRECHECK == 0 ldrb r2, [src1] ldrb r3, [src2] @@ -158,17 +157,12 @@ __strcmp_arm: #endif strd r4, r5, [sp, #-16]! .cfi_adjust_cfa_offset 16 - .cfi_offset 5, -(12+PAC_CFI_ADJ_DEFAULT) - .cfi_offset 4, -(16+PAC_CFI_ADJ_DEFAULT) + .cfi_rel_offset 4, 0 + .cfi_rel_offset 5, 4 orr tmp1, src1, src2 strd r6, r7, [sp, #8] -#if HAVE_PAC_LEAF - .cfi_offset 6, -12 - .cfi_offset 7, -8 -#else - .cfi_offset 6, -8 - .cfi_offset 7, -4 -#endif /* HAVE_PAC_LEAF */ + .cfi_rel_offset 6, 8 + .cfi_rel_offset 7, 12 mvn const_m1, #0 lsl r2, tmp1, #29 cbz r2, L(loop_aligned8) @@ -339,7 +333,7 @@ L(misaligned_exit): .cfi_restore 4 .cfi_adjust_cfa_offset -16 - epilogue savepac=HAVE_PAC_LEAF + epilogue push_ip=HAVE_PAC_LEAF #if STRCMP_NO_PRECHECK == 0 L(aligned_m1): @@ -391,7 +385,7 @@ L(overlap3): .cfi_restore 7 .cfi_adjust_cfa_offset -16 neg result, result - epilogue savepac=HAVE_PAC_LEAF + epilogue push_ip=HAVE_PAC_LEAF 6: .cfi_restore_state S2LO data1, data1, #24 @@ -467,7 +461,7 @@ L(strcmp_done_equal): .cfi_restore 6 .cfi_restore 7 .cfi_adjust_cfa_offset -16 - epilogue savepac=HAVE_PAC_LEAF + epilogue 
push_ip=HAVE_PAC_LEAF L(strcmp_tail): .cfi_restore_state @@ -491,7 +485,7 @@ L(strcmp_tail): .cfi_restore 7 .cfi_adjust_cfa_offset -16 sub result, result, data2, lsr #24 - epilogue savepac=HAVE_PAC_LEAF + epilogue push_ip=HAVE_PAC_LEAF END (__strcmp_arm) diff --git a/string/arm/strlen-armv6t2.S b/string/arm/strlen-armv6t2.S index 6e0352d..f06b238 100644 --- a/string/arm/strlen-armv6t2.S +++ b/string/arm/strlen-armv6t2.S @@ -14,7 +14,6 @@ */ #include "../asmdefs.h" -#include "../pacbti.h" #ifdef __ARMEB__ #define S2LO lsl @@ -47,7 +46,7 @@ #define tmp2 r5 ENTRY (__strlen_armv6t2) - prologue 4 5 savepac=HAVE_PAC_LEAF + prologue 4 5 push_ip=HAVE_PAC_LEAF pld [srcin, #0] bic src, srcin, #7 mvn const_m1, #0 @@ -98,6 +97,7 @@ L(start_realigned): beq L(loop_aligned) L(null_found): + .cfi_remember_state cmp data1a, #0 itt eq addeq result, result, #4 @@ -107,9 +107,10 @@ L(null_found): #endif clz data1a, data1a add result, result, data1a, lsr #3 /* Bits -> Bytes. */ - epilogue 4 5 savepac=HAVE_PAC_LEAF + epilogue 4 5 push_ip=HAVE_PAC_LEAF L(misaligned8): + .cfi_restore_state ldrd data1a, data1b, [src] and tmp2, tmp1, #3 rsb result, tmp1, #0 |