diff options
Diffstat (limited to 'string/aarch64/strncmp-mte.S')
-rw-r--r-- | string/aarch64/strncmp-mte.S | 12 |
1 file changed, 4 insertions, 8 deletions
diff --git a/string/aarch64/strncmp-mte.S b/string/aarch64/strncmp-mte.S
index 42f1a3b..c6dbe0b 100644
--- a/string/aarch64/strncmp-mte.S
+++ b/string/aarch64/strncmp-mte.S
@@ -54,11 +54,7 @@
 #endif
 
 	.text
-	.p2align 6
-	.rep 9
-	nop	/* Pad so that the loop below fits a cache line. */
-	.endr
-ENTRY_ALIGN (__strncmp_aarch64_mte, 0)
+ENTRY (__strncmp_aarch64_mte)
 	PTR_ARG (0)
 	PTR_ARG (1)
 	SIZE_ARG (2)
@@ -73,7 +69,7 @@ ENTRY_ALIGN (__strncmp_aarch64_mte, 0)
 	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
 	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
 	   can be done in parallel across the entire word. */
-	/* Start of performance-critical section -- one 64B cache line. */
+	.p2align 4
 L(loop_aligned):
 	ldr	data1, [src1], #8
 	ldr	data2, [src2], #8
@@ -86,7 +82,7 @@ L(start_realigned):
 	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator. */
 	ccmp	endloop, #0, #0, eq
 	b.eq	L(loop_aligned)
-	/* End of performance-critical section -- one 64B cache line. */
+	/* End of main loop */
 
 L(full_check):
 #ifndef __AARCH64EB__
@@ -178,7 +174,7 @@ L(mutual_align):
 	orr	data2, data2, tmp2
 	b	L(start_realigned)
 
-	.p2align 6
+	.p2align 4
 	/* Don't bother with dwords for up to 16 bytes. */
 L(misaligned8):
 	cmp	limit, #16