aboutsummaryrefslogtreecommitdiff
path: root/src/f32-gemm/gen/6x8-aarch64-neonfma-ld64.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/f32-gemm/gen/6x8-aarch64-neonfma-ld64.S')
-rw-r--r-- src/f32-gemm/gen/6x8-aarch64-neonfma-ld64.S | 4
1 files changed, 1 insertions, 3 deletions
diff --git a/src/f32-gemm/gen/6x8-aarch64-neonfma-ld64.S b/src/f32-gemm/gen/6x8-aarch64-neonfma-ld64.S
index 4f869950b..d9460005f 100644
--- a/src/f32-gemm/gen/6x8-aarch64-neonfma-ld64.S
+++ b/src/f32-gemm/gen/6x8-aarch64-neonfma-ld64.S
@@ -126,7 +126,7 @@ BEGIN_FUNCTION xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_ld64
# Is there at least 2 floats (8 bytes) for main loop?
SUBS x0, x2, 8 // k = kc - 8
- B.LO 2f
+ B.LO 4f
# Main loop - 2 floats of A (8 bytes)
# 24 FMA + 6 LD64 A + 2 LDP B
@@ -167,7 +167,6 @@ BEGIN_FUNCTION xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_ld64
FMLA v31.4s, v19.4s, v5.s[1]
B.HS 1b
-2:
# Is there a remainder?- 1 floats of A (4 bytes)
TBNZ x0, 2, 4f
3:
@@ -215,7 +214,6 @@ BEGIN_FUNCTION xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_ld64
SUB x4, x4, x2 // a5 -= kc
B.HI 0b
-
RET
4: