Filterbank accumulate NEON: use a for loop instead of do/while

PiperOrigin-RevId: 469551042
author: Frank Barchard <fbarchard@google.com> 2022-08-23 14:03:22 -0700
committer: XNNPACK Team <xnnpack-github-robot@google.com> 2022-08-23 14:04:25 -0700
commit: 317950c01f4db6676986ec1837caacef6c845e70 (patch)
tree: 508b0f38c4b93a3c8e9ea9da1b5b60daec24507a
parent: 0cc320718a9940dce5aba58130290c3760a840b4 (diff)
download: XNNPACK-317950c01f4db6676986ec1837caacef6c845e70.tar.gz
2 files changed, 13 insertions, 19 deletions
diff --git a/src/u32-filterbank-accumulate/gen/neon-x2.c b/src/u32-filterbank-accumulate/gen/neon-x2.c
index 155516885..ad7aab503 100644
--- a/src/u32-filterbank-accumulate/gen/neon-x2.c
+++ b/src/u32-filterbank-accumulate/gen/neon-x2.c
@@ -36,16 +36,13 @@ void xnn_u32_filterbank_accumulate_ukernel__neon_x2(
     size_t n = (size_t) *weight_widths++;
     assert(n != 0);
 
-    if (n >= 2) {
-      do {
-        const uint32x2_t vi = vld1_u32(input); input += 2;
-        const uint16x4_t vw = vld1_u16(weights); weights += 4;
-        const uint32x4_t vw32 = vmovl_u16(vw);
+    for (;n >= 2; n -= 2) {
+      const uint32x2_t vi = vld1_u32(input); input += 2;
+      const uint16x4_t vw = vld1_u16(weights); weights += 4;
+      const uint32x4_t vw32 = vmovl_u16(vw);
 
-        weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_low_u32(vw32), vi, 0);
-        weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_high_u32(vw32), vi, 1);
-        n -= 2;
-      } while (n >= 2);
+      weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_low_u32(vw32), vi, 0);
+      weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_high_u32(vw32), vi, 1);
     }
     if (n != 0) {
       do {
diff --git a/src/u32-filterbank-accumulate/neon.c.in b/src/u32-filterbank-accumulate/neon.c.in
index b51ac5a65..4038107f8 100644
--- a/src/u32-filterbank-accumulate/neon.c.in
+++ b/src/u32-filterbank-accumulate/neon.c.in
@@ -33,16 +33,13 @@ void xnn_u32_filterbank_accumulate_ukernel__neon_x${BATCH_TILE}(
     assert(n != 0);
 
     $if BATCH_TILE == 2:
-      if (n >= 2) {
-        do {
-          const uint32x2_t vi = vld1_u32(input); input += 2;
-          const uint16x4_t vw = vld1_u16(weights); weights += 4;
-          const uint32x4_t vw32 = vmovl_u16(vw);
-
-          weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_low_u32(vw32), vi, 0);
-          weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_high_u32(vw32), vi, 1);
-          n -= 2;
-        } while (n >= 2);
+      for (;n >= 2; n -= 2) {
+        const uint32x2_t vi = vld1_u32(input); input += 2;
+        const uint16x4_t vw = vld1_u16(weights); weights += 4;
+        const uint32x4_t vw32 = vmovl_u16(vw);
+
+        weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_low_u32(vw32), vi, 0);
+        weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_high_u32(vw32), vi, 1);
       }
     if (n != 0) {
       do {
author	Frank Barchard <fbarchard@google.com>	2022-08-23 14:03:22 -0700
committer	XNNPACK Team <xnnpack-github-robot@google.com>	2022-08-23 14:04:25 -0700
commit	317950c01f4db6676986ec1837caacef6c845e70 (patch)
tree	508b0f38c4b93a3c8e9ea9da1b5b60daec24507a
parent	0cc320718a9940dce5aba58130290c3760a840b4 (diff)
download	XNNPACK-317950c01f4db6676986ec1837caacef6c845e70.tar.gz