Fix FILTERBANK-ACCUMULATE microkernels

Make FILTERBANK-ACCUMULATE microkernels match TFLM audio_frontend semantics

PiperOrigin-RevId: 469635373
author: Marat Dukhan <maratek@google.com> 2022-08-23 22:05:36 -0700
committer: XNNPACK Team <xnnpack-github-robot@google.com> 2022-08-23 22:06:35 -0700
commit: 2c02fb77c4323339014390a5d377182419bacd7f (patch)
tree: 52c7ec9c0f69e5c9341b06d91f3639c29b2c935c
parent: f133344411a0fc6ebf1a3b1518321b61e2e759b9 (diff)
download: XNNPACK-2c02fb77c4323339014390a5d377182419bacd7f.tar.gz
14 files changed, 4 insertions, 399 deletions
diff --git a/BUILD.bazel b/BUILD.bazel
index a11e9d439..35c7a89dc 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -4214,8 +4214,6 @@ ALL_NEON_MICROKERNEL_SRCS = [
     "src/u8-maxpool/9p8x-minmax-neon-c16.c",
     "src/u8-rmax/neon.c",
     "src/u8-vclamp/neon-x64.c",
-    "src/u32-filterbank-accumulate/gen/neon-x1.c",
-    "src/u32-filterbank-accumulate/gen/neon-x2.c",
     "src/xx-fill/neon-x64.c",
     "src/xx-pad/neon.c",
     "src/x8-transposec/gen/8x8-multi-dec-zip-neon.c",
@@ -8309,8 +8307,6 @@ AARCH32_ASM_MICROKERNEL_SRCS = [
     "src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-cortex-a7.S",
     "src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-cortex-a53.S",
     "src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-ld64.S",
-    "src/u32-filterbank-accumulate/aarch32-neon-x1.S",
-    "src/u32-filterbank-accumulate/aarch32-neon-x2.S",
 ]
 
 AARCH64_ASM_MICROKERNEL_SRCS = [
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3dce2f237..c7cfb7c4e 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2708,8 +2708,6 @@ SET(ALL_NEON_MICROKERNEL_SRCS
   src/u8-maxpool/9p8x-minmax-neon-c16.c
   src/u8-rmax/neon.c
   src/u8-vclamp/neon-x64.c
-  src/u32-filterbank-accumulate/gen/neon-x1.c
-  src/u32-filterbank-accumulate/gen/neon-x2.c
   src/xx-fill/neon-x64.c
   src/xx-pad/neon.c
   src/x8-transposec/gen/8x8-multi-dec-zip-neon.c
@@ -6750,9 +6748,7 @@ SET(AARCH32_ASM_MICROKERNEL_SRCS
   src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-ld64.S
   src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-cortex-a7.S
   src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-cortex-a53.S
-  src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-ld64.S
-  src/u32-filterbank-accumulate/aarch32-neon-x1.S
-  src/u32-filterbank-accumulate/aarch32-neon-x2.S)
+  src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-ld64.S)
 
 SET(AARCH64_ASM_MICROKERNEL_SRCS
   src/f16-gemm/gen-inc/1x8inc-minmax-aarch64-neonfp16arith-ld64.S
diff --git a/bench/u32-filterbank-accumulate.cc b/bench/u32-filterbank-accumulate.cc
index ba064f945..948998339 100644
--- a/bench/u32-filterbank-accumulate.cc
+++ b/bench/u32-filterbank-accumulate.cc
@@ -67,16 +67,6 @@ static void BenchmarkKernelSize(benchmark::internal::Benchmark* b)
   b->Args({1, 13});
 }
 
-#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
-BENCHMARK_CAPTURE(filterbank_accumulate, u32_aarch32_neon_x1,  xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1,  benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime();
-BENCHMARK_CAPTURE(filterbank_accumulate, u32_aarch32_neon_x2,  xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2,  benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime();
-#endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
-BENCHMARK_CAPTURE(filterbank_accumulate, u32_neon_x1,  xnn_u32_filterbank_accumulate_ukernel__neon_x1,  benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime();
-BENCHMARK_CAPTURE(filterbank_accumulate, u32_neon_x2,  xnn_u32_filterbank_accumulate_ukernel__neon_x2,  benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime();
-#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
 BENCHMARK_CAPTURE(filterbank_accumulate, u32_scalar_x1, xnn_u32_filterbank_accumulate_ukernel__scalar_x1)->Apply(BenchmarkKernelSize)->UseRealTime();
 
 #ifndef XNNPACK_BENCHMARK_NO_MAIN
diff --git a/scripts/generate-u32-filterbank-accumulate.sh b/scripts/generate-u32-filterbank-accumulate.sh
index 36f44f266..f2dee01d0 100755
--- a/scripts/generate-u32-filterbank-accumulate.sh
+++ b/scripts/generate-u32-filterbank-accumulate.sh
@@ -7,10 +7,6 @@
 ################################### SCALAR ###################################
 tools/xngen src/u32-filterbank-accumulate/scalar.c.in -D BATCH_TILE=1 -o src/u32-filterbank-accumulate/gen/scalar-x1.c &
 
-################################### NEON ###################################
-tools/xngen src/u32-filterbank-accumulate/neon.c.in -D BATCH_TILE=1 -o src/u32-filterbank-accumulate/gen/neon-x1.c &
-tools/xngen src/u32-filterbank-accumulate/neon.c.in -D BATCH_TILE=2 -o src/u32-filterbank-accumulate/gen/neon-x2.c &
-
 ################################## Unit tests #################################
 tools/generate-filterbank-accumulate-test.py --spec test/u32-filterbank-accumulate.yaml --output test/u32-filterbank-accumulate.cc &
 
diff --git a/src/u32-filterbank-accumulate/aarch32-neon-x1.S b/src/u32-filterbank-accumulate/aarch32-neon-x1.S
deleted file mode 100644
index bd5ad932a..000000000
--- a/src/u32-filterbank-accumulate/aarch32-neon-x1.S
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2022 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <xnnpack/assembly.h>
-
-.syntax unified
-
-// void xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1(
-//     size_t rows,                          r0
-//     const uint32_t* input,                r1
-//     const uint8_t* weight_widths,         r2
-//     const uint16_t* weights,              r3
-//     uint64_t* output)                     sp -> r12
-
-// d8-d15, r12-r11,r14(lr) need to be preserved if used. r13(sp),r15(pc) are reserved.
-
-// Register usage
-// input   r1  d2
-// weights r3  d3 d4 d5
-// output  r12 d0 d1
-
-// weight_widths r2 r4
-
-BEGIN_FUNCTION xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1
-        .arm
-#ifndef __APPLE__
-        .arch   armv7-a
-        .fpu    neon
-#endif
-        LDR     r12, [sp]               // output
-        PUSH    {r4}                    // push 4 bytes
-        VMOV.U8 q0, #0                  // weight_accumulator
-0:
-        LDRB    r4, [r2], #1            // weight_widths
-
-1:
-        VLD1.32 {d3[]}, [r3]!           // weights
-        VLD1.32 {d2[]}, [r1]!           // input
-        SUBS    r4, r4, #1
-        VMOVL.U16 q2, d3
-        VMLAL.U32 q0, d4, d2
-        BHI     1b
-
-        VST1.64 {d0}, [r12]!
-        SUBS    r0, r0, #1
-        VMOV    d0, d1
-        BNE     0b
-
-        POP     {r4}
-        BX      lr
-
-END_FUNCTION xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1
-
-#ifdef __ELF__
-.section ".note.GNU-stack","",%progbits
-#endif
diff --git a/src/u32-filterbank-accumulate/aarch32-neon-x2.S b/src/u32-filterbank-accumulate/aarch32-neon-x2.S
deleted file mode 100644
index 5c8cc3a00..000000000
--- a/src/u32-filterbank-accumulate/aarch32-neon-x2.S
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright 2022 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <xnnpack/assembly.h>
-
-.syntax unified
-
-// void xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2(
-//     size_t rows,                          r0
-//     const uint32_t* input,                r1
-//     const uint8_t* weight_widths,         r2
-//     const uint16_t* weights,              r3
-//     uint64_t* output)                     sp -> r12
-
-// d8-d15, r12-r11,r14(lr) need to be preserved if used. r13(sp),r15(pc) are reserved.
-
-// Register usage
-// input   r1  d2
-// weights r3  d3 d4 d5
-// output  r12 d0 d1
-
-// weight_widths r2 r4
-
-BEGIN_FUNCTION xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2
-        .arm
-#ifndef __APPLE__
-        .arch   armv7-a
-        .fpu    neon
-#endif
-        LDR     r12, [sp]               // output
-        PUSH    {r4}                    // push 4 bytes
-        VMOV.U8 q0, #0                  // weight_accumulator
-0:
-        LDRB    r4, [r2], #1            // weight_widths
-        SUBS    r4, r4, #1
-        BLS     2f                      // less than 2 weights?
-
-1:
-        VLD1.16 {d3}, [r3]!             // weights
-        VLD1.32 {d2}, [r1]!             // input
-        SUBS    r4, r4, #2
-        VMOVL.U16 q2, d3
-        VMLAL.U32 q0, d4, d2[0]
-        VMLAL.U32 q0, d5, d2[1]
-        BHI     1b
-
-        BLO     3f                      // is there a remainder?
-2:
-        VLD1.32 {d3[]}, [r3]!           // weights
-        VLD1.32 {d2[]}, [r1]!           // input
-        VMOVL.U16 q2, d3
-        VMLAL.U32 q0, d4, d2
-
-3:
-        VST1.64 {d0}, [r12]!
-        SUBS    r0, r0, #1
-        VMOV    d0, d1
-        BNE     0b
-
-        POP     {r4}
-        BX      lr
-
-END_FUNCTION xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2
-
-#ifdef __ELF__
-.section ".note.GNU-stack","",%progbits
-#endif
diff --git a/src/u32-filterbank-accumulate/gen/neon-x1.c b/src/u32-filterbank-accumulate/gen/neon-x1.c
deleted file mode 100644
index d57abef42..000000000
--- a/src/u32-filterbank-accumulate/gen/neon-x1.c
+++ /dev/null
@@ -1,53 +0,0 @@
-// Auto-generated file. Do not edit!
-//   Template: src/u32-filterbank-accumulate/neon.c.in
-//   Generator: tools/xngen
-//
-// Copyright 2022 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <assert.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#include <arm_neon.h>
-
-#include <xnnpack/math.h>
-#include <xnnpack/filterbank.h>
-
-
-void xnn_u32_filterbank_accumulate_ukernel__neon_x1(
-    size_t rows,
-    const uint32_t* input,
-    const uint8_t* weight_widths,
-    const uint16_t* weights,
-    uint64_t* output) {
-
-  assert(rows != 0);
-  assert(input != NULL);
-  assert(weight_widths != NULL);
-  assert(weights != NULL);
-  assert(output != NULL);
-
-  uint64x2_t weight_accumulator = vdupq_n_u64(0);
-
-  do {
-    size_t n = (size_t) *weight_widths++;
-    assert(n != 0);
-
-    if (n != 0) {
-      do {
-        const uint32x2_t vi = vld1_dup_u32(input); input += 1;
-        const uint16x4_t vw = vreinterpret_u16_u32(vld1_dup_u32((const void*) weights)); weights += 2;
-        const uint32x2_t vw32 = vget_low_u32(vmovl_u16(vw));
-
-        weight_accumulator = vmlal_u32(weight_accumulator, vw32, vi);
-      } while (--n != 0);
-    }
-
-    vst1_u64(output, vget_low_u64(weight_accumulator));  output += 1;
-    weight_accumulator = vcombine_u64(vget_high_u64(weight_accumulator), vget_high_u64(weight_accumulator));
-
-  } while (--rows != 0);
-}
diff --git a/src/u32-filterbank-accumulate/gen/neon-x2.c b/src/u32-filterbank-accumulate/gen/neon-x2.c
deleted file mode 100644
index ad7aab503..000000000
--- a/src/u32-filterbank-accumulate/gen/neon-x2.c
+++ /dev/null
@@ -1,61 +0,0 @@
-// Auto-generated file. Do not edit!
-//   Template: src/u32-filterbank-accumulate/neon.c.in
-//   Generator: tools/xngen
-//
-// Copyright 2022 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <assert.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#include <arm_neon.h>
-
-#include <xnnpack/math.h>
-#include <xnnpack/filterbank.h>
-
-
-void xnn_u32_filterbank_accumulate_ukernel__neon_x2(
-    size_t rows,
-    const uint32_t* input,
-    const uint8_t* weight_widths,
-    const uint16_t* weights,
-    uint64_t* output) {
-
-  assert(rows != 0);
-  assert(input != NULL);
-  assert(weight_widths != NULL);
-  assert(weights != NULL);
-  assert(output != NULL);
-
-  uint64x2_t weight_accumulator = vdupq_n_u64(0);
-
-  do {
-    size_t n = (size_t) *weight_widths++;
-    assert(n != 0);
-
-    for (;n >= 2; n -= 2) {
-      const uint32x2_t vi = vld1_u32(input); input += 2;
-      const uint16x4_t vw = vld1_u16(weights); weights += 4;
-      const uint32x4_t vw32 = vmovl_u16(vw);
-
-      weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_low_u32(vw32), vi, 0);
-      weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_high_u32(vw32), vi, 1);
-    }
-    if (n != 0) {
-      do {
-        const uint32x2_t vi = vld1_dup_u32(input); input += 1;
-        const uint16x4_t vw = vreinterpret_u16_u32(vld1_dup_u32((const void*) weights)); weights += 2;
-        const uint32x2_t vw32 = vget_low_u32(vmovl_u16(vw));
-
-        weight_accumulator = vmlal_u32(weight_accumulator, vw32, vi);
-      } while (--n != 0);
-    }
-
-    vst1_u64(output, vget_low_u64(weight_accumulator));  output += 1;
-    weight_accumulator = vcombine_u64(vget_high_u64(weight_accumulator), vget_high_u64(weight_accumulator));
-
-  } while (--rows != 0);
-}
diff --git a/src/u32-filterbank-accumulate/gen/scalar-x1.c b/src/u32-filterbank-accumulate/gen/scalar-x1.c
index b0e8e5749..8ec5c0771 100644
--- a/src/u32-filterbank-accumulate/gen/scalar-x1.c
+++ b/src/u32-filterbank-accumulate/gen/scalar-x1.c
@@ -50,6 +50,7 @@ void xnn_u32_filterbank_accumulate_ukernel__scalar_x1(
 
     *output++ = weight_accumulator;
     weight_accumulator = unweight_accumulator;
+    unweight_accumulator = 0;
 
   } while (--rows != 0);
 }
diff --git a/src/u32-filterbank-accumulate/neon.c.in b/src/u32-filterbank-accumulate/neon.c.in
deleted file mode 100644
index 4038107f8..000000000
--- a/src/u32-filterbank-accumulate/neon.c.in
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2022 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <assert.h>
-#include <stddef.h>
-#include <stdint.h>
-
-#include <arm_neon.h>
-
-#include <xnnpack/math.h>
-#include <xnnpack/filterbank.h>
-
-
-void xnn_u32_filterbank_accumulate_ukernel__neon_x${BATCH_TILE}(
-    size_t rows,
-    const uint32_t* input,
-    const uint8_t* weight_widths,
-    const uint16_t* weights,
-    uint64_t* output) {
-
-  assert(rows != 0);
-  assert(input != NULL);
-  assert(weight_widths != NULL);
-  assert(weights != NULL);
-  assert(output != NULL);
-
-  uint64x2_t weight_accumulator = vdupq_n_u64(0);
-
-  do {
-    size_t n = (size_t) *weight_widths++;
-    assert(n != 0);
-
-    $if BATCH_TILE == 2:
-      for (;n >= 2; n -= 2) {
-        const uint32x2_t vi = vld1_u32(input); input += 2;
-        const uint16x4_t vw = vld1_u16(weights); weights += 4;
-        const uint32x4_t vw32 = vmovl_u16(vw);
-
-        weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_low_u32(vw32), vi, 0);
-        weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_high_u32(vw32), vi, 1);
-      }
-    if (n != 0) {
-      do {
-        const uint32x2_t vi = vld1_dup_u32(input); input += 1;
-        const uint16x4_t vw = vreinterpret_u16_u32(vld1_dup_u32((const void*) weights)); weights += 2;
-        const uint32x2_t vw32 = vget_low_u32(vmovl_u16(vw));
-
-        weight_accumulator = vmlal_u32(weight_accumulator, vw32, vi);
-      } while (--n != 0);
-    }
-
-    vst1_u64(output, vget_low_u64(weight_accumulator));  output += 1;
-    weight_accumulator = vcombine_u64(vget_high_u64(weight_accumulator), vget_high_u64(weight_accumulator));
-
-  } while (--rows != 0);
-}
diff --git a/src/u32-filterbank-accumulate/scalar.c.in b/src/u32-filterbank-accumulate/scalar.c.in
index cdcf3bb4c..b9dc80c1a 100644
--- a/src/u32-filterbank-accumulate/scalar.c.in
+++ b/src/u32-filterbank-accumulate/scalar.c.in
@@ -47,6 +47,7 @@ void xnn_u32_filterbank_accumulate_ukernel__scalar_x${BATCH_TILE}(
 
     *output++ = weight_accumulator;
     weight_accumulator = unweight_accumulator;
+    unweight_accumulator = 0;
 
   } while (--rows != 0);
 }
diff --git a/test/filterbank-accumulate-microkernel-tester.h b/test/filterbank-accumulate-microkernel-tester.h
index 6a85e81df..4a1a705a0 100644
--- a/test/filterbank-accumulate-microkernel-tester.h
+++ b/test/filterbank-accumulate-microkernel-tester.h
@@ -76,6 +76,7 @@ class FilterbankAccumulateMicrokernelTester {
         }
         output_ref[m] = weight_accumulator;
         weight_accumulator = unweight_accumulator;
+        unweight_accumulator = 0;
       }
 
       // Call optimized micro-kernel.
diff --git a/test/u32-filterbank-accumulate.cc b/test/u32-filterbank-accumulate.cc
index 4929af500..655c9e810 100644
--- a/test/u32-filterbank-accumulate.cc
+++ b/test/u32-filterbank-accumulate.cc
@@ -17,74 +17,6 @@
 #include "filterbank-accumulate-microkernel-tester.h"
 
 
-#if XNN_ARCH_ARM
-  TEST(U32_FILTERBANK_ACCUMULATE__AARCH32_NEON_X1, rows_eq_1) {
-    TEST_REQUIRES_ARM_NEON;
-    FilterbankAccumulateMicrokernelTester()
-      .rows(1)
-      .Test(xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1);
-  }
-
-  TEST(U32_FILTERBANK_ACCUMULATE__AARCH32_NEON_X1, rows_eq_2) {
-    TEST_REQUIRES_ARM_NEON;
-    FilterbankAccumulateMicrokernelTester()
-      .rows(2)
-      .Test(xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1);
-  }
-#endif  // XNN_ARCH_ARM
-
-
-#if XNN_ARCH_ARM
-  TEST(U32_FILTERBANK_ACCUMULATE__AARCH32_NEON_X2, rows_eq_1) {
-    TEST_REQUIRES_ARM_NEON;
-    FilterbankAccumulateMicrokernelTester()
-      .rows(1)
-      .Test(xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2);
-  }
-
-  TEST(U32_FILTERBANK_ACCUMULATE__AARCH32_NEON_X2, rows_eq_2) {
-    TEST_REQUIRES_ARM_NEON;
-    FilterbankAccumulateMicrokernelTester()
-      .rows(2)
-      .Test(xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2);
-  }
-#endif  // XNN_ARCH_ARM
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, rows_eq_1) {
-    TEST_REQUIRES_ARM_NEON;
-    FilterbankAccumulateMicrokernelTester()
-      .rows(1)
-      .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x1);
-  }
-
-  TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, rows_eq_2) {
-    TEST_REQUIRES_ARM_NEON;
-    FilterbankAccumulateMicrokernelTester()
-      .rows(2)
-      .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x1);
-  }
-#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, rows_eq_1) {
-    TEST_REQUIRES_ARM_NEON;
-    FilterbankAccumulateMicrokernelTester()
-      .rows(1)
-      .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2);
-  }
-
-  TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, rows_eq_2) {
-    TEST_REQUIRES_ARM_NEON;
-    FilterbankAccumulateMicrokernelTester()
-      .rows(2)
-      .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2);
-  }
-#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
 TEST(U32_FILTERBANK_ACCUMULATE__SCALAR_X1, rows_eq_1) {
   FilterbankAccumulateMicrokernelTester()
     .rows(1)
diff --git a/test/u32-filterbank-accumulate.yaml b/test/u32-filterbank-accumulate.yaml
index 54798504c..a44f9d45c 100644
--- a/test/u32-filterbank-accumulate.yaml
+++ b/test/u32-filterbank-accumulate.yaml
@@ -3,14 +3,5 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-
-# AArch32 assembly
-- name: xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1
-- name: xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2
-
-# ARM NEON
-- name: xnn_u32_filterbank_accumulate_ukernel__neon_x1
-- name: xnn_u32_filterbank_accumulate_ukernel__neon_x2
-
 # Scalar
 - name: xnn_u32_filterbank_accumulate_ukernel__scalar_x1
author	Marat Dukhan <maratek@google.com>	2022-08-23 22:05:36 -0700
committer	XNNPACK Team <xnnpack-github-robot@google.com>	2022-08-23 22:06:35 -0700
commit	2c02fb77c4323339014390a5d377182419bacd7f (patch)
tree	52c7ec9c0f69e5c9341b06d91f3639c29b2c935c
parent	f133344411a0fc6ebf1a3b1518321b61e2e759b9 (diff)
download	XNNPACK-2c02fb77c4323339014390a5d377182419bacd7f.tar.gz