diff options
author | Marat Dukhan <maratek@google.com> | 2022-08-23 22:05:36 -0700 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2022-08-23 22:06:35 -0700 |
commit | 2c02fb77c4323339014390a5d377182419bacd7f (patch) | |
tree | 52c7ec9c0f69e5c9341b06d91f3639c29b2c935c | |
parent | f133344411a0fc6ebf1a3b1518321b61e2e759b9 (diff) | |
download | XNNPACK-2c02fb77c4323339014390a5d377182419bacd7f.tar.gz |
Fix FILTERBANK-ACCUMULATE microkernels
Make FILTERBANK-ACCUMULATE microkernels match TFLM audio_frontend semantics
PiperOrigin-RevId: 469635373
-rw-r--r-- | BUILD.bazel | 4 | ||||
-rwxr-xr-x | CMakeLists.txt | 6 | ||||
-rw-r--r-- | bench/u32-filterbank-accumulate.cc | 10 | ||||
-rwxr-xr-x | scripts/generate-u32-filterbank-accumulate.sh | 4 | ||||
-rw-r--r-- | src/u32-filterbank-accumulate/aarch32-neon-x1.S | 58 | ||||
-rw-r--r-- | src/u32-filterbank-accumulate/aarch32-neon-x2.S | 69 | ||||
-rw-r--r-- | src/u32-filterbank-accumulate/gen/neon-x1.c | 53 | ||||
-rw-r--r-- | src/u32-filterbank-accumulate/gen/neon-x2.c | 61 | ||||
-rw-r--r-- | src/u32-filterbank-accumulate/gen/scalar-x1.c | 1 | ||||
-rw-r--r-- | src/u32-filterbank-accumulate/neon.c.in | 58 | ||||
-rw-r--r-- | src/u32-filterbank-accumulate/scalar.c.in | 1 | ||||
-rw-r--r-- | test/filterbank-accumulate-microkernel-tester.h | 1 | ||||
-rw-r--r-- | test/u32-filterbank-accumulate.cc | 68 | ||||
-rw-r--r-- | test/u32-filterbank-accumulate.yaml | 9 |
14 files changed, 4 insertions, 399 deletions
diff --git a/BUILD.bazel b/BUILD.bazel index a11e9d439..35c7a89dc 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -4214,8 +4214,6 @@ ALL_NEON_MICROKERNEL_SRCS = [ "src/u8-maxpool/9p8x-minmax-neon-c16.c", "src/u8-rmax/neon.c", "src/u8-vclamp/neon-x64.c", - "src/u32-filterbank-accumulate/gen/neon-x1.c", - "src/u32-filterbank-accumulate/gen/neon-x2.c", "src/xx-fill/neon-x64.c", "src/xx-pad/neon.c", "src/x8-transposec/gen/8x8-multi-dec-zip-neon.c", @@ -8309,8 +8307,6 @@ AARCH32_ASM_MICROKERNEL_SRCS = [ "src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-cortex-a7.S", "src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-cortex-a53.S", "src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-ld64.S", - "src/u32-filterbank-accumulate/aarch32-neon-x1.S", - "src/u32-filterbank-accumulate/aarch32-neon-x2.S", ] AARCH64_ASM_MICROKERNEL_SRCS = [ diff --git a/CMakeLists.txt b/CMakeLists.txt index 3dce2f237..c7cfb7c4e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2708,8 +2708,6 @@ SET(ALL_NEON_MICROKERNEL_SRCS src/u8-maxpool/9p8x-minmax-neon-c16.c src/u8-rmax/neon.c src/u8-vclamp/neon-x64.c - src/u32-filterbank-accumulate/gen/neon-x1.c - src/u32-filterbank-accumulate/gen/neon-x2.c src/xx-fill/neon-x64.c src/xx-pad/neon.c src/x8-transposec/gen/8x8-multi-dec-zip-neon.c @@ -6750,9 +6748,7 @@ SET(AARCH32_ASM_MICROKERNEL_SRCS src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-ld64.S src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-cortex-a7.S src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-cortex-a53.S - src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-ld64.S - src/u32-filterbank-accumulate/aarch32-neon-x1.S - src/u32-filterbank-accumulate/aarch32-neon-x2.S) + src/qu8-igemm/gen/4x8-minmax-rndnu-aarch32-neon-mlal-lane-prfm-ld64.S) SET(AARCH64_ASM_MICROKERNEL_SRCS src/f16-gemm/gen-inc/1x8inc-minmax-aarch64-neonfp16arith-ld64.S diff --git a/bench/u32-filterbank-accumulate.cc b/bench/u32-filterbank-accumulate.cc index ba064f945..948998339 100644 --- a/bench/u32-filterbank-accumulate.cc +++ b/bench/u32-filterbank-accumulate.cc @@ -67,16 +67,6 @@ static void BenchmarkKernelSize(benchmark::internal::Benchmark* b) b->Args({1, 13}); } -#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY -BENCHMARK_CAPTURE(filterbank_accumulate, u32_aarch32_neon_x1, xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1, benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime(); -BENCHMARK_CAPTURE(filterbank_accumulate, u32_aarch32_neon_x2, xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2, benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime(); -#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 -BENCHMARK_CAPTURE(filterbank_accumulate, u32_neon_x1, xnn_u32_filterbank_accumulate_ukernel__neon_x1, benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime(); -BENCHMARK_CAPTURE(filterbank_accumulate, u32_neon_x2, xnn_u32_filterbank_accumulate_ukernel__neon_x2, benchmark::utils::CheckNEON)->Apply(BenchmarkKernelSize)->UseRealTime(); -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - BENCHMARK_CAPTURE(filterbank_accumulate, u32_scalar_x1, xnn_u32_filterbank_accumulate_ukernel__scalar_x1)->Apply(BenchmarkKernelSize)->UseRealTime(); #ifndef XNNPACK_BENCHMARK_NO_MAIN diff --git a/scripts/generate-u32-filterbank-accumulate.sh b/scripts/generate-u32-filterbank-accumulate.sh index 36f44f266..f2dee01d0 100755 --- a/scripts/generate-u32-filterbank-accumulate.sh +++ b/scripts/generate-u32-filterbank-accumulate.sh @@ -7,10 +7,6 @@ ################################### SCALAR ################################### tools/xngen src/u32-filterbank-accumulate/scalar.c.in -D BATCH_TILE=1 -o src/u32-filterbank-accumulate/gen/scalar-x1.c & -################################### NEON ################################### -tools/xngen src/u32-filterbank-accumulate/neon.c.in -D BATCH_TILE=1 -o src/u32-filterbank-accumulate/gen/neon-x1.c & -tools/xngen src/u32-filterbank-accumulate/neon.c.in -D BATCH_TILE=2 -o src/u32-filterbank-accumulate/gen/neon-x2.c & - ################################## Unit tests ################################# tools/generate-filterbank-accumulate-test.py --spec test/u32-filterbank-accumulate.yaml --output test/u32-filterbank-accumulate.cc & diff --git a/src/u32-filterbank-accumulate/aarch32-neon-x1.S b/src/u32-filterbank-accumulate/aarch32-neon-x1.S deleted file mode 100644 index bd5ad932a..000000000 --- a/src/u32-filterbank-accumulate/aarch32-neon-x1.S +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2022 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include <xnnpack/assembly.h> - -.syntax unified - -// void xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1( -// size_t rows, r0 -// const uint32_t* input, r1 -// const uint8_t* weight_widths, r2 -// const uint16_t* weights, r3 -// uint64_t* output) sp -> r12 - -// d8-d15, r12-r11,r14(lr) need to be preserved if used. r13(sp),r15(pc) are reserved. - -// Register usage -// input r1 d2 -// weights r3 d3 d4 d5 -// output r12 d0 d1 - -// weight_widths r2 r4 - -BEGIN_FUNCTION xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1 - .arm -#ifndef __APPLE__ - .arch armv7-a - .fpu neon -#endif - LDR r12, [sp] // output - PUSH {r4} // push 4 bytes - VMOV.U8 q0, #0 // weight_accumulator -0: - LDRB r4, [r2], #1 // weight_widths - -1: - VLD1.32 {d3[]}, [r3]! // weights - VLD1.32 {d2[]}, [r1]! // input - SUBS r4, r4, #1 - VMOVL.U16 q2, d3 - VMLAL.U32 q0, d4, d2 - BHI 1b - - VST1.64 {d0}, [r12]! - SUBS r0, r0, #1 - VMOV d0, d1 - BNE 0b - - POP {r4} - BX lr - -END_FUNCTION xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1 - -#ifdef __ELF__ -.section ".note.GNU-stack","",%progbits -#endif diff --git a/src/u32-filterbank-accumulate/aarch32-neon-x2.S b/src/u32-filterbank-accumulate/aarch32-neon-x2.S deleted file mode 100644 index 5c8cc3a00..000000000 --- a/src/u32-filterbank-accumulate/aarch32-neon-x2.S +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2022 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include <xnnpack/assembly.h> - -.syntax unified - -// void xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2( -// size_t rows, r0 -// const uint32_t* input, r1 -// const uint8_t* weight_widths, r2 -// const uint16_t* weights, r3 -// uint64_t* output) sp -> r12 - -// d8-d15, r12-r11,r14(lr) need to be preserved if used. r13(sp),r15(pc) are reserved. - -// Register usage -// input r1 d2 -// weights r3 d3 d4 d5 -// output r12 d0 d1 - -// weight_widths r2 r4 - -BEGIN_FUNCTION xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2 - .arm -#ifndef __APPLE__ - .arch armv7-a - .fpu neon -#endif - LDR r12, [sp] // output - PUSH {r4} // push 4 bytes - VMOV.U8 q0, #0 // weight_accumulator -0: - LDRB r4, [r2], #1 // weight_widths - SUBS r4, r4, #1 - BLS 2f // less than 2 weights? - -1: - VLD1.16 {d3}, [r3]! // weights - VLD1.32 {d2}, [r1]! // input - SUBS r4, r4, #2 - VMOVL.U16 q2, d3 - VMLAL.U32 q0, d4, d2[0] - VMLAL.U32 q0, d5, d2[1] - BHI 1b - - BLO 3f // is there a remainder? -2: - VLD1.32 {d3[]}, [r3]! // weights - VLD1.32 {d2[]}, [r1]! // input - VMOVL.U16 q2, d3 - VMLAL.U32 q0, d4, d2 - -3: - VST1.64 {d0}, [r12]! - SUBS r0, r0, #1 - VMOV d0, d1 - BNE 0b - - POP {r4} - BX lr - -END_FUNCTION xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2 - -#ifdef __ELF__ -.section ".note.GNU-stack","",%progbits -#endif diff --git a/src/u32-filterbank-accumulate/gen/neon-x1.c b/src/u32-filterbank-accumulate/gen/neon-x1.c deleted file mode 100644 index d57abef42..000000000 --- a/src/u32-filterbank-accumulate/gen/neon-x1.c +++ /dev/null @@ -1,53 +0,0 @@ -// Auto-generated file. Do not edit! -// Template: src/u32-filterbank-accumulate/neon.c.in -// Generator: tools/xngen -// -// Copyright 2022 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include <assert.h> -#include <stddef.h> -#include <stdint.h> - -#include <arm_neon.h> - -#include <xnnpack/math.h> -#include <xnnpack/filterbank.h> - - -void xnn_u32_filterbank_accumulate_ukernel__neon_x1( - size_t rows, - const uint32_t* input, - const uint8_t* weight_widths, - const uint16_t* weights, - uint64_t* output) { - - assert(rows != 0); - assert(input != NULL); - assert(weight_widths != NULL); - assert(weights != NULL); - assert(output != NULL); - - uint64x2_t weight_accumulator = vdupq_n_u64(0); - - do { - size_t n = (size_t) *weight_widths++; - assert(n != 0); - - if (n != 0) { - do { - const uint32x2_t vi = vld1_dup_u32(input); input += 1; - const uint16x4_t vw = vreinterpret_u16_u32(vld1_dup_u32((const void*) weights)); weights += 2; - const uint32x2_t vw32 = vget_low_u32(vmovl_u16(vw)); - - weight_accumulator = vmlal_u32(weight_accumulator, vw32, vi); - } while (--n != 0); - } - - vst1_u64(output, vget_low_u64(weight_accumulator)); output += 1; - weight_accumulator = vcombine_u64(vget_high_u64(weight_accumulator), vget_high_u64(weight_accumulator)); - - } while (--rows != 0); -} diff --git a/src/u32-filterbank-accumulate/gen/neon-x2.c b/src/u32-filterbank-accumulate/gen/neon-x2.c deleted file mode 100644 index ad7aab503..000000000 --- a/src/u32-filterbank-accumulate/gen/neon-x2.c +++ /dev/null @@ -1,61 +0,0 @@ -// Auto-generated file. Do not edit! -// Template: src/u32-filterbank-accumulate/neon.c.in -// Generator: tools/xngen -// -// Copyright 2022 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include <assert.h> -#include <stddef.h> -#include <stdint.h> - -#include <arm_neon.h> - -#include <xnnpack/math.h> -#include <xnnpack/filterbank.h> - - -void xnn_u32_filterbank_accumulate_ukernel__neon_x2( - size_t rows, - const uint32_t* input, - const uint8_t* weight_widths, - const uint16_t* weights, - uint64_t* output) { - - assert(rows != 0); - assert(input != NULL); - assert(weight_widths != NULL); - assert(weights != NULL); - assert(output != NULL); - - uint64x2_t weight_accumulator = vdupq_n_u64(0); - - do { - size_t n = (size_t) *weight_widths++; - assert(n != 0); - - for (;n >= 2; n -= 2) { - const uint32x2_t vi = vld1_u32(input); input += 2; - const uint16x4_t vw = vld1_u16(weights); weights += 4; - const uint32x4_t vw32 = vmovl_u16(vw); - - weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_low_u32(vw32), vi, 0); - weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_high_u32(vw32), vi, 1); - } - if (n != 0) { - do { - const uint32x2_t vi = vld1_dup_u32(input); input += 1; - const uint16x4_t vw = vreinterpret_u16_u32(vld1_dup_u32((const void*) weights)); weights += 2; - const uint32x2_t vw32 = vget_low_u32(vmovl_u16(vw)); - - weight_accumulator = vmlal_u32(weight_accumulator, vw32, vi); - } while (--n != 0); - } - - vst1_u64(output, vget_low_u64(weight_accumulator)); output += 1; - weight_accumulator = vcombine_u64(vget_high_u64(weight_accumulator), vget_high_u64(weight_accumulator)); - - } while (--rows != 0); -} diff --git a/src/u32-filterbank-accumulate/gen/scalar-x1.c b/src/u32-filterbank-accumulate/gen/scalar-x1.c index b0e8e5749..8ec5c0771 100644 --- a/src/u32-filterbank-accumulate/gen/scalar-x1.c +++ b/src/u32-filterbank-accumulate/gen/scalar-x1.c @@ -50,6 +50,7 @@ void xnn_u32_filterbank_accumulate_ukernel__scalar_x1( *output++ = weight_accumulator; weight_accumulator = unweight_accumulator; + unweight_accumulator = 0; } while (--rows != 0); } diff --git a/src/u32-filterbank-accumulate/neon.c.in b/src/u32-filterbank-accumulate/neon.c.in deleted file mode 100644 index 4038107f8..000000000 --- a/src/u32-filterbank-accumulate/neon.c.in +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2022 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. - -#include <assert.h> -#include <stddef.h> -#include <stdint.h> - -#include <arm_neon.h> - -#include <xnnpack/math.h> -#include <xnnpack/filterbank.h> - - -void xnn_u32_filterbank_accumulate_ukernel__neon_x${BATCH_TILE}( - size_t rows, - const uint32_t* input, - const uint8_t* weight_widths, - const uint16_t* weights, - uint64_t* output) { - - assert(rows != 0); - assert(input != NULL); - assert(weight_widths != NULL); - assert(weights != NULL); - assert(output != NULL); - - uint64x2_t weight_accumulator = vdupq_n_u64(0); - - do { - size_t n = (size_t) *weight_widths++; - assert(n != 0); - - $if BATCH_TILE == 2: - for (;n >= 2; n -= 2) { - const uint32x2_t vi = vld1_u32(input); input += 2; - const uint16x4_t vw = vld1_u16(weights); weights += 4; - const uint32x4_t vw32 = vmovl_u16(vw); - - weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_low_u32(vw32), vi, 0); - weight_accumulator = vmlal_lane_u32(weight_accumulator, vget_high_u32(vw32), vi, 1); - } - if (n != 0) { - do { - const uint32x2_t vi = vld1_dup_u32(input); input += 1; - const uint16x4_t vw = vreinterpret_u16_u32(vld1_dup_u32((const void*) weights)); weights += 2; - const uint32x2_t vw32 = vget_low_u32(vmovl_u16(vw)); - - weight_accumulator = vmlal_u32(weight_accumulator, vw32, vi); - } while (--n != 0); - } - - vst1_u64(output, vget_low_u64(weight_accumulator)); output += 1; - weight_accumulator = vcombine_u64(vget_high_u64(weight_accumulator), vget_high_u64(weight_accumulator)); - - } while (--rows != 0); -} diff --git a/src/u32-filterbank-accumulate/scalar.c.in b/src/u32-filterbank-accumulate/scalar.c.in index cdcf3bb4c..b9dc80c1a 100644 --- a/src/u32-filterbank-accumulate/scalar.c.in +++ b/src/u32-filterbank-accumulate/scalar.c.in @@ -47,6 +47,7 @@ void xnn_u32_filterbank_accumulate_ukernel__scalar_x${BATCH_TILE}( *output++ = weight_accumulator; weight_accumulator = unweight_accumulator; + unweight_accumulator = 0; } while (--rows != 0); } diff --git a/test/filterbank-accumulate-microkernel-tester.h b/test/filterbank-accumulate-microkernel-tester.h index 6a85e81df..4a1a705a0 100644 --- a/test/filterbank-accumulate-microkernel-tester.h +++ b/test/filterbank-accumulate-microkernel-tester.h @@ -76,6 +76,7 @@ class FilterbankAccumulateMicrokernelTester { } output_ref[m] = weight_accumulator; weight_accumulator = unweight_accumulator; + unweight_accumulator = 0; } // Call optimized micro-kernel. diff --git a/test/u32-filterbank-accumulate.cc b/test/u32-filterbank-accumulate.cc index 4929af500..655c9e810 100644 --- a/test/u32-filterbank-accumulate.cc +++ b/test/u32-filterbank-accumulate.cc @@ -17,74 +17,6 @@ #include "filterbank-accumulate-microkernel-tester.h" -#if XNN_ARCH_ARM - TEST(U32_FILTERBANK_ACCUMULATE__AARCH32_NEON_X1, rows_eq_1) { - TEST_REQUIRES_ARM_NEON; - FilterbankAccumulateMicrokernelTester() - .rows(1) - .Test(xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1); - } - - TEST(U32_FILTERBANK_ACCUMULATE__AARCH32_NEON_X1, rows_eq_2) { - TEST_REQUIRES_ARM_NEON; - FilterbankAccumulateMicrokernelTester() - .rows(2) - .Test(xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1); - } -#endif // XNN_ARCH_ARM - - -#if XNN_ARCH_ARM - TEST(U32_FILTERBANK_ACCUMULATE__AARCH32_NEON_X2, rows_eq_1) { - TEST_REQUIRES_ARM_NEON; - FilterbankAccumulateMicrokernelTester() - .rows(1) - .Test(xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2); - } - - TEST(U32_FILTERBANK_ACCUMULATE__AARCH32_NEON_X2, rows_eq_2) { - TEST_REQUIRES_ARM_NEON; - FilterbankAccumulateMicrokernelTester() - .rows(2) - .Test(xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2); - } -#endif // XNN_ARCH_ARM - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, rows_eq_1) { - TEST_REQUIRES_ARM_NEON; - FilterbankAccumulateMicrokernelTester() - .rows(1) - .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x1); - } - - TEST(U32_FILTERBANK_ACCUMULATE__NEON_X1, rows_eq_2) { - TEST_REQUIRES_ARM_NEON; - FilterbankAccumulateMicrokernelTester() - .rows(2) - .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x1); - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, rows_eq_1) { - TEST_REQUIRES_ARM_NEON; - FilterbankAccumulateMicrokernelTester() - .rows(1) - .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2); - } - - TEST(U32_FILTERBANK_ACCUMULATE__NEON_X2, rows_eq_2) { - TEST_REQUIRES_ARM_NEON; - FilterbankAccumulateMicrokernelTester() - .rows(2) - .Test(xnn_u32_filterbank_accumulate_ukernel__neon_x2); - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - TEST(U32_FILTERBANK_ACCUMULATE__SCALAR_X1, rows_eq_1) { FilterbankAccumulateMicrokernelTester() .rows(1) diff --git a/test/u32-filterbank-accumulate.yaml b/test/u32-filterbank-accumulate.yaml index 54798504c..a44f9d45c 100644 --- a/test/u32-filterbank-accumulate.yaml +++ b/test/u32-filterbank-accumulate.yaml @@ -3,14 +3,5 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. - -# AArch32 assembly -- name: xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x1 -- name: xnn_u32_filterbank_accumulate_ukernel__aarch32_neon_x2 - -# ARM NEON -- name: xnn_u32_filterbank_accumulate_ukernel__neon_x1 -- name: xnn_u32_filterbank_accumulate_ukernel__neon_x2 - # Scalar - name: xnn_u32_filterbank_accumulate_ukernel__scalar_x1 |