aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrank Barchard <fbarchard@google.com>2022-07-28 14:09:52 -0700
committerXNNPACK Team <xnnpack-github-robot@google.com>2022-07-28 14:10:42 -0700
commita40c3b2be6348b684bb4a44fdb5c42b9b3cb6db5 (patch)
tree76913d34f7358eb57fbb24b61bfb638982918985
parentf77cd4974c180b8569239dc4dd4fc3c0196c1e9c (diff)
downloadXNNPACK-a40c3b2be6348b684bb4a44fdb5c42b9b3cb6db5.tar.gz
CS16 squareabs microkernel
- Scalar implementation PiperOrigin-RevId: 463921828
-rw-r--r--BUILD.bazel22
-rwxr-xr-xCMakeLists.txt13
-rw-r--r--bench/cs16-vsquareabs.cc68
-rwxr-xr-xscripts/generate-cs16-vsquareabs.sh16
-rw-r--r--src/cs16-vsquareabs/gen/scalar-x1.c42
-rw-r--r--src/cs16-vsquareabs/gen/scalar-x2.c61
-rw-r--r--src/cs16-vsquareabs/gen/scalar-x3.c67
-rw-r--r--src/cs16-vsquareabs/gen/scalar-x4.c73
-rw-r--r--src/cs16-vsquareabs/scalar.c.in57
-rw-r--r--src/xnnpack/params.h5
-rw-r--r--src/xnnpack/vsquareabs.h33
-rw-r--r--test/cs16-vsquareabs.cc125
-rw-r--r--test/cs16-vsquareabs.yaml11
-rw-r--r--test/vsquareabs-microkernel-tester.h81
-rwxr-xr-xtools/generate-vsquareabs-test.py155
15 files changed, 829 insertions, 0 deletions
diff --git a/BUILD.bazel b/BUILD.bazel
index 208175616..cde501142 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -620,6 +620,10 @@ ALL_SCALAR_MICROKERNEL_SRCS = [
"src/s16-vlshift/gen/scalar-x2.c",
"src/s16-vlshift/gen/scalar-x3.c",
"src/s16-vlshift/gen/scalar-x4.c",
+ "src/cs16-vsquareabs/gen/scalar-x1.c",
+ "src/cs16-vsquareabs/gen/scalar-x2.c",
+ "src/cs16-vsquareabs/gen/scalar-x3.c",
+ "src/cs16-vsquareabs/gen/scalar-x4.c",
"src/f16-f32-vcvt/gen/vcvt-scalar-x1.c",
"src/f16-f32-vcvt/gen/vcvt-scalar-x2.c",
"src/f16-f32-vcvt/gen/vcvt-scalar-x3.c",
@@ -8521,6 +8525,7 @@ INTERNAL_MICROKERNEL_HDRS = [
"src/xnnpack/vmulcaddc.h",
"src/xnnpack/vscaleexpminusmax.h",
"src/xnnpack/vscaleextexp.h",
+ "src/xnnpack/vsquareabs.h",
"src/xnnpack/vunary.h",
"src/xnnpack/window.h",
"src/xnnpack/zip.h",
@@ -11765,6 +11770,14 @@ xnnpack_benchmark(
)
xnnpack_benchmark(
+ name = "cs16_vsquareabs_bench",
+ srcs = [
+ "bench/cs16-vsquareabs.cc",
+ ],
+ deps = MICROKERNEL_BENCHMARK_DEPS,
+)
+
+xnnpack_benchmark(
name = "x8_lut_bench",
srcs = [
"bench/x8-lut.cc",
@@ -14096,6 +14109,15 @@ xnnpack_unit_test(
)
xnnpack_unit_test(
+ name = "cs16_vsquareabs_test",
+ srcs = [
+ "test/cs16-vsquareabs.cc",
+ "test/vsquareabs-microkernel-tester.h",
+ ],
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
+xnnpack_unit_test(
name = "s8_ibilinear_test",
srcs = [
"test/ibilinear-microkernel-tester.h",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ea1bf1fd1..e91fe40d6 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -607,6 +607,10 @@ SET(ALL_SCALAR_MICROKERNEL_SRCS
src/s16-vlshift/gen/scalar-x2.c
src/s16-vlshift/gen/scalar-x3.c
src/s16-vlshift/gen/scalar-x4.c
+ src/cs16-vsquareabs/gen/scalar-x1.c
+ src/cs16-vsquareabs/gen/scalar-x2.c
+ src/cs16-vsquareabs/gen/scalar-x3.c
+ src/cs16-vsquareabs/gen/scalar-x4.c
src/f16-f32-vcvt/gen/vcvt-scalar-x1.c
src/f16-f32-vcvt/gen/vcvt-scalar-x2.c
src/f16-f32-vcvt/gen/vcvt-scalar-x3.c
@@ -8637,6 +8641,11 @@ IF(XNNPACK_BUILD_TESTS)
TARGET_LINK_LIBRARIES(s16-vlshift-test PRIVATE cpuinfo fp16 pthreadpool gtest gtest_main)
ADD_TEST(NAME s16-vlshift-test COMMAND s16-vlshift-test)
+ ADD_EXECUTABLE(cs16-vsquareabs-test test/cs16-vsquareabs.cc $<TARGET_OBJECTS:all_microkernels>)
+ TARGET_INCLUDE_DIRECTORIES(cs16-vsquareabs-test PRIVATE include src test)
+ TARGET_LINK_LIBRARIES(cs16-vsquareabs-test PRIVATE cpuinfo fp16 pthreadpool gtest gtest_main)
+ ADD_TEST(NAME cs16-vsquareabs-test COMMAND cs16-vsquareabs-test)
+
ADD_EXECUTABLE(s8-ibilinear-test test/s8-ibilinear.cc $<TARGET_OBJECTS:all_microkernels>)
TARGET_INCLUDE_DIRECTORIES(s8-ibilinear-test PRIVATE include src test)
TARGET_LINK_LIBRARIES(s8-ibilinear-test PRIVATE cpuinfo fp16 pthreadpool gtest gtest_main)
@@ -9222,6 +9231,10 @@ IF(XNNPACK_BUILD_BENCHMARKS)
TARGET_INCLUDE_DIRECTORIES(s16-vlshift-bench PRIVATE . include src)
TARGET_LINK_LIBRARIES(s16-vlshift-bench PRIVATE benchmark bench-utils cpuinfo fp16 pthreadpool)
+ ADD_EXECUTABLE(cs16-vsquareabs-bench bench/cs16-vsquareabs.cc $<TARGET_OBJECTS:all_microkernels>)
+ TARGET_INCLUDE_DIRECTORIES(cs16-vsquareabs-bench PRIVATE . include src)
+ TARGET_LINK_LIBRARIES(cs16-vsquareabs-bench PRIVATE benchmark bench-utils cpuinfo fp16 pthreadpool)
+
ADD_EXECUTABLE(xx-transpose-bench bench/x32-transpose.cc $<TARGET_OBJECTS:all_microkernels>)
TARGET_INCLUDE_DIRECTORIES(xx-transpose-bench PRIVATE . include src)
TARGET_LINK_LIBRARIES(xx-transpose-bench PRIVATE benchmark bench-utils cpuinfo fp16 pthreadpool)
diff --git a/bench/cs16-vsquareabs.cc b/bench/cs16-vsquareabs.cc
new file mode 100644
index 000000000..b1659f12f
--- /dev/null
+++ b/bench/cs16-vsquareabs.cc
@@ -0,0 +1,68 @@
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <xnnpack/aligned-allocator.h>
+#include <xnnpack/common.h>
+#include <xnnpack/params.h>
+#include <xnnpack/vsquareabs.h>
+
+#include <algorithm>
+#include <cmath>
+#include <functional>
+#include <numeric>
+#include <vector>
+
+#include "bench/utils.h"
+#include <benchmark/benchmark.h>
+
+// Benchmark driver for a cs16 vsquareabs micro-kernel.
+// state.range(0) supplies the channel count; the kernel is invoked once per
+// benchmark iteration on the same pre-filled buffers.
+void vsquareabs(
+    benchmark::State& state,
+    xnn_cs16_vsquareabs_ukernel_function vsquareabs,
+    benchmark::utils::IsaCheckFunction isa_check = nullptr)
+{
+  // Bail out when an ISA check is supplied and it reports failure.
+  if (isa_check != nullptr && !isa_check(state)) {
+    return;
+  }
+
+  const size_t num_channels = state.range(0);
+  // Two int16 values per channel plus XNN_EXTRA_BYTES worth of extra elements.
+  const size_t num_input_elements =
+      num_channels * 2 + XNN_EXTRA_BYTES / sizeof(int16_t);
+
+  std::vector<int16_t, AlignedAllocator<int16_t, 64>> input(num_input_elements);
+  std::iota(input.begin(), input.end(), 0);
+
+  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> output(num_channels);
+  std::iota(output.begin(), output.end(), 0);
+
+  for (auto _ : state) {
+    vsquareabs(num_channels, input.data(), output.data());
+  }
+
+  // Report the CPU frequency only when the helper returns a non-zero value.
+  const uint64_t frequency = benchmark::utils::GetCurrentCpuFrequency();
+  if (frequency != 0) {
+    state.counters["cpufreq"] = frequency;
+  }
+}
+
+// Registers the channel counts every vsquareabs benchmark is run with.
+static void BenchmarkKernelSize(benchmark::internal::Benchmark* b)
+{
+  b->ArgNames({"channels"});
+  for (const int64_t channels : {32, 64, 117, 400, 1000, 10000}) {
+    b->Args({channels});
+  }
+}
+
+BENCHMARK_CAPTURE(vsquareabs, cs16_scalar_x1, xnn_cs16_vsquareabs_ukernel__scalar_x1)
+ ->Apply(BenchmarkKernelSize)->UseRealTime();
+BENCHMARK_CAPTURE(vsquareabs, cs16_scalar_x2, xnn_cs16_vsquareabs_ukernel__scalar_x2)
+ ->Apply(BenchmarkKernelSize)->UseRealTime();
+BENCHMARK_CAPTURE(vsquareabs, cs16_scalar_x3, xnn_cs16_vsquareabs_ukernel__scalar_x3)
+ ->Apply(BenchmarkKernelSize)->UseRealTime();
+BENCHMARK_CAPTURE(vsquareabs, cs16_scalar_x4, xnn_cs16_vsquareabs_ukernel__scalar_x4)
+ ->Apply(BenchmarkKernelSize)->UseRealTime();
+
+#ifndef XNNPACK_BENCHMARK_NO_MAIN
+BENCHMARK_MAIN();
+#endif
diff --git a/scripts/generate-cs16-vsquareabs.sh b/scripts/generate-cs16-vsquareabs.sh
new file mode 100755
index 000000000..59b464d28
--- /dev/null
+++ b/scripts/generate-cs16-vsquareabs.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Copyright 2022 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+################################### SCALAR ###################################
+tools/xngen src/cs16-vsquareabs/scalar.c.in -D CHANNEL_TILE=1 -o src/cs16-vsquareabs/gen/scalar-x1.c &
+tools/xngen src/cs16-vsquareabs/scalar.c.in -D CHANNEL_TILE=2 -o src/cs16-vsquareabs/gen/scalar-x2.c &
+tools/xngen src/cs16-vsquareabs/scalar.c.in -D CHANNEL_TILE=3 -o src/cs16-vsquareabs/gen/scalar-x3.c &
+tools/xngen src/cs16-vsquareabs/scalar.c.in -D CHANNEL_TILE=4 -o src/cs16-vsquareabs/gen/scalar-x4.c &
+
+################################## Unit tests #################################
+tools/generate-vsquareabs-test.py --spec test/cs16-vsquareabs.yaml --output test/cs16-vsquareabs.cc &
+
+wait
diff --git a/src/cs16-vsquareabs/gen/scalar-x1.c b/src/cs16-vsquareabs/gen/scalar-x1.c
new file mode 100644
index 000000000..23df908f4
--- /dev/null
+++ b/src/cs16-vsquareabs/gen/scalar-x1.c
@@ -0,0 +1,42 @@
+// Auto-generated file. Do not edit!
+// Template: src/cs16-vsquareabs/scalar.c.in
+// Generator: tools/xngen
+//
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <xnnpack/math.h>
+#include <xnnpack/vsquareabs.h>
+
+
+// Scalar cs16 vsquareabs micro-kernel.
+// Reads `channels` interleaved int16 pairs from `input` (named vr/vi below,
+// presumably the real and imaginary parts) and writes one uint32 per pair to
+// `output`, computed as vr * vr + vi * vi.
+void xnn_cs16_vsquareabs_ukernel__scalar_x1(
+ size_t channels,
+ const int16_t* input,
+ uint32_t* output) {
+
+ assert(channels != 0);
+ assert(input != NULL);
+ assert(output != NULL);
+
+
+ // One pair per iteration until `channels` reaches zero.
+ if XNN_UNLIKELY(channels != 0) {
+ do {
+ const int32_t vr = (int32_t) input[0];
+ const int32_t vi = (int32_t) input[1];
+ input += 2;
+
+ // Square in int32, then convert to uint32 before summing.
+ const uint32_t vrsquare = (uint32_t) (vr * vr);
+ const uint32_t visquare = (uint32_t) (vi * vi);
+
+ const uint32_t vout = vrsquare + visquare;
+
+ *output++ = vout;
+ } while (--channels != 0);
+ }
+}
diff --git a/src/cs16-vsquareabs/gen/scalar-x2.c b/src/cs16-vsquareabs/gen/scalar-x2.c
new file mode 100644
index 000000000..0871a0e1a
--- /dev/null
+++ b/src/cs16-vsquareabs/gen/scalar-x2.c
@@ -0,0 +1,61 @@
+// Auto-generated file. Do not edit!
+// Template: src/cs16-vsquareabs/scalar.c.in
+// Generator: tools/xngen
+//
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <xnnpack/math.h>
+#include <xnnpack/vsquareabs.h>
+
+
+// Scalar cs16 vsquareabs micro-kernel.
+// Reads `channels` interleaved int16 pairs from `input` (named vr/vi below,
+// presumably the real and imaginary parts) and writes one uint32 per pair to
+// `output`, computed as vr * vr + vi * vi.
+void xnn_cs16_vsquareabs_ukernel__scalar_x2(
+ size_t channels,
+ const int16_t* input,
+ uint32_t* output) {
+
+ assert(channels != 0);
+ assert(input != NULL);
+ assert(output != NULL);
+
+ // Main loop: 2 pairs per iteration.
+ for (; channels >= 2; channels -= 2) {
+ const int32_t vr0 = (int32_t) input[0];
+ const int32_t vi0 = (int32_t) input[1];
+ const int32_t vr1 = (int32_t) input[2];
+ const int32_t vi1 = (int32_t) input[3];
+ input += 2 * 2;
+
+ // Square in int32, then convert to uint32 before summing.
+ const uint32_t vrsquare0 = (uint32_t) (vr0 * vr0);
+ const uint32_t visquare0 = (uint32_t) (vi0 * vi0);
+ const uint32_t vrsquare1 = (uint32_t) (vr1 * vr1);
+ const uint32_t visquare1 = (uint32_t) (vi1 * vi1);
+
+ const uint32_t vout0 = vrsquare0 + visquare0;
+ const uint32_t vout1 = vrsquare1 + visquare1;
+
+ output[0] = vout0;
+ output[1] = vout1;
+ output += 2;
+ }
+
+ // One pair per iteration until `channels` reaches zero.
+ if XNN_UNLIKELY(channels != 0) {
+ do {
+ const int32_t vr = (int32_t) input[0];
+ const int32_t vi = (int32_t) input[1];
+ input += 2;
+
+ const uint32_t vrsquare = (uint32_t) (vr * vr);
+ const uint32_t visquare = (uint32_t) (vi * vi);
+
+ const uint32_t vout = vrsquare + visquare;
+
+ *output++ = vout;
+ } while (--channels != 0);
+ }
+}
diff --git a/src/cs16-vsquareabs/gen/scalar-x3.c b/src/cs16-vsquareabs/gen/scalar-x3.c
new file mode 100644
index 000000000..9e3fdcdfb
--- /dev/null
+++ b/src/cs16-vsquareabs/gen/scalar-x3.c
@@ -0,0 +1,67 @@
+// Auto-generated file. Do not edit!
+// Template: src/cs16-vsquareabs/scalar.c.in
+// Generator: tools/xngen
+//
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <xnnpack/math.h>
+#include <xnnpack/vsquareabs.h>
+
+
+// Scalar cs16 vsquareabs micro-kernel.
+// Reads `channels` interleaved int16 pairs from `input` (named vr/vi below,
+// presumably the real and imaginary parts) and writes one uint32 per pair to
+// `output`, computed as vr * vr + vi * vi.
+void xnn_cs16_vsquareabs_ukernel__scalar_x3(
+ size_t channels,
+ const int16_t* input,
+ uint32_t* output) {
+
+ assert(channels != 0);
+ assert(input != NULL);
+ assert(output != NULL);
+
+ // Main loop: 3 pairs per iteration.
+ for (; channels >= 3; channels -= 3) {
+ const int32_t vr0 = (int32_t) input[0];
+ const int32_t vi0 = (int32_t) input[1];
+ const int32_t vr1 = (int32_t) input[2];
+ const int32_t vi1 = (int32_t) input[3];
+ const int32_t vr2 = (int32_t) input[4];
+ const int32_t vi2 = (int32_t) input[5];
+ input += 3 * 2;
+
+ // Square in int32, then convert to uint32 before summing.
+ const uint32_t vrsquare0 = (uint32_t) (vr0 * vr0);
+ const uint32_t visquare0 = (uint32_t) (vi0 * vi0);
+ const uint32_t vrsquare1 = (uint32_t) (vr1 * vr1);
+ const uint32_t visquare1 = (uint32_t) (vi1 * vi1);
+ const uint32_t vrsquare2 = (uint32_t) (vr2 * vr2);
+ const uint32_t visquare2 = (uint32_t) (vi2 * vi2);
+
+ const uint32_t vout0 = vrsquare0 + visquare0;
+ const uint32_t vout1 = vrsquare1 + visquare1;
+ const uint32_t vout2 = vrsquare2 + visquare2;
+
+ output[0] = vout0;
+ output[1] = vout1;
+ output[2] = vout2;
+ output += 3;
+ }
+
+ // One pair per iteration until `channels` reaches zero.
+ if XNN_UNLIKELY(channels != 0) {
+ do {
+ const int32_t vr = (int32_t) input[0];
+ const int32_t vi = (int32_t) input[1];
+ input += 2;
+
+ const uint32_t vrsquare = (uint32_t) (vr * vr);
+ const uint32_t visquare = (uint32_t) (vi * vi);
+
+ const uint32_t vout = vrsquare + visquare;
+
+ *output++ = vout;
+ } while (--channels != 0);
+ }
+}
diff --git a/src/cs16-vsquareabs/gen/scalar-x4.c b/src/cs16-vsquareabs/gen/scalar-x4.c
new file mode 100644
index 000000000..b4782f422
--- /dev/null
+++ b/src/cs16-vsquareabs/gen/scalar-x4.c
@@ -0,0 +1,73 @@
+// Auto-generated file. Do not edit!
+// Template: src/cs16-vsquareabs/scalar.c.in
+// Generator: tools/xngen
+//
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <xnnpack/math.h>
+#include <xnnpack/vsquareabs.h>
+
+
+// Scalar cs16 vsquareabs micro-kernel.
+// Reads `channels` interleaved int16 pairs from `input` (named vr/vi below,
+// presumably the real and imaginary parts) and writes one uint32 per pair to
+// `output`, computed as vr * vr + vi * vi.
+void xnn_cs16_vsquareabs_ukernel__scalar_x4(
+ size_t channels,
+ const int16_t* input,
+ uint32_t* output) {
+
+ assert(channels != 0);
+ assert(input != NULL);
+ assert(output != NULL);
+
+ // Main loop: 4 pairs per iteration.
+ for (; channels >= 4; channels -= 4) {
+ const int32_t vr0 = (int32_t) input[0];
+ const int32_t vi0 = (int32_t) input[1];
+ const int32_t vr1 = (int32_t) input[2];
+ const int32_t vi1 = (int32_t) input[3];
+ const int32_t vr2 = (int32_t) input[4];
+ const int32_t vi2 = (int32_t) input[5];
+ const int32_t vr3 = (int32_t) input[6];
+ const int32_t vi3 = (int32_t) input[7];
+ input += 4 * 2;
+
+ // Square in int32, then convert to uint32 before summing.
+ const uint32_t vrsquare0 = (uint32_t) (vr0 * vr0);
+ const uint32_t visquare0 = (uint32_t) (vi0 * vi0);
+ const uint32_t vrsquare1 = (uint32_t) (vr1 * vr1);
+ const uint32_t visquare1 = (uint32_t) (vi1 * vi1);
+ const uint32_t vrsquare2 = (uint32_t) (vr2 * vr2);
+ const uint32_t visquare2 = (uint32_t) (vi2 * vi2);
+ const uint32_t vrsquare3 = (uint32_t) (vr3 * vr3);
+ const uint32_t visquare3 = (uint32_t) (vi3 * vi3);
+
+ const uint32_t vout0 = vrsquare0 + visquare0;
+ const uint32_t vout1 = vrsquare1 + visquare1;
+ const uint32_t vout2 = vrsquare2 + visquare2;
+ const uint32_t vout3 = vrsquare3 + visquare3;
+
+ output[0] = vout0;
+ output[1] = vout1;
+ output[2] = vout2;
+ output[3] = vout3;
+ output += 4;
+ }
+
+ // One pair per iteration until `channels` reaches zero.
+ if XNN_UNLIKELY(channels != 0) {
+ do {
+ const int32_t vr = (int32_t) input[0];
+ const int32_t vi = (int32_t) input[1];
+ input += 2;
+
+ const uint32_t vrsquare = (uint32_t) (vr * vr);
+ const uint32_t visquare = (uint32_t) (vi * vi);
+
+ const uint32_t vout = vrsquare + visquare;
+
+ *output++ = vout;
+ } while (--channels != 0);
+ }
+}
diff --git a/src/cs16-vsquareabs/scalar.c.in b/src/cs16-vsquareabs/scalar.c.in
new file mode 100644
index 000000000..6a4ceeb4d
--- /dev/null
+++ b/src/cs16-vsquareabs/scalar.c.in
@@ -0,0 +1,57 @@
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+$assert CHANNEL_TILE >= 1
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <xnnpack/math.h>
+#include <xnnpack/vsquareabs.h>
+
+
+// Scalar cs16 vsquareabs micro-kernel.
+// Reads `channels` interleaved int16 pairs from `input` (named vr/vi below,
+// presumably the real and imaginary parts) and writes one uint32 per pair to
+// `output`, computed as vr * vr + vi * vi.
+void xnn_cs16_vsquareabs_ukernel__scalar_x${CHANNEL_TILE}(
+ size_t channels,
+ const int16_t* input,
+ uint32_t* output) {
+
+ assert(channels != 0);
+ assert(input != NULL);
+ assert(output != NULL);
+
+ $if CHANNEL_TILE > 1:
+ // Main loop: ${CHANNEL_TILE} pairs per iteration.
+ for (; channels >= ${CHANNEL_TILE}; channels -= ${CHANNEL_TILE}) {
+ $for C in range(CHANNEL_TILE):
+ const int32_t vr${C} = (int32_t) input[${C * 2}];
+ const int32_t vi${C} = (int32_t) input[${C * 2 + 1}];
+ input += ${CHANNEL_TILE} * 2;
+
+ // Square in int32, then convert to uint32 before summing.
+ $for C in range(CHANNEL_TILE):
+ const uint32_t vrsquare${C} = (uint32_t) (vr${C} * vr${C});
+ const uint32_t visquare${C} = (uint32_t) (vi${C} * vi${C});
+
+ $for C in range(CHANNEL_TILE):
+ const uint32_t vout${C} = vrsquare${C} + visquare${C};
+
+ $for C in range(CHANNEL_TILE):
+ output[${C}] = vout${C};
+ output += ${CHANNEL_TILE};
+ }
+
+ // One pair per iteration until `channels` reaches zero.
+ if XNN_UNLIKELY(channels != 0) {
+ do {
+ const int32_t vr = (int32_t) input[0];
+ const int32_t vi = (int32_t) input[1];
+ input += 2;
+
+ const uint32_t vrsquare = (uint32_t) (vr * vr);
+ const uint32_t visquare = (uint32_t) (vi * vi);
+
+ const uint32_t vout = vrsquare + visquare;
+
+ *output++ = vout;
+ } while (--channels != 0);
+ }
+}
diff --git a/src/xnnpack/params.h b/src/xnnpack/params.h
index 3a0e5afb4..d0dd855a8 100644
--- a/src/xnnpack/params.h
+++ b/src/xnnpack/params.h
@@ -1596,6 +1596,11 @@ typedef void (*xnn_s16_vlshift_ukernel_function)(
uint32_t shift,
int16_t* output);
+typedef void (*xnn_cs16_vsquareabs_ukernel_function)(
+ size_t channels,
+ const int16_t* input,
+ uint32_t* output);
+
// Reduce-Add Extended ("mantissa" + "exponent") Exponentials
typedef void (*xnn_f32_raddextexp_ukernel_function)(
diff --git a/src/xnnpack/vsquareabs.h b/src/xnnpack/vsquareabs.h
new file mode 100644
index 000000000..b77470263
--- /dev/null
+++ b/src/xnnpack/vsquareabs.h
@@ -0,0 +1,33 @@
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <xnnpack/params.h>
+#include <xnnpack/common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+// Declares a cs16 vsquareabs micro-kernel called `fn_name` taking
+// (channels, input, output) and returning void.
+#define DECLARE_CS16_VSQUAREABS_UKERNEL_FUNCTION(fn_name) \
+ XNN_INTERNAL void fn_name( \
+ size_t channels, \
+ const int16_t* input, \
+ uint32_t* output);
+
+
+// Scalar variants; the _xN suffix matches the generated file names scalar-xN.c.
+DECLARE_CS16_VSQUAREABS_UKERNEL_FUNCTION(xnn_cs16_vsquareabs_ukernel__scalar_x1)
+DECLARE_CS16_VSQUAREABS_UKERNEL_FUNCTION(xnn_cs16_vsquareabs_ukernel__scalar_x2)
+DECLARE_CS16_VSQUAREABS_UKERNEL_FUNCTION(xnn_cs16_vsquareabs_ukernel__scalar_x3)
+DECLARE_CS16_VSQUAREABS_UKERNEL_FUNCTION(xnn_cs16_vsquareabs_ukernel__scalar_x4)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/test/cs16-vsquareabs.cc b/test/cs16-vsquareabs.cc
new file mode 100644
index 000000000..d4a9d581b
--- /dev/null
+++ b/test/cs16-vsquareabs.cc
@@ -0,0 +1,125 @@
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+//
+// Auto-generated file. Do not edit!
+// Specification: test/cs16-vsquareabs.yaml
+// Generator: tools/generate-vsquareabs-test.py
+
+
+#include <gtest/gtest.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
+#include <xnnpack/vsquareabs.h>
+#include "vsquareabs-microkernel-tester.h"
+
+
+// scalar_x1 kernel: channel count 1, then 2 through 9.
+TEST(CS16_VSQUAREABS__SCALAR_X1, channels_eq_1) {
+ VSquareAbsMicrokernelTester()
+ .channels(1)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x1);
+}
+
+TEST(CS16_VSQUAREABS__SCALAR_X1, channels_gt_1) {
+ for (size_t channels = 2; channels < 10; channels++) {
+ VSquareAbsMicrokernelTester()
+ .channels(channels)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x1);
+ }
+}
+
+
+// scalar_x2 kernel: channel counts equal to 2, multiples of 2, below 2, and
+// strictly between 2 and 4.
+TEST(CS16_VSQUAREABS__SCALAR_X2, channels_eq_2) {
+ VSquareAbsMicrokernelTester()
+ .channels(2)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x2);
+}
+
+TEST(CS16_VSQUAREABS__SCALAR_X2, channels_div_2) {
+ for (size_t channels = 4; channels < 20; channels += 2) {
+ VSquareAbsMicrokernelTester()
+ .channels(channels)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x2);
+ }
+}
+
+TEST(CS16_VSQUAREABS__SCALAR_X2, channels_lt_2) {
+ for (size_t channels = 1; channels < 2; channels++) {
+ VSquareAbsMicrokernelTester()
+ .channels(channels)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x2);
+ }
+}
+
+TEST(CS16_VSQUAREABS__SCALAR_X2, channels_gt_2) {
+ for (size_t channels = 3; channels < 4; channels++) {
+ VSquareAbsMicrokernelTester()
+ .channels(channels)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x2);
+ }
+}
+
+
+// scalar_x3 kernel: channel counts equal to 3, multiples of 3, below 3, and
+// strictly between 3 and 6.
+TEST(CS16_VSQUAREABS__SCALAR_X3, channels_eq_3) {
+ VSquareAbsMicrokernelTester()
+ .channels(3)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x3);
+}
+
+TEST(CS16_VSQUAREABS__SCALAR_X3, channels_div_3) {
+ for (size_t channels = 6; channels < 30; channels += 3) {
+ VSquareAbsMicrokernelTester()
+ .channels(channels)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x3);
+ }
+}
+
+TEST(CS16_VSQUAREABS__SCALAR_X3, channels_lt_3) {
+ for (size_t channels = 1; channels < 3; channels++) {
+ VSquareAbsMicrokernelTester()
+ .channels(channels)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x3);
+ }
+}
+
+TEST(CS16_VSQUAREABS__SCALAR_X3, channels_gt_3) {
+ for (size_t channels = 4; channels < 6; channels++) {
+ VSquareAbsMicrokernelTester()
+ .channels(channels)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x3);
+ }
+}
+
+
+// scalar_x4 kernel: channel counts equal to 4, multiples of 4, below 4, and
+// strictly between 4 and 8.
+TEST(CS16_VSQUAREABS__SCALAR_X4, channels_eq_4) {
+ VSquareAbsMicrokernelTester()
+ .channels(4)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x4);
+}
+
+TEST(CS16_VSQUAREABS__SCALAR_X4, channels_div_4) {
+ for (size_t channels = 8; channels < 40; channels += 4) {
+ VSquareAbsMicrokernelTester()
+ .channels(channels)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x4);
+ }
+}
+
+TEST(CS16_VSQUAREABS__SCALAR_X4, channels_lt_4) {
+ for (size_t channels = 1; channels < 4; channels++) {
+ VSquareAbsMicrokernelTester()
+ .channels(channels)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x4);
+ }
+}
+
+TEST(CS16_VSQUAREABS__SCALAR_X4, channels_gt_4) {
+ for (size_t channels = 5; channels < 8; channels++) {
+ VSquareAbsMicrokernelTester()
+ .channels(channels)
+ .Test(xnn_cs16_vsquareabs_ukernel__scalar_x4);
+ }
+}
diff --git a/test/cs16-vsquareabs.yaml b/test/cs16-vsquareabs.yaml
new file mode 100644
index 000000000..30b1a39bf
--- /dev/null
+++ b/test/cs16-vsquareabs.yaml
@@ -0,0 +1,11 @@
+# Copyright 2022 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+# Scalar
+- name: xnn_cs16_vsquareabs_ukernel__scalar_x1
+- name: xnn_cs16_vsquareabs_ukernel__scalar_x2
+- name: xnn_cs16_vsquareabs_ukernel__scalar_x3
+- name: xnn_cs16_vsquareabs_ukernel__scalar_x4
diff --git a/test/vsquareabs-microkernel-tester.h b/test/vsquareabs-microkernel-tester.h
new file mode 100644
index 000000000..5726634ec
--- /dev/null
+++ b/test/vsquareabs-microkernel-tester.h
@@ -0,0 +1,81 @@
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstddef>
+#include <cstdlib>
+#include <random>
+#include <vector>
+
+#include <xnnpack.h>
+#include <xnnpack/aligned-allocator.h>
+#include <xnnpack/params.h>
+
+
+// Test harness for cs16 vsquareabs micro-kernels.
+//
+// Configure with the chained setters, then call Test() with the kernel under
+// test. Each iteration fills the input with random int16 values, computes a
+// reference result and compares it element-wise with the kernel output.
+class VSquareAbsMicrokernelTester {
+ public:
+  // Sets the number of int16 pairs passed to the kernel; must be non-zero.
+  inline VSquareAbsMicrokernelTester& channels(size_t channels) {
+    assert(channels != 0);
+    this->channels_ = channels;
+    return *this;
+  }
+
+  inline size_t channels() const {
+    return this->channels_;
+  }
+
+  // Sets how many randomized rounds Test() performs.
+  inline VSquareAbsMicrokernelTester& iterations(size_t iterations) {
+    this->iterations_ = iterations;
+    return *this;
+  }
+
+  inline size_t iterations() const {
+    return this->iterations_;
+  }
+
+  // Runs `vsquareabs` on random inputs and checks every output element
+  // against a reference computed here.
+  void Test(xnn_cs16_vsquareabs_ukernel_function vsquareabs) const {
+    std::random_device random_device;
+    auto rng = std::mt19937(random_device());
+    // Cover the full int16 range explicitly: a default-constructed
+    // distribution only yields values in [0, INT16_MAX], which would leave
+    // negative inputs completely untested.
+    std::uniform_int_distribution<int16_t> i16dist(INT16_MIN, INT16_MAX);
+    auto i16rng = [&rng, &i16dist]() { return i16dist(rng); };
+
+    // Two int16 values per channel plus XNN_EXTRA_BYTES worth of extra elements.
+    std::vector<int16_t> x(channels() * 2 + XNN_EXTRA_BYTES / sizeof(int16_t));
+    std::vector<uint32_t> y(channels());
+    std::vector<uint32_t> y_ref(channels());
+
+    for (size_t iteration = 0; iteration < iterations(); iteration++) {
+      std::generate(x.begin(), x.end(), i16rng);
+      // Pre-fill the output with a sentinel so unwritten elements are detected.
+      std::fill(y.begin(), y.end(), INT32_C(0x12345678));
+
+      // Compute reference results.
+      for (size_t c = 0; c < channels(); c++) {
+        const int32_t r = static_cast<int32_t>(x[c * 2]);
+        const int32_t i = static_cast<int32_t>(x[c * 2 + 1]);
+        // Each square is at most 2^30, so it fits in int32 and the sum fits
+        // in uint32.
+        const uint32_t rsquare = static_cast<uint32_t>(r * r);
+        const uint32_t isquare = static_cast<uint32_t>(i * i);
+        y_ref[c] = rsquare + isquare;
+      }
+
+      // Call optimized micro-kernel.
+      vsquareabs(channels(), x.data(), y.data());
+
+      // Verify results.
+      for (size_t c = 0; c < channels(); c++) {
+        ASSERT_EQ(y[c], y_ref[c])
+          << ", channel " << c << " / " << channels();
+      }
+    }
+  }
+
+ private:
+  size_t channels_{1};
+  size_t iterations_{15};
+};
diff --git a/tools/generate-vsquareabs-test.py b/tools/generate-vsquareabs-test.py
new file mode 100755
index 000000000..db1d0e5af
--- /dev/null
+++ b/tools/generate-vsquareabs-test.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+# Copyright 2022 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import argparse
+import codecs
+import math
+import os
+import re
+import sys
+import yaml
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from primes import next_prime
+import xngen
+import xnncommon
+
+
+parser = argparse.ArgumentParser(description='VSquareAbs microkernel test generator')
+parser.add_argument("-s", "--spec", metavar="FILE", required=True,
+ help="Specification (YAML) file")
+parser.add_argument("-o", "--output", metavar="FILE", required=True,
+ help='Output (C++ source) file')
+parser.set_defaults(defines=list())
+
+
+def split_ukernel_name(name):
+ match = re.fullmatch(r"xnn_cs16_vsquareabs_ukernel__(.+)_x(\d+)", name)
+ assert match is not None
+ channel_tile = int(match.group(2))
+
+ arch, isa = xnncommon.parse_target_name(target_name=match.group(1))
+ return channel_tile, arch, isa
+
+
+VSQUAREABS_TEST_TEMPLATE = """\
+TEST(${TEST_NAME}, channels_eq_${CHANNEL_TILE}) {
+ $if ISA_CHECK:
+ ${ISA_CHECK};
+ VSquareAbsMicrokernelTester()
+ .channels(${CHANNEL_TILE})
+ .Test(${", ".join(TEST_ARGS)});
+}
+
+$if CHANNEL_TILE > 1:
+ TEST(${TEST_NAME}, channels_div_${CHANNEL_TILE}) {
+ $if ISA_CHECK:
+ ${ISA_CHECK};
+ for (size_t channels = ${CHANNEL_TILE*2}; channels < ${CHANNEL_TILE*10}; channels += ${CHANNEL_TILE}) {
+ VSquareAbsMicrokernelTester()
+ .channels(channels)
+ .Test(${", ".join(TEST_ARGS)});
+ }
+ }
+
+ TEST(${TEST_NAME}, channels_lt_${CHANNEL_TILE}) {
+ $if ISA_CHECK:
+ ${ISA_CHECK};
+ for (size_t channels = 1; channels < ${CHANNEL_TILE}; channels++) {
+ VSquareAbsMicrokernelTester()
+ .channels(channels)
+ .Test(${", ".join(TEST_ARGS)});
+ }
+ }
+
+TEST(${TEST_NAME}, channels_gt_${CHANNEL_TILE}) {
+ $if ISA_CHECK:
+ ${ISA_CHECK};
+ for (size_t channels = ${CHANNEL_TILE+1}; channels < ${10 if CHANNEL_TILE == 1 else CHANNEL_TILE*2}; channels++) {
+ VSquareAbsMicrokernelTester()
+ .channels(channels)
+ .Test(${", ".join(TEST_ARGS)});
+ }
+}
+
+"""
+
+
+def generate_test_cases(ukernel, channel_tile, isa):
+ """Generates all tests cases for a VSquareAbs micro-kernel.
+
+ Args:
+ ukernel: C name of the micro-kernel function.
+ channel_tile: Number of channels processed per one iteration of the inner
+ loop of the micro-kernel.
+ isa: instruction set required to run the micro-kernel. Generated unit test
+ will skip execution if the host processor doesn't support this ISA.
+
+ Returns:
+ Code for the test case.
+ """
+ _, test_name = ukernel.split("_", 1)
+ _, datatype, ukernel_type, _ = ukernel.split("_", 3)
+ return xngen.preprocess(VSQUAREABS_TEST_TEMPLATE, {
+ "TEST_NAME": test_name.upper().replace("UKERNEL_", ""),
+ "TEST_ARGS": [ukernel],
+ "DATATYPE": datatype,
+ "CHANNEL_TILE": channel_tile,
+ "ISA_CHECK": xnncommon.generate_isa_check_macro(isa),
+ "next_prime": next_prime,
+ })
+
+
+def main(args):
+ options = parser.parse_args(args)
+
+ with codecs.open(options.spec, "r", encoding="utf-8") as spec_file:
+ spec_yaml = yaml.safe_load(spec_file)
+ if not isinstance(spec_yaml, list):
+ raise ValueError("expected a list of micro-kernels in the spec")
+
+ tests = """\
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+//
+// Auto-generated file. Do not edit!
+// Specification: {specification}
+// Generator: {generator}
+
+
+#include <gtest/gtest.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
+#include <xnnpack/vsquareabs.h>
+#include "vsquareabs-microkernel-tester.h"
+""".format(specification=options.spec, generator=sys.argv[0])
+
+ for ukernel_spec in spec_yaml:
+ name = ukernel_spec["name"]
+ channel_tile, arch, isa = split_ukernel_name(name)
+
+ # specification can override architecture
+ arch = ukernel_spec.get("arch", arch)
+
+ test_case = generate_test_cases(name, channel_tile, isa)
+ tests += "\n\n" + xnncommon.postprocess_test_case(test_case, arch, isa)
+
+ txt_changed = True
+ if os.path.exists(options.output):
+ with codecs.open(options.output, "r", encoding="utf-8") as output_file:
+ txt_changed = output_file.read() != tests
+
+ if txt_changed:
+ with codecs.open(options.output, "w", encoding="utf-8") as output_file:
+ output_file.write(tests)
+
+
+if __name__ == "__main__":
+ main(sys.argv[1:])