diff options
author | Frank Barchard <fbarchard@google.com> | 2022-07-28 14:09:52 -0700 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2022-07-28 14:10:42 -0700 |
commit | a40c3b2be6348b684bb4a44fdb5c42b9b3cb6db5 (patch) | |
tree | 76913d34f7358eb57fbb24b61bfb638982918985 | |
parent | f77cd4974c180b8569239dc4dd4fc3c0196c1e9c (diff) | |
download | XNNPACK-a40c3b2be6348b684bb4a44fdb5c42b9b3cb6db5.tar.gz |
CS16 squareabs microkernel
- Scalar implementation
PiperOrigin-RevId: 463921828
-rw-r--r-- | BUILD.bazel | 22 | ||||
-rwxr-xr-x | CMakeLists.txt | 13 | ||||
-rw-r--r-- | bench/cs16-vsquareabs.cc | 68 | ||||
-rwxr-xr-x | scripts/generate-cs16-vsquareabs.sh | 16 | ||||
-rw-r--r-- | src/cs16-vsquareabs/gen/scalar-x1.c | 42 | ||||
-rw-r--r-- | src/cs16-vsquareabs/gen/scalar-x2.c | 61 | ||||
-rw-r--r-- | src/cs16-vsquareabs/gen/scalar-x3.c | 67 | ||||
-rw-r--r-- | src/cs16-vsquareabs/gen/scalar-x4.c | 73 | ||||
-rw-r--r-- | src/cs16-vsquareabs/scalar.c.in | 57 | ||||
-rw-r--r-- | src/xnnpack/params.h | 5 | ||||
-rw-r--r-- | src/xnnpack/vsquareabs.h | 33 | ||||
-rw-r--r-- | test/cs16-vsquareabs.cc | 125 | ||||
-rw-r--r-- | test/cs16-vsquareabs.yaml | 11 | ||||
-rw-r--r-- | test/vsquareabs-microkernel-tester.h | 81 | ||||
-rwxr-xr-x | tools/generate-vsquareabs-test.py | 155 |
15 files changed, 829 insertions, 0 deletions
diff --git a/BUILD.bazel b/BUILD.bazel index 208175616..cde501142 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -620,6 +620,10 @@ ALL_SCALAR_MICROKERNEL_SRCS = [ "src/s16-vlshift/gen/scalar-x2.c", "src/s16-vlshift/gen/scalar-x3.c", "src/s16-vlshift/gen/scalar-x4.c", + "src/cs16-vsquareabs/gen/scalar-x1.c", + "src/cs16-vsquareabs/gen/scalar-x2.c", + "src/cs16-vsquareabs/gen/scalar-x3.c", + "src/cs16-vsquareabs/gen/scalar-x4.c", "src/f16-f32-vcvt/gen/vcvt-scalar-x1.c", "src/f16-f32-vcvt/gen/vcvt-scalar-x2.c", "src/f16-f32-vcvt/gen/vcvt-scalar-x3.c", @@ -8521,6 +8525,7 @@ INTERNAL_MICROKERNEL_HDRS = [ "src/xnnpack/vmulcaddc.h", "src/xnnpack/vscaleexpminusmax.h", "src/xnnpack/vscaleextexp.h", + "src/xnnpack/vsquareabs.h", "src/xnnpack/vunary.h", "src/xnnpack/window.h", "src/xnnpack/zip.h", @@ -11765,6 +11770,14 @@ xnnpack_benchmark( ) xnnpack_benchmark( + name = "cs16_vsquareabs_bench", + srcs = [ + "bench/cs16-vsquareabs.cc", + ], + deps = MICROKERNEL_BENCHMARK_DEPS, +) + +xnnpack_benchmark( name = "x8_lut_bench", srcs = [ "bench/x8-lut.cc", @@ -14096,6 +14109,15 @@ xnnpack_unit_test( ) xnnpack_unit_test( + name = "cs16_vsquareabs_test", + srcs = [ + "test/cs16-vsquareabs.cc", + "test/vsquareabs-microkernel-tester.h", + ], + deps = MICROKERNEL_TEST_DEPS, +) + +xnnpack_unit_test( name = "s8_ibilinear_test", srcs = [ "test/ibilinear-microkernel-tester.h", diff --git a/CMakeLists.txt b/CMakeLists.txt index ea1bf1fd1..e91fe40d6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -607,6 +607,10 @@ SET(ALL_SCALAR_MICROKERNEL_SRCS src/s16-vlshift/gen/scalar-x2.c src/s16-vlshift/gen/scalar-x3.c src/s16-vlshift/gen/scalar-x4.c + src/cs16-vsquareabs/gen/scalar-x1.c + src/cs16-vsquareabs/gen/scalar-x2.c + src/cs16-vsquareabs/gen/scalar-x3.c + src/cs16-vsquareabs/gen/scalar-x4.c src/f16-f32-vcvt/gen/vcvt-scalar-x1.c src/f16-f32-vcvt/gen/vcvt-scalar-x2.c src/f16-f32-vcvt/gen/vcvt-scalar-x3.c @@ -8637,6 +8641,11 @@ IF(XNNPACK_BUILD_TESTS) TARGET_LINK_LIBRARIES(s16-vlshift-test PRIVATE cpuinfo fp16 pthreadpool gtest gtest_main) ADD_TEST(NAME s16-vlshift-test COMMAND s16-vlshift-test) + ADD_EXECUTABLE(cs16-vsquareabs-test test/cs16-vsquareabs.cc $<TARGET_OBJECTS:all_microkernels>) + TARGET_INCLUDE_DIRECTORIES(cs16-vsquareabs-test PRIVATE include src test) + TARGET_LINK_LIBRARIES(cs16-vsquareabs-test PRIVATE cpuinfo fp16 pthreadpool gtest gtest_main) + ADD_TEST(NAME cs16-vsquareabs-test COMMAND cs16-vsquareabs-test) + ADD_EXECUTABLE(s8-ibilinear-test test/s8-ibilinear.cc $<TARGET_OBJECTS:all_microkernels>) TARGET_INCLUDE_DIRECTORIES(s8-ibilinear-test PRIVATE include src test) TARGET_LINK_LIBRARIES(s8-ibilinear-test PRIVATE cpuinfo fp16 pthreadpool gtest gtest_main) @@ -9222,6 +9231,10 @@ IF(XNNPACK_BUILD_BENCHMARKS) TARGET_INCLUDE_DIRECTORIES(s16-vlshift-bench PRIVATE . include src) TARGET_LINK_LIBRARIES(s16-vlshift-bench PRIVATE benchmark bench-utils cpuinfo fp16 pthreadpool) + ADD_EXECUTABLE(cs16-vsquareabs-bench bench/cs16-vsquareabs.cc $<TARGET_OBJECTS:all_microkernels>) + TARGET_INCLUDE_DIRECTORIES(cs16-vsquareabs-bench PRIVATE . include src) + TARGET_LINK_LIBRARIES(cs16-vsquareabs-bench PRIVATE benchmark bench-utils cpuinfo fp16 pthreadpool) + ADD_EXECUTABLE(xx-transpose-bench bench/x32-transpose.cc $<TARGET_OBJECTS:all_microkernels>) TARGET_INCLUDE_DIRECTORIES(xx-transpose-bench PRIVATE . include src) TARGET_LINK_LIBRARIES(xx-transpose-bench PRIVATE benchmark bench-utils cpuinfo fp16 pthreadpool) diff --git a/bench/cs16-vsquareabs.cc b/bench/cs16-vsquareabs.cc new file mode 100644 index 000000000..b1659f12f --- /dev/null +++ b/bench/cs16-vsquareabs.cc @@ -0,0 +1,68 @@ +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include <xnnpack/aligned-allocator.h> +#include <xnnpack/common.h> +#include <xnnpack/params.h> +#include <xnnpack/vsquareabs.h> + +#include <algorithm> +#include <cmath> +#include <functional> +#include <numeric> +#include <vector> + +#include "bench/utils.h" +#include <benchmark/benchmark.h> + +void vsquareabs( + benchmark::State& state, + xnn_cs16_vsquareabs_ukernel_function vsquareabs, + benchmark::utils::IsaCheckFunction isa_check = nullptr) +{ + if (isa_check && !isa_check(state)) { + return; + } + const size_t channels = state.range(0); + + std::vector<int16_t, AlignedAllocator<int16_t, 64>> input( + channels * 2 + XNN_EXTRA_BYTES / sizeof(int16_t)); + std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> output(channels); + std::iota(input.begin(), input.end(), 0); + std::iota(output.begin(), output.end(), 0); + + for (auto _ : state) { + vsquareabs(channels, input.data(), output.data()); + } + + const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency(); + if (cpu_frequency != 0) { + state.counters["cpufreq"] = cpu_frequency; + } +} + +static void BenchmarkKernelSize(benchmark::internal::Benchmark* b) +{ + b->ArgNames({"channels"}); + b->Args({32}); + b->Args({64}); + b->Args({117}); + b->Args({400}); + b->Args({1000}); + b->Args({10000}); +} + +BENCHMARK_CAPTURE(vsquareabs, cs16_scalar_x1, xnn_cs16_vsquareabs_ukernel__scalar_x1) + ->Apply(BenchmarkKernelSize)->UseRealTime(); +BENCHMARK_CAPTURE(vsquareabs, cs16_scalar_x2, xnn_cs16_vsquareabs_ukernel__scalar_x2) + ->Apply(BenchmarkKernelSize)->UseRealTime(); +BENCHMARK_CAPTURE(vsquareabs, cs16_scalar_x3, xnn_cs16_vsquareabs_ukernel__scalar_x3) + ->Apply(BenchmarkKernelSize)->UseRealTime(); +BENCHMARK_CAPTURE(vsquareabs, cs16_scalar_x4, xnn_cs16_vsquareabs_ukernel__scalar_x4) + ->Apply(BenchmarkKernelSize)->UseRealTime(); + +#ifndef XNNPACK_BENCHMARK_NO_MAIN +BENCHMARK_MAIN(); +#endif diff --git a/scripts/generate-cs16-vsquareabs.sh b/scripts/generate-cs16-vsquareabs.sh new file mode 100755 index 000000000..59b464d28 --- /dev/null +++ b/scripts/generate-cs16-vsquareabs.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# Copyright 2022 Google LLC +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +################################### SCALAR ################################### +tools/xngen src/cs16-vsquareabs/scalar.c.in -D CHANNEL_TILE=1 -o src/cs16-vsquareabs/gen/scalar-x1.c & +tools/xngen src/cs16-vsquareabs/scalar.c.in -D CHANNEL_TILE=2 -o src/cs16-vsquareabs/gen/scalar-x2.c & +tools/xngen src/cs16-vsquareabs/scalar.c.in -D CHANNEL_TILE=3 -o src/cs16-vsquareabs/gen/scalar-x3.c & +tools/xngen src/cs16-vsquareabs/scalar.c.in -D CHANNEL_TILE=4 -o src/cs16-vsquareabs/gen/scalar-x4.c & + +################################## Unit tests ################################# +tools/generate-vsquareabs-test.py --spec test/cs16-vsquareabs.yaml --output test/cs16-vsquareabs.cc & + +wait diff --git a/src/cs16-vsquareabs/gen/scalar-x1.c b/src/cs16-vsquareabs/gen/scalar-x1.c new file mode 100644 index 000000000..23df908f4 --- /dev/null +++ b/src/cs16-vsquareabs/gen/scalar-x1.c @@ -0,0 +1,42 @@ +// Auto-generated file. Do not edit! +// Template: src/cs16-vsquareabs/scalar.c.in +// Generator: tools/xngen +// +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> + +#include <xnnpack/math.h> +#include <xnnpack/vsquareabs.h> + + +void xnn_cs16_vsquareabs_ukernel__scalar_x1( + size_t channels, + const int16_t* input, + uint32_t* output) { + + assert(channels != 0); + assert(input != NULL); + assert(output != NULL); + + + if XNN_UNLIKELY(channels != 0) { + do { + const int32_t vr = (int32_t) input[0]; + const int32_t vi = (int32_t) input[1]; + input += 2; + + const uint32_t vrsquare = (uint32_t) (vr * vr); + const uint32_t visquare = (uint32_t) (vi * vi); + + const uint32_t vout = vrsquare + visquare; + + *output++ = vout; + } while (--channels != 0); + } +} diff --git a/src/cs16-vsquareabs/gen/scalar-x2.c b/src/cs16-vsquareabs/gen/scalar-x2.c new file mode 100644 index 000000000..0871a0e1a --- /dev/null +++ b/src/cs16-vsquareabs/gen/scalar-x2.c @@ -0,0 +1,61 @@ +// Auto-generated file. Do not edit! +// Template: src/cs16-vsquareabs/scalar.c.in +// Generator: tools/xngen +// +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> + +#include <xnnpack/math.h> +#include <xnnpack/vsquareabs.h> + + +void xnn_cs16_vsquareabs_ukernel__scalar_x2( + size_t channels, + const int16_t* input, + uint32_t* output) { + + assert(channels != 0); + assert(input != NULL); + assert(output != NULL); + + for (; channels >= 2; channels -= 2) { + const int32_t vr0 = (int32_t) input[0]; + const int32_t vi0 = (int32_t) input[1]; + const int32_t vr1 = (int32_t) input[2]; + const int32_t vi1 = (int32_t) input[3]; + input += 2 * 2; + + const uint32_t vrsquare0 = (uint32_t) (vr0 * vr0); + const uint32_t visquare0 = (uint32_t) (vi0 * vi0); + const uint32_t vrsquare1 = (uint32_t) (vr1 * vr1); + const uint32_t visquare1 = (uint32_t) (vi1 * vi1); + + const uint32_t vout0 = vrsquare0 + visquare0; + const uint32_t vout1 = vrsquare1 + visquare1; + + output[0] = vout0; + output[1] = vout1; + output += 2; + } + + if XNN_UNLIKELY(channels != 0) { + do { + const int32_t vr = (int32_t) input[0]; + const int32_t vi = (int32_t) input[1]; + input += 2; + + const uint32_t vrsquare = (uint32_t) (vr * vr); + const uint32_t visquare = (uint32_t) (vi * vi); + + const uint32_t vout = vrsquare + visquare; + + *output++ = vout; + } while (--channels != 0); + } +} diff --git a/src/cs16-vsquareabs/gen/scalar-x3.c b/src/cs16-vsquareabs/gen/scalar-x3.c new file mode 100644 index 000000000..9e3fdcdfb --- /dev/null +++ b/src/cs16-vsquareabs/gen/scalar-x3.c @@ -0,0 +1,67 @@ +// Auto-generated file. Do not edit! +// Template: src/cs16-vsquareabs/scalar.c.in +// Generator: tools/xngen +// +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> + +#include <xnnpack/math.h> +#include <xnnpack/vsquareabs.h> + + +void xnn_cs16_vsquareabs_ukernel__scalar_x3( + size_t channels, + const int16_t* input, + uint32_t* output) { + + assert(channels != 0); + assert(input != NULL); + assert(output != NULL); + + for (; channels >= 3; channels -= 3) { + const int32_t vr0 = (int32_t) input[0]; + const int32_t vi0 = (int32_t) input[1]; + const int32_t vr1 = (int32_t) input[2]; + const int32_t vi1 = (int32_t) input[3]; + const int32_t vr2 = (int32_t) input[4]; + const int32_t vi2 = (int32_t) input[5]; + input += 3 * 2; + + const uint32_t vrsquare0 = (uint32_t) (vr0 * vr0); + const uint32_t visquare0 = (uint32_t) (vi0 * vi0); + const uint32_t vrsquare1 = (uint32_t) (vr1 * vr1); + const uint32_t visquare1 = (uint32_t) (vi1 * vi1); + const uint32_t vrsquare2 = (uint32_t) (vr2 * vr2); + const uint32_t visquare2 = (uint32_t) (vi2 * vi2); + + const uint32_t vout0 = vrsquare0 + visquare0; + const uint32_t vout1 = vrsquare1 + visquare1; + const uint32_t vout2 = vrsquare2 + visquare2; + + output[0] = vout0; + output[1] = vout1; + output[2] = vout2; + output += 3; + } + + if XNN_UNLIKELY(channels != 0) { + do { + const int32_t vr = (int32_t) input[0]; + const int32_t vi = (int32_t) input[1]; + input += 2; + + const uint32_t vrsquare = (uint32_t) (vr * vr); + const uint32_t visquare = (uint32_t) (vi * vi); + + const uint32_t vout = vrsquare + visquare; + + *output++ = vout; + } while (--channels != 0); + } +} diff --git a/src/cs16-vsquareabs/gen/scalar-x4.c b/src/cs16-vsquareabs/gen/scalar-x4.c new file mode 100644 index 000000000..b4782f422 --- /dev/null +++ b/src/cs16-vsquareabs/gen/scalar-x4.c @@ -0,0 +1,73 @@ +// Auto-generated file. Do not edit! +// Template: src/cs16-vsquareabs/scalar.c.in +// Generator: tools/xngen +// +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> + +#include <xnnpack/math.h> +#include <xnnpack/vsquareabs.h> + + +void xnn_cs16_vsquareabs_ukernel__scalar_x4( + size_t channels, + const int16_t* input, + uint32_t* output) { + + assert(channels != 0); + assert(input != NULL); + assert(output != NULL); + + for (; channels >= 4; channels -= 4) { + const int32_t vr0 = (int32_t) input[0]; + const int32_t vi0 = (int32_t) input[1]; + const int32_t vr1 = (int32_t) input[2]; + const int32_t vi1 = (int32_t) input[3]; + const int32_t vr2 = (int32_t) input[4]; + const int32_t vi2 = (int32_t) input[5]; + const int32_t vr3 = (int32_t) input[6]; + const int32_t vi3 = (int32_t) input[7]; + input += 4 * 2; + + const uint32_t vrsquare0 = (uint32_t) (vr0 * vr0); + const uint32_t visquare0 = (uint32_t) (vi0 * vi0); + const uint32_t vrsquare1 = (uint32_t) (vr1 * vr1); + const uint32_t visquare1 = (uint32_t) (vi1 * vi1); + const uint32_t vrsquare2 = (uint32_t) (vr2 * vr2); + const uint32_t visquare2 = (uint32_t) (vi2 * vi2); + const uint32_t vrsquare3 = (uint32_t) (vr3 * vr3); + const uint32_t visquare3 = (uint32_t) (vi3 * vi3); + + const uint32_t vout0 = vrsquare0 + visquare0; + const uint32_t vout1 = vrsquare1 + visquare1; + const uint32_t vout2 = vrsquare2 + visquare2; + const uint32_t vout3 = vrsquare3 + visquare3; + + output[0] = vout0; + output[1] = vout1; + output[2] = vout2; + output[3] = vout3; + output += 4; + } + + if XNN_UNLIKELY(channels != 0) { + do { + const int32_t vr = (int32_t) input[0]; + const int32_t vi = (int32_t) input[1]; + input += 2; + + const uint32_t vrsquare = (uint32_t) (vr * vr); + const uint32_t visquare = (uint32_t) (vi * vi); + + const uint32_t vout = vrsquare + visquare; + + *output++ = vout; + } while (--channels != 0); + } +} diff --git a/src/cs16-vsquareabs/scalar.c.in b/src/cs16-vsquareabs/scalar.c.in new file mode 100644 index 000000000..6a4ceeb4d --- /dev/null +++ b/src/cs16-vsquareabs/scalar.c.in @@ -0,0 +1,57 @@ +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +$assert CHANNEL_TILE >= 1 +#include <assert.h> +#include <stddef.h> +#include <stdint.h> + +#include <xnnpack/math.h> +#include <xnnpack/vsquareabs.h> + + +void xnn_cs16_vsquareabs_ukernel__scalar_x${CHANNEL_TILE}( + size_t channels, + const int16_t* input, + uint32_t* output) { + + assert(channels != 0); + assert(input != NULL); + assert(output != NULL); + + $if CHANNEL_TILE > 1: + for (; channels >= ${CHANNEL_TILE}; channels -= ${CHANNEL_TILE}) { + $for C in range(CHANNEL_TILE): + const int32_t vr${C} = (int32_t) input[${C * 2}]; + const int32_t vi${C} = (int32_t) input[${C * 2 + 1}]; + input += ${CHANNEL_TILE} * 2; + + $for C in range(CHANNEL_TILE): + const uint32_t vrsquare${C} = (uint32_t) (vr${C} * vr${C}); + const uint32_t visquare${C} = (uint32_t) (vi${C} * vi${C}); + + $for C in range(CHANNEL_TILE): + const uint32_t vout${C} = vrsquare${C} + visquare${C}; + + $for C in range(CHANNEL_TILE): + output[${C}] = vout${C}; + output += ${CHANNEL_TILE}; + } + + if XNN_UNLIKELY(channels != 0) { + do { + const int32_t vr = (int32_t) input[0]; + const int32_t vi = (int32_t) input[1]; + input += 2; + + const uint32_t vrsquare = (uint32_t) (vr * vr); + const uint32_t visquare = (uint32_t) (vi * vi); + + const uint32_t vout = vrsquare + visquare; + + *output++ = vout; + } while (--channels != 0); + } +} diff --git a/src/xnnpack/params.h b/src/xnnpack/params.h index 3a0e5afb4..d0dd855a8 100644 --- a/src/xnnpack/params.h +++ b/src/xnnpack/params.h @@ -1596,6 +1596,11 @@ typedef void (*xnn_s16_vlshift_ukernel_function)( uint32_t shift, int16_t* output); +typedef void (*xnn_cs16_vsquareabs_ukernel_function)( + size_t channels, + const int16_t* input, + uint32_t* output); + // Reduce-Add Extended ("mantissa" + "exponent") Exponentials typedef void (*xnn_f32_raddextexp_ukernel_function)( diff --git a/src/xnnpack/vsquareabs.h b/src/xnnpack/vsquareabs.h new file mode 100644 index 000000000..b77470263 --- /dev/null +++ b/src/xnnpack/vsquareabs.h @@ -0,0 +1,33 @@ +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include <stddef.h> +#include <stdint.h> + +#include <xnnpack/params.h> +#include <xnnpack/common.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +#define DECLARE_CS16_VSQUAREABS_UKERNEL_FUNCTION(fn_name) \ + XNN_INTERNAL void fn_name( \ + size_t channels, \ + const int16_t* input, \ + uint32_t* output); + + +DECLARE_CS16_VSQUAREABS_UKERNEL_FUNCTION(xnn_cs16_vsquareabs_ukernel__scalar_x1) +DECLARE_CS16_VSQUAREABS_UKERNEL_FUNCTION(xnn_cs16_vsquareabs_ukernel__scalar_x2) +DECLARE_CS16_VSQUAREABS_UKERNEL_FUNCTION(xnn_cs16_vsquareabs_ukernel__scalar_x3) +DECLARE_CS16_VSQUAREABS_UKERNEL_FUNCTION(xnn_cs16_vsquareabs_ukernel__scalar_x4) + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/test/cs16-vsquareabs.cc b/test/cs16-vsquareabs.cc new file mode 100644 index 000000000..d4a9d581b --- /dev/null +++ b/test/cs16-vsquareabs.cc @@ -0,0 +1,125 @@ +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. +// +// Auto-generated file. Do not edit! +// Specification: test/cs16-vsquareabs.yaml +// Generator: tools/generate-vsquareabs-test.py + + +#include <gtest/gtest.h> + +#include <xnnpack/common.h> +#include <xnnpack/isa-checks.h> + +#include <xnnpack/vsquareabs.h> +#include "vsquareabs-microkernel-tester.h" + + +TEST(CS16_VSQUAREABS__SCALAR_X1, channels_eq_1) { + VSquareAbsMicrokernelTester() + .channels(1) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x1); +} + +TEST(CS16_VSQUAREABS__SCALAR_X1, channels_gt_1) { + for (size_t channels = 2; channels < 10; channels++) { + VSquareAbsMicrokernelTester() + .channels(channels) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x1); + } +} + + +TEST(CS16_VSQUAREABS__SCALAR_X2, channels_eq_2) { + VSquareAbsMicrokernelTester() + .channels(2) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x2); +} + +TEST(CS16_VSQUAREABS__SCALAR_X2, channels_div_2) { + for (size_t channels = 4; channels < 20; channels += 2) { + VSquareAbsMicrokernelTester() + .channels(channels) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x2); + } +} + +TEST(CS16_VSQUAREABS__SCALAR_X2, channels_lt_2) { + for (size_t channels = 1; channels < 2; channels++) { + VSquareAbsMicrokernelTester() + .channels(channels) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x2); + } +} + +TEST(CS16_VSQUAREABS__SCALAR_X2, channels_gt_2) { + for (size_t channels = 3; channels < 4; channels++) { + VSquareAbsMicrokernelTester() + .channels(channels) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x2); + } +} + + +TEST(CS16_VSQUAREABS__SCALAR_X3, channels_eq_3) { + VSquareAbsMicrokernelTester() + .channels(3) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x3); +} + +TEST(CS16_VSQUAREABS__SCALAR_X3, channels_div_3) { + for (size_t channels = 6; channels < 30; channels += 3) { + VSquareAbsMicrokernelTester() + .channels(channels) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x3); + } +} + +TEST(CS16_VSQUAREABS__SCALAR_X3, channels_lt_3) { + for (size_t channels = 1; channels < 3; channels++) { + VSquareAbsMicrokernelTester() + .channels(channels) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x3); + } +} + +TEST(CS16_VSQUAREABS__SCALAR_X3, channels_gt_3) { + for (size_t channels = 4; channels < 6; channels++) { + VSquareAbsMicrokernelTester() + .channels(channels) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x3); + } +} + + +TEST(CS16_VSQUAREABS__SCALAR_X4, channels_eq_4) { + VSquareAbsMicrokernelTester() + .channels(4) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x4); +} + +TEST(CS16_VSQUAREABS__SCALAR_X4, channels_div_4) { + for (size_t channels = 8; channels < 40; channels += 4) { + VSquareAbsMicrokernelTester() + .channels(channels) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x4); + } +} + +TEST(CS16_VSQUAREABS__SCALAR_X4, channels_lt_4) { + for (size_t channels = 1; channels < 4; channels++) { + VSquareAbsMicrokernelTester() + .channels(channels) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x4); + } +} + +TEST(CS16_VSQUAREABS__SCALAR_X4, channels_gt_4) { + for (size_t channels = 5; channels < 8; channels++) { + VSquareAbsMicrokernelTester() + .channels(channels) + .Test(xnn_cs16_vsquareabs_ukernel__scalar_x4); + } +} diff --git a/test/cs16-vsquareabs.yaml b/test/cs16-vsquareabs.yaml new file mode 100644 index 000000000..30b1a39bf --- /dev/null +++ b/test/cs16-vsquareabs.yaml @@ -0,0 +1,11 @@ +# Copyright 2022 Google LLC +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + + +# Scalar +- name: xnn_cs16_vsquareabs_ukernel__scalar_x1 +- name: xnn_cs16_vsquareabs_ukernel__scalar_x2 +- name: xnn_cs16_vsquareabs_ukernel__scalar_x3 +- name: xnn_cs16_vsquareabs_ukernel__scalar_x4 diff --git a/test/vsquareabs-microkernel-tester.h b/test/vsquareabs-microkernel-tester.h new file mode 100644 index 000000000..5726634ec --- /dev/null +++ b/test/vsquareabs-microkernel-tester.h @@ -0,0 +1,81 @@ +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include <gtest/gtest.h> + +#include <algorithm> +#include <cassert> +#include <cmath> +#include <cstddef> +#include <cstdlib> +#include <random> +#include <vector> + +#include <xnnpack.h> +#include <xnnpack/aligned-allocator.h> +#include <xnnpack/params.h> + + +class VSquareAbsMicrokernelTester { + public: + inline VSquareAbsMicrokernelTester& channels(size_t channels) { + assert(channels != 0); + this->channels_ = channels; + return *this; + } + + inline size_t channels() const { + return this->channels_; + } + + inline VSquareAbsMicrokernelTester& iterations(size_t iterations) { + this->iterations_ = iterations; + return *this; + } + + inline size_t iterations() const { + return this->iterations_; + } + + void Test(xnn_cs16_vsquareabs_ukernel_function vsquareabs) const { + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto i16rng = std::bind(std::uniform_int_distribution<int16_t>(), std::ref(rng)); + + std::vector<int16_t> x(channels() * 2 + XNN_EXTRA_BYTES / sizeof(int16_t)); + std::vector<uint32_t> y(channels()); + std::vector<uint32_t> y_ref(channels()); + + for (size_t iteration = 0; iteration < iterations(); iteration++) { + std::generate(x.begin(), x.end(), std::ref(i16rng)); + std::fill(y.begin(), y.end(), INT32_C(0x12345678)); + + // Compute reference results. + for (size_t c = 0; c < channels(); c++) { + const int16_t r = x[c * 2]; + const int16_t i = x[c * 2 + 1]; + uint32_t rsquare = static_cast<uint32_t>(static_cast<int32_t>(r) * static_cast<int32_t>(r)); + uint32_t isquare = static_cast<uint32_t>(static_cast<int32_t>(i) * static_cast<int32_t>(i)); + uint32_t value = rsquare + isquare; + y_ref[c] = value; + } + + // Call optimized micro-kernel. + vsquareabs(channels(), x.data(), y.data()); + + // Verify results. + for (size_t c = 0; c < channels(); c++) { + ASSERT_EQ(y[c], y_ref[c]) + << ", channel " << c << " / " << channels(); + } + } + } + + private: + size_t channels_{1}; + size_t iterations_{15}; +}; diff --git a/tools/generate-vsquareabs-test.py b/tools/generate-vsquareabs-test.py new file mode 100755 index 000000000..db1d0e5af --- /dev/null +++ b/tools/generate-vsquareabs-test.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python +# Copyright 2022 Google LLC +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import codecs +import math +import os +import re +import sys +import yaml + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from primes import next_prime +import xngen +import xnncommon + + +parser = argparse.ArgumentParser(description='VSquareAbs microkernel test generator') +parser.add_argument("-s", "--spec", metavar="FILE", required=True, + help="Specification (YAML) file") +parser.add_argument("-o", "--output", metavar="FILE", required=True, + help='Output (C++ source) file') +parser.set_defaults(defines=list()) + + +def split_ukernel_name(name): + match = re.fullmatch(r"xnn_cs16_vsquareabs_ukernel__(.+)_x(\d+)", name) + assert match is not None + channel_tile = int(match.group(2)) + + arch, isa = xnncommon.parse_target_name(target_name=match.group(1)) + return channel_tile, arch, isa + + +VSQUAREABS_TEST_TEMPLATE = """\ +TEST(${TEST_NAME}, channels_eq_${CHANNEL_TILE}) { + $if ISA_CHECK: + ${ISA_CHECK}; + VSquareAbsMicrokernelTester() + .channels(${CHANNEL_TILE}) + .Test(${", ".join(TEST_ARGS)}); +} + +$if CHANNEL_TILE > 1: + TEST(${TEST_NAME}, channels_div_${CHANNEL_TILE}) { + $if ISA_CHECK: + ${ISA_CHECK}; + for (size_t channels = ${CHANNEL_TILE*2}; channels < ${CHANNEL_TILE*10}; channels += ${CHANNEL_TILE}) { + VSquareAbsMicrokernelTester() + .channels(channels) + .Test(${", ".join(TEST_ARGS)}); + } + } + + TEST(${TEST_NAME}, channels_lt_${CHANNEL_TILE}) { + $if ISA_CHECK: + ${ISA_CHECK}; + for (size_t channels = 1; channels < ${CHANNEL_TILE}; channels++) { + VSquareAbsMicrokernelTester() + .channels(channels) + .Test(${", ".join(TEST_ARGS)}); + } + } + +TEST(${TEST_NAME}, channels_gt_${CHANNEL_TILE}) { + $if ISA_CHECK: + ${ISA_CHECK}; + for (size_t channels = ${CHANNEL_TILE+1}; channels < ${10 if CHANNEL_TILE == 1 else CHANNEL_TILE*2}; channels++) { + VSquareAbsMicrokernelTester() + .channels(channels) + .Test(${", ".join(TEST_ARGS)}); + } +} + +""" + + +def generate_test_cases(ukernel, channel_tile, isa): + """Generates all tests cases for a VSquareAbs micro-kernel. + + Args: + ukernel: C name of the micro-kernel function. + channel_tile: Number of channels processed per one iteration of the inner + loop of the micro-kernel. + isa: instruction set required to run the micro-kernel. Generated unit test + will skip execution if the host processor doesn't support this ISA. + + Returns: + Code for the test case. + """ + _, test_name = ukernel.split("_", 1) + _, datatype, ukernel_type, _ = ukernel.split("_", 3) + return xngen.preprocess(VSQUAREABS_TEST_TEMPLATE, { + "TEST_NAME": test_name.upper().replace("UKERNEL_", ""), + "TEST_ARGS": [ukernel], + "DATATYPE": datatype, + "CHANNEL_TILE": channel_tile, + "ISA_CHECK": xnncommon.generate_isa_check_macro(isa), + "next_prime": next_prime, + }) + + +def main(args): + options = parser.parse_args(args) + + with codecs.open(options.spec, "r", encoding="utf-8") as spec_file: + spec_yaml = yaml.safe_load(spec_file) + if not isinstance(spec_yaml, list): + raise ValueError("expected a list of micro-kernels in the spec") + + tests = """\ +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. +// +// Auto-generated file. Do not edit! +// Specification: {specification} +// Generator: {generator} + + +#include <gtest/gtest.h> + +#include <xnnpack/common.h> +#include <xnnpack/isa-checks.h> + +#include <xnnpack/vsquareabs.h> +#include "vsquareabs-microkernel-tester.h" +""".format(specification=options.spec, generator=sys.argv[0]) + + for ukernel_spec in spec_yaml: + name = ukernel_spec["name"] + channel_tile, arch, isa = split_ukernel_name(name) + + # specification can override architecture + arch = ukernel_spec.get("arch", arch) + + test_case = generate_test_cases(name, channel_tile, isa) + tests += "\n\n" + xnncommon.postprocess_test_case(test_case, arch, isa) + + txt_changed = True + if os.path.exists(options.output): + with codecs.open(options.output, "r", encoding="utf-8") as output_file: + txt_changed = output_file.read() != tests + + if txt_changed: + with codecs.open(options.output, "w", encoding="utf-8") as output_file: + output_file.write(tests) + + +if __name__ == "__main__": + main(sys.argv[1:]) |