diff options
author | Marat Dukhan <maratek@google.com> | 2022-08-21 19:11:13 -0700 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2022-08-21 19:12:05 -0700 |
commit | 9614006ee79cf8fc4688bde5be135ef906c3e21c (patch) | |
tree | 6c74b3af24ec4adf8eba24df38da6f09c6e61565 | |
parent | 3fd2d48a6bb392fbe033c03ae82f190ba7d186a9 (diff) | |
download | XNNPACK-9614006ee79cf8fc4688bde5be135ef906c3e21c.tar.gz |
Evaluation stubs for U32 SQRT using F32 SQRT
PiperOrigin-RevId: 469077131
-rw-r--r-- | BUILD.bazel | 2 |
-rwxr-xr-x | CMakeLists.txt | 2 |
-rw-r--r-- | eval/u32-sqrt.cc | 88 |
-rw-r--r-- | src/math/sqrt-u32-scalar-cvti64-sqrtf-lrintf.c | 39 |
-rw-r--r-- | src/math/sqrt-u32-scalar-cvtu32-sqrtf-lrintf.c | 39 |
-rw-r--r-- | src/xnnpack/math-stubs.h | 3 |
6 files changed, 173 insertions, 0 deletions
diff --git a/BUILD.bazel b/BUILD.bazel index 546fc4431..d9845f862 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1051,6 +1051,8 @@ ALL_SCALAR_MICROKERNEL_SRCS = [ "src/math/sqrt-u32-scalar-cvti32-sqrt-lrint.c", "src/math/sqrt-u32-scalar-cvti64-sqrt-lrint.c", "src/math/sqrt-u32-scalar-cvtu32-sqrt-lrint.c", + "src/math/sqrt-u32-scalar-cvti64-sqrtf-lrintf.c", + "src/math/sqrt-u32-scalar-cvtu32-sqrtf-lrintf.c", "src/math/sqrt-u32-scalar-hashemian.c", "src/math/sqrt-u32-scalar-tflm.c", "src/qc8-dwconv/gen/up1x3-minmax-fp32-scalar-fmagic.c", diff --git a/CMakeLists.txt b/CMakeLists.txt index 721e0ef94..153faf0cc 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1039,6 +1039,8 @@ SET(ALL_SCALAR_MICROKERNEL_SRCS src/math/sqrt-u32-scalar-cvti32-sqrt-lrint.c src/math/sqrt-u32-scalar-cvti64-sqrt-lrint.c src/math/sqrt-u32-scalar-cvtu32-sqrt-lrint.c + src/math/sqrt-u32-scalar-cvti64-sqrtf-lrintf.c + src/math/sqrt-u32-scalar-cvtu32-sqrtf-lrintf.c src/math/sqrt-u32-scalar-hashemian.c src/math/sqrt-u32-scalar-tflm.c src/qc8-dwconv/gen/up1x3-minmax-fp32-scalar-fmagic.c diff --git a/eval/u32-sqrt.cc b/eval/u32-sqrt.cc index 7605f8c91..5b4d87576 100644 --- a/eval/u32-sqrt.cc +++ b/eval/u32-sqrt.cc @@ -284,6 +284,94 @@ TEST(SQRT__SCALAR_CVTU32_SQRT_LRINT, 65536_output) { } +TEST(SQRT__SCALAR_CVTI64_SQRTF_LRINTF, uint16_output) { + std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize); + std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize); + for (uint32_t n = 0; n <= UINT32_C(4294901760); n += kBlockSize) { + for (uint32_t i = 0; i < kBlockSize; i++) { + inputs[i] = std::min<uint32_t>(n + i, UINT32_C(4294901760)); + } + xnn_math_u32_sqrt__scalar_cvti64_sqrtf_lrintf(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data()); + for (uint32_t i = 0; i < kBlockSize; i++) { + const uint32_t input = inputs[i]; + const uint32_t output = outputs[i]; + const int64_t squared_output = int64_t(uint64_t(output) * uint64_t(output)); + + const uint32_t 
prev_output = output - 1; + const int64_t squared_prev_output = int64_t(uint64_t(prev_output) * uint64_t(prev_output)); + ASSERT_LT(std::abs(squared_output - int64_t(input)), std::abs(squared_prev_output - int64_t(input))) + << "input = " << input << ", output = " << output; + + const uint32_t next_output = output + 1; + const int64_t squared_next_output = int64_t(uint64_t(next_output) * uint64_t(next_output)); + ASSERT_LT(std::abs(squared_output - int64_t(input)), std::abs(squared_next_output - int64_t(input))) + << "input = " << input << ", output = " << output; + } + } +} + +TEST(SQRT__SCALAR_CVTI64_SQRTF_LRINTF, 65536_output) { + std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize); + std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize); + for (uint32_t n = UINT32_C(4294901761); n >= UINT32_C(4294901761); n += kBlockSize) { + for (uint32_t i = 0; i < kBlockSize; i++) { + inputs[i] = std::max<uint32_t>(n + i, UINT32_C(4294901761)); + } + xnn_math_u32_sqrt__scalar_cvti64_sqrtf_lrintf(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data()); + for (uint32_t i = 0; i < kBlockSize; i++) { + const uint32_t input = inputs[i]; + const uint32_t output = outputs[i]; + ASSERT_EQ(output, UINT32_C(0x00010000)) + << "input = " << input << ", output = " << output; + } + } +} + + +TEST(SQRT__SCALAR_CVTU32_SQRTF_LRINTF, uint16_output) { + std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize); + std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize); + for (uint32_t n = 0; n <= UINT32_C(4294901760); n += kBlockSize) { + for (uint32_t i = 0; i < kBlockSize; i++) { + inputs[i] = std::min<uint32_t>(n + i, UINT32_C(4294901760)); + } + xnn_math_u32_sqrt__scalar_cvtu32_sqrtf_lrintf(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data()); + for (uint32_t i = 0; i < kBlockSize; i++) { + const uint32_t input = inputs[i]; + const uint32_t output = outputs[i]; + const int64_t squared_output = 
int64_t(uint64_t(output) * uint64_t(output)); + + const uint32_t prev_output = output - 1; + const int64_t squared_prev_output = int64_t(uint64_t(prev_output) * uint64_t(prev_output)); + ASSERT_LT(std::abs(squared_output - int64_t(input)), std::abs(squared_prev_output - int64_t(input))) + << "input = " << input << ", output = " << output; + + const uint32_t next_output = output + 1; + const int64_t squared_next_output = int64_t(uint64_t(next_output) * uint64_t(next_output)); + ASSERT_LT(std::abs(squared_output - int64_t(input)), std::abs(squared_next_output - int64_t(input))) + << "input = " << input << ", output = " << output; + } + } +} + +TEST(SQRT__SCALAR_CVTU32_SQRTF_LRINTF, 65536_output) { + std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize); + std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize); + for (uint32_t n = UINT32_C(4294901761); n >= UINT32_C(4294901761); n += kBlockSize) { + for (uint32_t i = 0; i < kBlockSize; i++) { + inputs[i] = std::max<uint32_t>(n + i, UINT32_C(4294901761)); + } + xnn_math_u32_sqrt__scalar_cvtu32_sqrtf_lrintf(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data()); + for (uint32_t i = 0; i < kBlockSize; i++) { + const uint32_t input = inputs[i]; + const uint32_t output = outputs[i]; + ASSERT_EQ(output, UINT32_C(0x00010000)) + << "input = " << input << ", output = " << output; + } + } +} + + TEST(SQRT__SCALAR_HASHEMIAN, uint16_output) { std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize); std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize); diff --git a/src/math/sqrt-u32-scalar-cvti64-sqrtf-lrintf.c b/src/math/sqrt-u32-scalar-cvti64-sqrtf-lrintf.c new file mode 100644 index 000000000..dd4e68529 --- /dev/null +++ b/src/math/sqrt-u32-scalar-cvti64-sqrtf-lrintf.c @@ -0,0 +1,39 @@ +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source 
tree. + +#include <assert.h> +#include <stddef.h> +#include <math.h> + +#include <xnnpack/common.h> +#include <xnnpack/math-stubs.h> + + +void xnn_math_u32_sqrt__scalar_cvti64_sqrtf_lrintf( + size_t n, + const uint32_t* input, + uint32_t* output) +{ + assert(n % sizeof(uint32_t) == 0); + + for (; n != 0; n -= sizeof(uint32_t)) { + const uint32_t vx = *input++; + + uint32_t vy = vx; + if XNN_LIKELY(vx != 0) { + float vf = (float) (double) (int64_t) (uint64_t) vx; + vf = sqrtf(vf); + vy = (uint32_t) (int32_t) lrintf(vf); + const uint32_t vsquared_y_less_x = vy * vy - vx; + if XNN_UNPREDICTABLE((int32_t) (vsquared_y_less_x + vy) < 0) { + vy += 1; + } else if XNN_UNPREDICTABLE((int32_t) (vsquared_y_less_x - vy) >= 0) { + vy -= 1; + } + } + + *output++ = vy; + } +} diff --git a/src/math/sqrt-u32-scalar-cvtu32-sqrtf-lrintf.c b/src/math/sqrt-u32-scalar-cvtu32-sqrtf-lrintf.c new file mode 100644 index 000000000..cb3b9e94b --- /dev/null +++ b/src/math/sqrt-u32-scalar-cvtu32-sqrtf-lrintf.c @@ -0,0 +1,39 @@ +// Copyright 2022 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
+ +#include <assert.h> +#include <stddef.h> +#include <math.h> + +#include <xnnpack/common.h> +#include <xnnpack/math-stubs.h> + + +void xnn_math_u32_sqrt__scalar_cvtu32_sqrtf_lrintf( + size_t n, + const uint32_t* input, + uint32_t* output) +{ + assert(n % sizeof(uint32_t) == 0); + + for (; n != 0; n -= sizeof(uint32_t)) { + const uint32_t vx = *input++; + + uint32_t vy = vx; + if XNN_LIKELY(vx != 0) { + float vf = (float) vx; + vf = sqrtf(vf); + vy = (uint32_t) (int32_t) lrintf(vf); + const uint32_t vsquared_y_less_x = vy * vy - vx; + if XNN_UNPREDICTABLE((int32_t) (vsquared_y_less_x + vy) < 0) { + vy += 1; + } else if XNN_UNPREDICTABLE((int32_t) (vsquared_y_less_x - vy) >= 0) { + vy -= 1; + } + } + + *output++ = vy; + } +} diff --git a/src/xnnpack/math-stubs.h b/src/xnnpack/math-stubs.h index e79635384..1bedd8501 100644 --- a/src/xnnpack/math-stubs.h +++ b/src/xnnpack/math-stubs.h @@ -354,6 +354,9 @@ DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_clz_newton) DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_cvti32_sqrt_lrint) DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_cvti64_sqrt_lrint) DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_cvtu32_sqrt_lrint) +DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_cvti32_sqrtf_lrintf) +DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_cvti64_sqrtf_lrintf) +DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_cvtu32_sqrtf_lrintf) DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_hashemian) DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_tflm) |