aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marat Dukhan <maratek@google.com> 2022-08-21 19:11:13 -0700
committer XNNPACK Team <xnnpack-github-robot@google.com> 2022-08-21 19:12:05 -0700
commit 9614006ee79cf8fc4688bde5be135ef906c3e21c (patch)
tree 6c74b3af24ec4adf8eba24df38da6f09c6e61565
parent 3fd2d48a6bb392fbe033c03ae82f190ba7d186a9 (diff)
download XNNPACK-9614006ee79cf8fc4688bde5be135ef906c3e21c.tar.gz
Evaluation stubs for U32 SQRT using F32 SQRT
PiperOrigin-RevId: 469077131
-rw-r--r-- BUILD.bazel 2
-rwxr-xr-x CMakeLists.txt 2
-rw-r--r-- eval/u32-sqrt.cc 88
-rw-r--r-- src/math/sqrt-u32-scalar-cvti64-sqrtf-lrintf.c 39
-rw-r--r-- src/math/sqrt-u32-scalar-cvtu32-sqrtf-lrintf.c 39
-rw-r--r-- src/xnnpack/math-stubs.h 3
6 files changed, 173 insertions, 0 deletions
diff --git a/BUILD.bazel b/BUILD.bazel
index 546fc4431..d9845f862 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -1051,6 +1051,8 @@ ALL_SCALAR_MICROKERNEL_SRCS = [
"src/math/sqrt-u32-scalar-cvti32-sqrt-lrint.c",
"src/math/sqrt-u32-scalar-cvti64-sqrt-lrint.c",
"src/math/sqrt-u32-scalar-cvtu32-sqrt-lrint.c",
+ "src/math/sqrt-u32-scalar-cvti64-sqrtf-lrintf.c",
+ "src/math/sqrt-u32-scalar-cvtu32-sqrtf-lrintf.c",
"src/math/sqrt-u32-scalar-hashemian.c",
"src/math/sqrt-u32-scalar-tflm.c",
"src/qc8-dwconv/gen/up1x3-minmax-fp32-scalar-fmagic.c",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 721e0ef94..153faf0cc 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1039,6 +1039,8 @@ SET(ALL_SCALAR_MICROKERNEL_SRCS
src/math/sqrt-u32-scalar-cvti32-sqrt-lrint.c
src/math/sqrt-u32-scalar-cvti64-sqrt-lrint.c
src/math/sqrt-u32-scalar-cvtu32-sqrt-lrint.c
+ src/math/sqrt-u32-scalar-cvti64-sqrtf-lrintf.c
+ src/math/sqrt-u32-scalar-cvtu32-sqrtf-lrintf.c
src/math/sqrt-u32-scalar-hashemian.c
src/math/sqrt-u32-scalar-tflm.c
src/qc8-dwconv/gen/up1x3-minmax-fp32-scalar-fmagic.c
diff --git a/eval/u32-sqrt.cc b/eval/u32-sqrt.cc
index 7605f8c91..5b4d87576 100644
--- a/eval/u32-sqrt.cc
+++ b/eval/u32-sqrt.cc
@@ -284,6 +284,94 @@ TEST(SQRT__SCALAR_CVTU32_SQRT_LRINT, 65536_output) {
}
+TEST(SQRT__SCALAR_CVTI64_SQRTF_LRINTF, uint16_output) {  // Sweeps inputs 0..4294901760 and checks output^2 is strictly closer to the input than (output-1)^2 and (output+1)^2.
+ std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
+ std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
+ for (uint32_t n = 0; n <= UINT32_C(4294901760); n += kBlockSize) {  // 4294901760 == 65535 * 65535 + 65535; kBlockSize is defined outside this hunk.
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = std::min<uint32_t>(n + i, UINT32_C(4294901760));  // Clamp so no element exceeds the upper bound of the sweep.
+ }
+ xnn_math_u32_sqrt__scalar_cvti64_sqrtf_lrintf(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());  // Size argument is in bytes.
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t input = inputs[i];
+ const uint32_t output = outputs[i];
+ const int64_t squared_output = int64_t(uint64_t(output) * uint64_t(output));  // Operands are widened to 64 bits before multiplying.
+
+ const uint32_t prev_output = output - 1;  // Unsigned arithmetic: wraps to UINT32_MAX when output is 0.
+ const int64_t squared_prev_output = int64_t(uint64_t(prev_output) * uint64_t(prev_output));
+ ASSERT_LT(std::abs(squared_output - int64_t(input)), std::abs(squared_prev_output - int64_t(input)))  // output must beat output - 1.
+ << "input = " << input << ", output = " << output;
+
+ const uint32_t next_output = output + 1;
+ const int64_t squared_next_output = int64_t(uint64_t(next_output) * uint64_t(next_output));
+ ASSERT_LT(std::abs(squared_output - int64_t(input)), std::abs(squared_next_output - int64_t(input)))  // output must beat output + 1.
+ << "input = " << input << ", output = " << output;
+ }
+ }
+}
+
+TEST(SQRT__SCALAR_CVTI64_SQRTF_LRINTF, 65536_output) {  // Checks that every input from 4294901761 upward produces exactly 0x00010000 (65536).
+ std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
+ std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(4294901761); n >= UINT32_C(4294901761); n += kBlockSize) {  // Loop ends once n wraps around past UINT32_MAX and falls below the start value.
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = std::max<uint32_t>(n + i, UINT32_C(4294901761));  // If n + i wraps to a small value, std::max substitutes the lower bound.
+ }
+ xnn_math_u32_sqrt__scalar_cvti64_sqrtf_lrintf(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());  // Size argument is in bytes.
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t input = inputs[i];
+ const uint32_t output = outputs[i];
+ ASSERT_EQ(output, UINT32_C(0x00010000))  // 0x00010000 == 65536.
+ << "input = " << input << ", output = " << output;
+ }
+ }
+}
+
+
+TEST(SQRT__SCALAR_CVTU32_SQRTF_LRINTF, uint16_output) {  // Sweeps inputs 0..4294901760 and checks output^2 is strictly closer to the input than (output-1)^2 and (output+1)^2.
+ std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
+ std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
+ for (uint32_t n = 0; n <= UINT32_C(4294901760); n += kBlockSize) {  // 4294901760 == 65535 * 65535 + 65535; kBlockSize is defined outside this hunk.
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = std::min<uint32_t>(n + i, UINT32_C(4294901760));  // Clamp so no element exceeds the upper bound of the sweep.
+ }
+ xnn_math_u32_sqrt__scalar_cvtu32_sqrtf_lrintf(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());  // Size argument is in bytes.
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t input = inputs[i];
+ const uint32_t output = outputs[i];
+ const int64_t squared_output = int64_t(uint64_t(output) * uint64_t(output));  // Operands are widened to 64 bits before multiplying.
+
+ const uint32_t prev_output = output - 1;  // Unsigned arithmetic: wraps to UINT32_MAX when output is 0.
+ const int64_t squared_prev_output = int64_t(uint64_t(prev_output) * uint64_t(prev_output));
+ ASSERT_LT(std::abs(squared_output - int64_t(input)), std::abs(squared_prev_output - int64_t(input)))  // output must beat output - 1.
+ << "input = " << input << ", output = " << output;
+
+ const uint32_t next_output = output + 1;
+ const int64_t squared_next_output = int64_t(uint64_t(next_output) * uint64_t(next_output));
+ ASSERT_LT(std::abs(squared_output - int64_t(input)), std::abs(squared_next_output - int64_t(input)))  // output must beat output + 1.
+ << "input = " << input << ", output = " << output;
+ }
+ }
+}
+
+TEST(SQRT__SCALAR_CVTU32_SQRTF_LRINTF, 65536_output) {  // Checks that every input from 4294901761 upward produces exactly 0x00010000 (65536).
+ std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
+ std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
+ for (uint32_t n = UINT32_C(4294901761); n >= UINT32_C(4294901761); n += kBlockSize) {  // Loop ends once n wraps around past UINT32_MAX and falls below the start value.
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ inputs[i] = std::max<uint32_t>(n + i, UINT32_C(4294901761));  // If n + i wraps to a small value, std::max substitutes the lower bound.
+ }
+ xnn_math_u32_sqrt__scalar_cvtu32_sqrtf_lrintf(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());  // Size argument is in bytes.
+ for (uint32_t i = 0; i < kBlockSize; i++) {
+ const uint32_t input = inputs[i];
+ const uint32_t output = outputs[i];
+ ASSERT_EQ(output, UINT32_C(0x00010000))  // 0x00010000 == 65536.
+ << "input = " << input << ", output = " << output;
+ }
+ }
+}
+
+
TEST(SQRT__SCALAR_HASHEMIAN, uint16_output) {
std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
diff --git a/src/math/sqrt-u32-scalar-cvti64-sqrtf-lrintf.c b/src/math/sqrt-u32-scalar-cvti64-sqrtf-lrintf.c
new file mode 100644
index 000000000..dd4e68529
--- /dev/null
+++ b/src/math/sqrt-u32-scalar-cvti64-sqrtf-lrintf.c
@@ -0,0 +1,39 @@
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+#include <math.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math-stubs.h>
+
+
+void xnn_math_u32_sqrt__scalar_cvti64_sqrtf_lrintf(  // For each uint32 input, writes an integer square root: a sqrtf/lrintf estimate corrected by at most one.
+ size_t n,  // Size of the input in bytes; asserted to be a multiple of sizeof(uint32_t).
+ const uint32_t* input,  // Source elements, read sequentially.
+ uint32_t* output)  // Destination; one element is written per input element.
+{
+ assert(n % sizeof(uint32_t) == 0);
+
+ for (; n != 0; n -= sizeof(uint32_t)) {  // One element per iteration.
+ const uint32_t vx = *input++;
+
+ uint32_t vy = vx;  // A zero input skips the branch below and is stored as 0.
+ if XNN_LIKELY(vx != 0) {
+ float vf = (float) (double) (int64_t) (uint64_t) vx;  // Widen to 64 bits, pass through signed int64 and double, then narrow to float.
+ vf = sqrtf(vf);
+ vy = (uint32_t) (int32_t) lrintf(vf);  // Initial estimate: the sqrtf result rounded to an integer by lrintf.
+ const uint32_t vsquared_y_less_x = vy * vy - vx;  // vy^2 - vx in wrapping 32-bit arithmetic; reinterpreted as signed below.
+ if XNN_UNPREDICTABLE((int32_t) (vsquared_y_less_x + vy) < 0) {  // vy^2 + vy < vx: the estimate is one too small.
+ vy += 1;
+ } else if XNN_UNPREDICTABLE((int32_t) (vsquared_y_less_x - vy) >= 0) {  // vy^2 - vy >= vx: the estimate is one too large.
+ vy -= 1;
+ }
+ }
+
+ *output++ = vy;
+ }
+}
diff --git a/src/math/sqrt-u32-scalar-cvtu32-sqrtf-lrintf.c b/src/math/sqrt-u32-scalar-cvtu32-sqrtf-lrintf.c
new file mode 100644
index 000000000..cb3b9e94b
--- /dev/null
+++ b/src/math/sqrt-u32-scalar-cvtu32-sqrtf-lrintf.c
@@ -0,0 +1,39 @@
+// Copyright 2022 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+#include <math.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/math-stubs.h>
+
+
+void xnn_math_u32_sqrt__scalar_cvtu32_sqrtf_lrintf(  // For each uint32 input, writes an integer square root: a sqrtf/lrintf estimate corrected by at most one.
+ size_t n,  // Size of the input in bytes; asserted to be a multiple of sizeof(uint32_t).
+ const uint32_t* input,  // Source elements, read sequentially.
+ uint32_t* output)  // Destination; one element is written per input element.
+{
+ assert(n % sizeof(uint32_t) == 0);
+
+ for (; n != 0; n -= sizeof(uint32_t)) {  // One element per iteration.
+ const uint32_t vx = *input++;
+
+ uint32_t vy = vx;  // A zero input skips the branch below and is stored as 0.
+ if XNN_LIKELY(vx != 0) {
+ float vf = (float) vx;  // Direct unsigned 32-bit to float conversion.
+ vf = sqrtf(vf);
+ vy = (uint32_t) (int32_t) lrintf(vf);  // Initial estimate: the sqrtf result rounded to an integer by lrintf.
+ const uint32_t vsquared_y_less_x = vy * vy - vx;  // vy^2 - vx in wrapping 32-bit arithmetic; reinterpreted as signed below.
+ if XNN_UNPREDICTABLE((int32_t) (vsquared_y_less_x + vy) < 0) {  // vy^2 + vy < vx: the estimate is one too small.
+ vy += 1;
+ } else if XNN_UNPREDICTABLE((int32_t) (vsquared_y_less_x - vy) >= 0) {  // vy^2 - vy >= vx: the estimate is one too large.
+ vy -= 1;
+ }
+ }
+
+ *output++ = vy;
+ }
+}
diff --git a/src/xnnpack/math-stubs.h b/src/xnnpack/math-stubs.h
index e79635384..1bedd8501 100644
--- a/src/xnnpack/math-stubs.h
+++ b/src/xnnpack/math-stubs.h
@@ -354,6 +354,9 @@ DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_clz_newton)
DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_cvti32_sqrt_lrint)
DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_cvti64_sqrt_lrint)
DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_cvtu32_sqrt_lrint)
+DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_cvti32_sqrtf_lrintf)
+DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_cvti64_sqrtf_lrintf)
+DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_cvtu32_sqrtf_lrintf)
DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_hashemian)
DECLARE_U32_UNARY_MATH_FUNCTION(xnn_math_u32_sqrt__scalar_tflm)