aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Marat Dukhan <maratek@google.com> 2022-07-26 10:47:39 -0700
committer: XNNPACK Team <xnnpack-github-robot@google.com> 2022-07-26 10:48:31 -0700
commit: e7a0a811b389d96f353478ef1054dc567e503216 (patch)
tree: 6a5f9ae140c1414ecc06625030b3273159db20f9
parent: 3f6b0d4d65c2f98ef3957b92dd7e6fbd42828572 (diff)
download: XNNPACK-e7a0a811b389d96f353478ef1054dc567e503216.tar.gz
Introduce math_clz_nonzero_u32 function
PiperOrigin-RevId: 463376790
-rw-r--r-- src/math/sqrt-u32-scalar-clz-newton.c 4
-rw-r--r-- src/math/sqrt-u32-scalar-hashemian.c 2
-rw-r--r-- src/math/sqrt-u32-scalar-tflm.c 2
-rw-r--r-- src/xnnpack/math.h 11
4 files changed, 15 insertions, 4 deletions
diff --git a/src/math/sqrt-u32-scalar-clz-newton.c b/src/math/sqrt-u32-scalar-clz-newton.c
index 9d4da5965..b09448532 100644
--- a/src/math/sqrt-u32-scalar-clz-newton.c
+++ b/src/math/sqrt-u32-scalar-clz-newton.c
@@ -23,8 +23,8 @@ void xnn_math_u32_sqrt__scalar_clz_newton(
uint32_t vy = vx;
// Based on Hacker's Delight, Figure 11-1.
- if (vx > 1) {
- const uint32_t vs = 16 - (math_clz_u32(vx - 1) >> 1);
+ if (vx > 1) {  // vx <= 1 keeps vy = vx; the guard also guarantees vx - 1 != 0, as math_clz_nonzero_u32 requires
+ const uint32_t vs = 16 - (math_clz_nonzero_u32(vx - 1) >> 1);
uint32_t vg0 = UINT32_C(1) << vs;
uint32_t vg1 = (vg0 + (vx >> vs)) >> 1;
diff --git a/src/math/sqrt-u32-scalar-hashemian.c b/src/math/sqrt-u32-scalar-hashemian.c
index fd679ad20..af5f1849d 100644
--- a/src/math/sqrt-u32-scalar-hashemian.c
+++ b/src/math/sqrt-u32-scalar-hashemian.c
@@ -27,7 +27,7 @@ void xnn_math_u32_sqrt__scalar_hashemian(
* and StackOverflow answer https://stackoverflow.com/a/31149161
*/
- const uint32_t vn = math_clz_u32(vx);
+ const uint32_t vn = math_clz_nonzero_u32(vx);
const uint32_t vleft_shift = vn & 1;
const uint32_t vm_minus_1 = 15 - (vn >> 1);
const uint32_t vm_plus_1 = vm_minus_1 + 2;
diff --git a/src/math/sqrt-u32-scalar-tflm.c b/src/math/sqrt-u32-scalar-tflm.c
index 72cab03e6..9edfe15e9 100644
--- a/src/math/sqrt-u32-scalar-tflm.c
+++ b/src/math/sqrt-u32-scalar-tflm.c
@@ -23,7 +23,7 @@ void xnn_math_u32_sqrt__scalar_tflm(
// Algorithm adapted from tensorflow/lite/experimental/microfrontend/lib/filterbank.c in TFLite-Micro
uint32_t vy = 0;
if (vx != 0) {
- const uint32_t vn = (math_clz_u32(vx) | 1) ^ 31;
+ const uint32_t vn = (math_clz_nonzero_u32(vx) | 1) ^ 31;
uint32_t vb = UINT32_C(1) << vn;
uint32_t iterations = (vn >> 1) + 1;
while (iterations--) {
diff --git a/src/xnnpack/math.h b/src/xnnpack/math.h
index deefacf6f..7704fd5ba 100644
--- a/src/xnnpack/math.h
+++ b/src/xnnpack/math.h
@@ -193,6 +193,17 @@ XNN_INLINE static uint32_t math_clz_u32(uint32_t x) {
#endif
}
+XNN_INLINE static uint32_t math_clz_nonzero_u32(uint32_t x) {  // Returns the number of leading zero bits in x; callers must pass x != 0.
+ assert(x != 0);  // Zero is outside this function's contract; the check is compiled out when NDEBUG is defined.
+ #ifdef _MSC_VER
+ unsigned long index;
+ _BitScanReverse(&index, (unsigned long) x);  // Stores the bit position of the highest set bit of x in index.
+ return (uint32_t) index ^ 31;  // For index in [0, 31], index ^ 31 equals 31 - index, i.e. the leading-zero count.
+ #else
+ return (uint32_t) __builtin_clz((unsigned int) x);  // __builtin_clz has an undefined result for 0, hence the non-zero precondition.
+ #endif
+}
+
XNN_INLINE static uint32_t math_ctz_u32(uint32_t x) {
#ifdef _MSC_VER
unsigned long index;