diff options
author | Marat Dukhan <maratek@google.com> | 2022-07-26 10:47:39 -0700 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2022-07-26 10:48:31 -0700 |
commit | e7a0a811b389d96f353478ef1054dc567e503216 (patch) | |
tree | 6a5f9ae140c1414ecc06625030b3273159db20f9 | |
parent | 3f6b0d4d65c2f98ef3957b92dd7e6fbd42828572 (diff) | |
download | XNNPACK-e7a0a811b389d96f353478ef1054dc567e503216.tar.gz |
Introduce math_clz_nonzero_u32 function
PiperOrigin-RevId: 463376790
-rw-r--r-- | src/math/sqrt-u32-scalar-clz-newton.c | 2 | ||||
-rw-r--r-- | src/math/sqrt-u32-scalar-hashemian.c | 2 | ||||
-rw-r--r-- | src/math/sqrt-u32-scalar-tflm.c | 2 | ||||
-rw-r--r-- | src/xnnpack/math.h | 13 | ||||
4 files changed, 16 insertions, 3 deletions
diff --git a/src/math/sqrt-u32-scalar-clz-newton.c b/src/math/sqrt-u32-scalar-clz-newton.c
index 9d4da5965..b09448532 100644
--- a/src/math/sqrt-u32-scalar-clz-newton.c
+++ b/src/math/sqrt-u32-scalar-clz-newton.c
@@ -23,8 +23,8 @@ void xnn_math_u32_sqrt__scalar_clz_newton(

     uint32_t vy = vx;
     // Based on Hacker's Delight, Figure 11-1.
     if (vx > 1) {
-      const uint32_t vs = 16 - (math_clz_u32(vx - 1) >> 1);
+      const uint32_t vs = 16 - (math_clz_nonzero_u32(vx - 1) >> 1);

       uint32_t vg0 = UINT32_C(1) << vs;
       uint32_t vg1 = (vg0 + (vx >> vs)) >> 1;
diff --git a/src/math/sqrt-u32-scalar-hashemian.c b/src/math/sqrt-u32-scalar-hashemian.c
index fd679ad20..af5f1849d 100644
--- a/src/math/sqrt-u32-scalar-hashemian.c
+++ b/src/math/sqrt-u32-scalar-hashemian.c
@@ -27,7 +27,7 @@ void xnn_math_u32_sqrt__scalar_hashemian(
      * and StackOverflow answer https://stackoverflow.com/a/31149161
      */

-    const uint32_t vn = math_clz_u32(vx);
+    const uint32_t vn = math_clz_nonzero_u32(vx);
     const uint32_t vleft_shift = vn & 1;
     const uint32_t vm_minus_1 = 15 - (vn >> 1);
     const uint32_t vm_plus_1 = vm_minus_1 + 2;
diff --git a/src/math/sqrt-u32-scalar-tflm.c b/src/math/sqrt-u32-scalar-tflm.c
index 72cab03e6..9edfe15e9 100644
--- a/src/math/sqrt-u32-scalar-tflm.c
+++ b/src/math/sqrt-u32-scalar-tflm.c
@@ -23,7 +23,7 @@ void xnn_math_u32_sqrt__scalar_tflm(
     // Algorithm adapted from tensorflow/lite/experimental/microfrontend/lib/filterbank.c in TFLite-Micro
     uint32_t vy = 0;
     if (vx != 0) {
-      const uint32_t vn = (math_clz_u32(vx) | 1) ^ 31;
+      const uint32_t vn = (math_clz_nonzero_u32(vx) | 1) ^ 31;
       uint32_t vb = UINT32_C(1) << vn;
       uint32_t iterations = (vn >> 1) + 1;
       while (iterations--) {
diff --git a/src/xnnpack/math.h b/src/xnnpack/math.h
index deefacf6f..7704fd5ba 100644
--- a/src/xnnpack/math.h
+++ b/src/xnnpack/math.h
@@ -193,6 +193,19 @@ XNN_INLINE static uint32_t math_clz_u32(uint32_t x) {
   #endif
 }

+// Counts the leading zero bits of x. The caller must pass a non-zero x:
+// this is asserted, and the intrinsics below are not meaningful for zero.
+XNN_INLINE static uint32_t math_clz_nonzero_u32(uint32_t x) {
+  assert(x != 0);
+  #ifdef _MSC_VER
+    unsigned long index;
+    _BitScanReverse(&index, (unsigned long) x);
+    return (uint32_t) index ^ 31;
+  #else
+    return (uint32_t) __builtin_clz((unsigned int) x);
+  #endif
+}
+
 XNN_INLINE static uint32_t math_ctz_u32(uint32_t x) {
   #ifdef _MSC_VER
     unsigned long index;