aboutsummaryrefslogtreecommitdiff
path: root/math/math_config.h
diff options
context:
space:
mode:
authorSzabolcs Nagy <szabolcs.nagy@arm.com>2018-06-05 16:15:27 +0100
committerSzabolcs Nagy <szabolcs.nagy@arm.com>2018-06-06 16:17:19 +0100
commitd69e504577169c5f75803f1b97a42822898a78b3 (patch)
tree6196f61c3386e50ad8257d6a1f21c90ef39dddb8 /math/math_config.h
parenta7711a35d57cae0c9fcf0cd61903bbf4701240cf (diff)
downloadarm-optimized-routines-d69e504577169c5f75803f1b97a42822898a78b3.tar.gz
Add new log2 implementation
A similar algorithm is used as in log, but there are more operations (and more error) due to the 1/ln2 multiplier. There is a separate code path for when the fma instruction is not available, both for computing x/c - 1 precisely, for which the table size is doubled, and for computing (x/c - 1)/ln2 precisely. The worst case error is 0.547 ULP (0.55 without fma), the read only global data size is 1168 bytes (2192 without fma). The non-nearest rounding error is less than 1 ULP. Improvements on Cortex-A72 compared to current glibc master: log latency: 2.04x log throughput: 1.87x
Diffstat (limited to 'math/math_config.h')
-rw-r--r--math/math_config.h14
1 files changed, 14 insertions, 0 deletions
diff --git a/math/math_config.h b/math/math_config.h
index 3383e70..28b7d26 100644
--- a/math/math_config.h
+++ b/math/math_config.h
@@ -309,4 +309,18 @@ extern const struct log_data {
#endif
} __log_data HIDDEN;
+#define LOG2_TABLE_BITS 6 /* tab[] and tab2[] each hold 1 << LOG2_TABLE_BITS (64) entries. */
+#define LOG2_POLY_ORDER 7 /* poly[] holds LOG2_POLY_ORDER - 1 (6) coefficients. */
+#define LOG2_POLY1_ORDER 11 /* poly1[] holds LOG2_POLY1_ORDER - 1 (10) coefficients. */
+extern const struct log2_data { /* Read-only constants for log2; declared here, defined in another file. */
+ double invln2hi; /* NOTE(review): presumably the high part of 1/ln2 -- confirm against the table definition. */
+ double invln2lo; /* NOTE(review): presumably the low-order correction to invln2hi -- confirm. */
+ double poly[LOG2_POLY_ORDER - 1]; /* Polynomial coefficients; the lower-order of the two sets. */
+ double poly1[LOG2_POLY1_ORDER - 1]; /* Second, higher-order coefficient set; NOTE(review): likely for inputs near 1 -- confirm. */
+ struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS]; /* Lookup table; NOTE(review): presumably 1/c and log2(c) per subinterval -- confirm. */
+#if !HAVE_FAST_FMA /* The extra table exists only in builds without a fast fma. */
+ struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS]; /* NOTE(review): presumably c split into high/low parts so x/c - 1 can be computed precisely without fma -- confirm. */
+#endif
+} __log2_data HIDDEN; /* HIDDEN is defined outside this hunk; NOTE(review): presumably a symbol-visibility attribute -- confirm. */
+
#endif