Add new log2 implementation

A similar algorithm is used as in log, but there are more operations (and more error) due to the 1/ln2 multiplier. When the fma instruction is not available, there is a separate code path for computing x/c - 1 precisely (for which the table size is doubled) and for computing (x/c - 1)/ln2 precisely. The worst-case error is 0.547 ULP (0.55 without fma); the read-only global data size is 1168 bytes (2192 without fma). The non-nearest rounding error is less than 1 ULP. Improvements on Cortex-A72 compared to current glibc master: log latency: 2.04x, log throughput: 1.87x.
author: Szabolcs Nagy <szabolcs.nagy@arm.com> 2018-06-05 16:15:27 +0100
committer: Szabolcs Nagy <szabolcs.nagy@arm.com> 2018-06-06 16:17:19 +0100
commit: d69e504577169c5f75803f1b97a42822898a78b3 (patch)
tree: 6196f61c3386e50ad8257d6a1f21c90ef39dddb8 /math/math_config.h
parent: a7711a35d57cae0c9fcf0cd61903bbf4701240cf (diff)
download: arm-optimized-routines-d69e504577169c5f75803f1b97a42822898a78b3.tar.gz
1 files changed, 14 insertions, 0 deletions
diff --git a/math/math_config.h b/math/math_config.h
index 3383e70..28b7d26 100644
--- a/math/math_config.h
+++ b/math/math_config.h
@@ -309,4 +309,18 @@ extern const struct log_data {
 #endif
 } __log_data HIDDEN;
 
+#define LOG2_TABLE_BITS 6 /* tab (and tab2, when present) hold 1 << LOG2_TABLE_BITS entries. */
+#define LOG2_POLY_ORDER 7 /* poly stores LOG2_POLY_ORDER - 1 coefficients. */
+#define LOG2_POLY1_ORDER 11 /* poly1 stores LOG2_POLY1_ORDER - 1 coefficients. */
+extern const struct log2_data { /* Read-only constant data for the log2 implementation; defined elsewhere. */
+  double invln2hi; /* NOTE(review): presumably the high part of 1/ln2 -- confirm against the data definition. */
+  double invln2lo; /* NOTE(review): presumably the low-order correction to invln2hi -- confirm. */
+  double poly[LOG2_POLY_ORDER - 1]; /* NOTE(review): presumably the general-case polynomial coefficients -- confirm. */
+  double poly1[LOG2_POLY1_ORDER - 1]; /* NOTE(review): presumably a higher-order polynomial for a special input range -- confirm. */
+  struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS]; /* Lookup table; NOTE(review): names suggest 1/c and log2(c) per entry -- confirm. */
+#if !HAVE_FAST_FMA /* The extra table below exists only when no fast FMA is available. */
+  struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS]; /* Second table for the non-FMA path (doubles the table size); NOTE(review): presumably c split into high/low parts -- confirm. */
+#endif
+} __log2_data HIDDEN;
+
 #endif
author	Szabolcs Nagy <szabolcs.nagy@arm.com>	2018-06-05 16:15:27 +0100
committer	Szabolcs Nagy <szabolcs.nagy@arm.com>	2018-06-06 16:17:19 +0100
commit	d69e504577169c5f75803f1b97a42822898a78b3 (patch)
tree	6196f61c3386e50ad8257d6a1f21c90ef39dddb8 /math/math_config.h
parent	a7711a35d57cae0c9fcf0cd61903bbf4701240cf (diff)
download	arm-optimized-routines-d69e504577169c5f75803f1b97a42822898a78b3.tar.gz