Add new log implementation

Optimized log using a carefully generated lookup table with 1/c and log(c) values for small intervals around 1. Each log(c) is very near a double precision value: it has about 62 bits of precision. The algorithm is log(2^k x) = k log(2) + log(c) + log(x/c), where the last term is approximated by a polynomial of x/c - 1. Near 1 a single polynomial of x - 1 is used. There is a separate code path for computing x/c - 1 precisely when the fma instruction is not available, in which case the table size is doubled. With the default configuration settings the worst case error is 0.519 ULP (and 0.520 without FMA), and the read-only global data size is 2192 bytes (4240 without FMA). The non-nearest rounding error is less than 1 ULP. Improvements on Cortex-A72 compared to current glibc master: log latency: 1.98x, log throughput: 2.92x.
author: Szabolcs Nagy <szabolcs.nagy@arm.com> 2018-05-31 18:13:20 +0100
committer: Szabolcs Nagy <szabolcs.nagy@arm.com> 2018-06-04 17:32:00 +0100
commit: 56091b75f32fee23274e8e5d1518ced3c668442f (patch)
tree: be87badb12ca46026296b3a0c7c2d73e4ae36702 /math/math_config.h
parent: e08890a7ff26481b27293348e6acfb30cbe75c39 (diff)
download: arm-optimized-routines-56091b75f32fee23274e8e5d1518ced3c668442f.tar.gz
1 files changed, 23 insertions, 0 deletions
diff --git a/math/math_config.h b/math/math_config.h
index 9b9e102..3383e70 100644
--- a/math/math_config.h
+++ b/math/math_config.h
@@ -54,6 +54,15 @@
 # endif
 #endif
 
+/* Compiler can inline fma as a single instruction.  */
+#ifndef HAVE_FAST_FMA
+# if __aarch64__
+#   define HAVE_FAST_FMA 1
+# else
+#   define HAVE_FAST_FMA 0
+# endif
+#endif
+
 #if HAVE_FAST_ROUND
 # define TOINT_INTRINSICS 1
 
@@ -286,4 +295,18 @@ extern const struct exp_data {
   uint64_t tab[2*(1 << EXP_TABLE_BITS)];
 } __exp_data HIDDEN;
 
+#define LOG_TABLE_BITS 7
+#define LOG_POLY_ORDER 6
+#define LOG_POLY1_ORDER 12
+extern const struct log_data {
+  double ln2hi;
+  double ln2lo;
+  double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
+  double poly1[LOG_POLY1_ORDER - 1];
+  struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
+#if !HAVE_FAST_FMA
+  struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
+#endif
+} __log_data HIDDEN;
+
 #endif
author	Szabolcs Nagy <szabolcs.nagy@arm.com>	2018-05-31 18:13:20 +0100
committer	Szabolcs Nagy <szabolcs.nagy@arm.com>	2018-06-04 17:32:00 +0100
commit	56091b75f32fee23274e8e5d1518ced3c668442f (patch)
tree	be87badb12ca46026296b3a0c7c2d73e4ae36702 /math/math_config.h
parent	e08890a7ff26481b27293348e6acfb30cbe75c39 (diff)
download	arm-optimized-routines-56091b75f32fee23274e8e5d1518ced3c668442f.tar.gz