pl/math: Improve accuracy in log10

Increase polynomial order to 12, and update summation scheme to match AOR log. New coefficients are copied from AOR log.
author: Joe Ramsay <Joe.Ramsay@arm.com> 2022-06-20 16:43:08 +0100
committer: Joe Ramsay <joe.ramsay@arm.com> 2022-06-20 16:43:08 +0100
commit: 7f8252e7a0302c08f88a30fedebe7333662f1c01 (patch)
tree: 6adfb3fb872efc81ee0d0a8cf50acca68f202bb4
parent: 570d607f2d53dc1d416ec8500487a9e261b15bb9 (diff)
download: arm-optimized-routines-7f8252e7a0302c08f88a30fedebe7333662f1c01.tar.gz
4 files changed, 48 insertions, 35 deletions
diff --git a/pl/math/log10_2u1.c b/pl/math/log10_2u.c
index 29860ab..3330389 100644
--- a/pl/math/log10_2u1.c
+++ b/pl/math/log10_2u.c
@@ -20,8 +20,8 @@
 #define InvLn10 __log10_data.invln10
 #define N (1 << LOG10_TABLE_BITS)
 #define OFF 0x3fe6000000000000
-#define LO asuint64 (1.0 - 0x1p-5)
-#define HI asuint64 (1.0 + 0x1.1p-5)
+#define LO asuint64 (1.0 - 0x1p-4)
+#define HI asuint64 (1.0 + 0x1.09p-4)
 
 /* Top 16 bits of a double.  */
 static inline uint32_t
@@ -34,17 +34,15 @@ top16 (double x)
    The implementation is similar to that of math/log, except that:
    - Polynomials are computed for log10(1+r) with r on same intervals as log.
    - Lookup parameters are scaled (at runtime) to switch from base e to base 10.
-   Max ULP error: < 1.7 ulp (nearest rounding.)
-     with (LOG10_POLY1_ORDER = 10, LOG10_POLY_ORDER = 6, N = 128)
-   Many errors above 2.08 ulp are observed across the whole range of doubles.
-   The greatest observed error is 2.09 ulp, at around 2.66e-127:
-   log10(0x1.713b77689f011p-421) got -0x1.fa4c5bacfbe41p+6
-				want -0x1.fa4c5bacfbe43p+6.  */
+   Many errors above 1.59 ulp are observed across the whole range of doubles.
+   The greatest observed error is 1.61 ulp, at around 0.965:
+   log10(0x1.dc8710333a29bp-1) got -0x1.fee26884905a6p-6
+			      want -0x1.fee26884905a8p-6.  */
 double
 log10 (double x)
 {
   /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
-  double_t w, z, r, r2, r3, y, invc, logc, kd;
+  double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
   uint64_t ix, iz, tmp;
   uint32_t top;
   int k, i;
@@ -61,13 +59,23 @@ log10 (double x)
       r = x - 1.0;
       r2 = r * r;
       r3 = r * r2;
-      /* Worst-case error is around 0.727 ULP.  */
       y = r3
 	  * (B[1] + r * B[2] + r2 * B[3]
-	     + r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
-      w = B[0] * r2; /* B[0] == -0.5.  */
+	     + r3
+		 * (B[4] + r * B[5] + r2 * B[6]
+		    + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
+      /* Worst-case error is around 0.507 ULP.  */
+      w = r * 0x1p27;
+      double_t rhi = r + w - w;
+      double_t rlo = r - rhi;
+      w = rhi * rhi * B[0];
+      hi = r + w;
+      lo = r - hi + w;
+      lo += B[0] * rlo * (rhi + r);
+      y += lo;
+      y += hi;
       /* Scale by 1/ln(10). Polynomial already contains scaling.  */
-      y = (y + w) + r * InvLn10;
+      y = y * InvLn10;
 
       return eval_as_double (y);
     }
@@ -109,13 +117,15 @@ log10 (double x)
 
   /* w = log(c) + k*Ln2hi.  */
   w = kd * Ln2hi + logc;
+  hi = w + r;
+  lo = w - hi + r + kd * Ln2lo;
 
   /* log10(x) = (w + r)/log(10) + (log10(1+r) - r/log(10)).  */
   r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
-  y = r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4]));
 
   /* Scale by 1/ln(10). Polynomial already contains scaling.  */
-  y = y + ((r + kd * Ln2lo) + w) * InvLn10;
+  y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
+  y = y * InvLn10;
 
   return eval_as_double (y);
 }
diff --git a/pl/math/log10_data.c b/pl/math/log10_data.c
index e844203..e02e9b1 100644
--- a/pl/math/log10_data.c
+++ b/pl/math/log10_data.c
@@ -14,29 +14,32 @@ const struct log10_data __log10_data = {
 .ln2lo = 0x1.ef35793c76730p-45,
 .invln10 = 0x1.bcb7b1526e50ep-2,
 .poly1 = {
-#if LOG10_POLY1_ORDER == 10
-// relative error: 0x1.d34d5238p-63
-// in -0x1p-5 0x1.1p-5 (|log10(1+x)| > 0x1p-5 outside this interval)
--0x1.bcb7b1526e50ep-3,
-0x1.287a7636f4314p-3,
--0x1.bcb7b1526eeebp-4,
-0x1.63c62776b50e6p-4,
--0x1.287a76329b69dp-4,
-0x1.fc3f7e81f44c2p-5,
--0x1.bcb7b7893672ap-5,
-0x1.8c0fa601b4779p-5,
--0x1.64403e39d7278p-5,
+#if LOG10_POLY1_ORDER == 12
+// relative error: 0x1.c04d76cp-63
+// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
+-0x1p-1,
+0x1.5555555555577p-2,
+-0x1.ffffffffffdcbp-3,
+0x1.999999995dd0cp-3,
+-0x1.55555556745a7p-3,
+0x1.24924a344de3p-3,
+-0x1.fffffa4423d65p-4,
+0x1.c7184282ad6cap-4,
+-0x1.999eb43b068ffp-4,
+0x1.78182f7afd085p-4,
+-0x1.5521375d145cdp-4,
 #endif
 },
 .poly = {
 #if N == 128 && LOG10_POLY_ORDER == 6
-// relative error: 0x1.29fc52bp-56
+// relative error: 0x1.926199e8p-56
+// abs error: 0x1.882ff33p-65
 // in -0x1.fp-9 0x1.fp-9
--0x1.bcb7b1526e50fp-3,
-0x1.287a7636c4076p-3,
--0x1.bcb7b151bffaep-4,
-0x1.63c77372810dep-4,
--0x1.287bdeec963c2p-4,
+-0x1.0000000000001p-1,
+0x1.555555551305bp-2,
+-0x1.fffffffeb459p-3,
+0x1.999b324f10111p-3,
+-0x1.55575e506c89fp-3,
 #endif
 },
 /* Algorithm:
diff --git a/pl/math/math_config.h b/pl/math/math_config.h
index 47b192c..231d6bc 100644
--- a/pl/math/math_config.h
+++ b/pl/math/math_config.h
@@ -336,7 +336,7 @@ extern const struct logf_data
 /* Data for low accuracy log10 (with 1/ln(10) included in coefficients).  */
 #define LOG10_TABLE_BITS 7
 #define LOG10_POLY_ORDER 6
-#define LOG10_POLY1_ORDER 10
+#define LOG10_POLY1_ORDER 12
 extern const struct log10_data
 {
   double ln2hi;
diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh
index 674d718..17b13ae 100755
--- a/pl/math/test/runulp.sh
+++ b/pl/math/test/runulp.sh
@@ -48,7 +48,7 @@ t log10f  0x1p-26   0x1p3   50000
 t log10f  0x1p-4    0x1p4   50000
 t log10f  0         inf     50000
 
-L=1.6
+L=1.15
 Ldir=
 t log10  0 0xffff000000000000 10000
 t log10  0x1p-4    0x1p4      40000
author	Joe Ramsay <Joe.Ramsay@arm.com>	2022-06-20 16:43:08 +0100
committer	Joe Ramsay <joe.ramsay@arm.com>	2022-06-20 16:43:08 +0100
commit	7f8252e7a0302c08f88a30fedebe7333662f1c01 (patch)
tree	6adfb3fb872efc81ee0d0a8cf50acca68f202bb4
parent	570d607f2d53dc1d416ec8500487a9e261b15bb9 (diff)
download	arm-optimized-routines-7f8252e7a0302c08f88a30fedebe7333662f1c01.tar.gz