aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoe Ramsay <Joe.Ramsay@arm.com>2022-06-20 16:43:08 +0100
committerJoe Ramsay <joe.ramsay@arm.com>2022-06-20 16:43:08 +0100
commit7f8252e7a0302c08f88a30fedebe7333662f1c01 (patch)
tree6adfb3fb872efc81ee0d0a8cf50acca68f202bb4
parent570d607f2d53dc1d416ec8500487a9e261b15bb9 (diff)
downloadarm-optimized-routines-7f8252e7a0302c08f88a30fedebe7333662f1c01.tar.gz
pl/math: Improve accuracy in log10
Increase polynomial order to 12, and update summation scheme to match AOR log. New coefficients are copied from AOR log.
-rw-r--r--pl/math/log10_2u.c (renamed from pl/math/log10_2u1.c)40
-rw-r--r--pl/math/log10_data.c39
-rw-r--r--pl/math/math_config.h2
-rwxr-xr-xpl/math/test/runulp.sh2
4 files changed, 48 insertions, 35 deletions
diff --git a/pl/math/log10_2u1.c b/pl/math/log10_2u.c
index 29860ab..3330389 100644
--- a/pl/math/log10_2u1.c
+++ b/pl/math/log10_2u.c
@@ -20,8 +20,8 @@
#define InvLn10 __log10_data.invln10
#define N (1 << LOG10_TABLE_BITS)
#define OFF 0x3fe6000000000000
-#define LO asuint64 (1.0 - 0x1p-5)
-#define HI asuint64 (1.0 + 0x1.1p-5)
+#define LO asuint64 (1.0 - 0x1p-4)
+#define HI asuint64 (1.0 + 0x1.09p-4)
/* Top 16 bits of a double. */
static inline uint32_t
@@ -34,17 +34,15 @@ top16 (double x)
The implementation is similar to that of math/log, except that:
- Polynomials are computed for log10(1+r) with r on same intervals as log.
- Lookup parameters are scaled (at runtime) to switch from base e to base 10.
- Max ULP error: < 1.7 ulp (nearest rounding.)
- with (LOG10_POLY1_ORDER = 10, LOG10_POLY_ORDER = 6, N = 128)
- Many errors above 2.08 ulp are observed across the whole range of doubles.
- The greatest observed error is 2.09 ulp, at around 2.66e-127:
- log10(0x1.713b77689f011p-421) got -0x1.fa4c5bacfbe41p+6
- want -0x1.fa4c5bacfbe43p+6. */
+ Many errors above 1.59 ulp are observed across the whole range of doubles.
+ The greatest observed error is 1.61 ulp, at around 0.965:
+ log10(0x1.dc8710333a29bp-1) got -0x1.fee26884905a6p-6
+ want -0x1.fee26884905a8p-6. */
double
log10 (double x)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
- double_t w, z, r, r2, r3, y, invc, logc, kd;
+ double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
@@ -61,13 +59,23 @@ log10 (double x)
r = x - 1.0;
r2 = r * r;
r3 = r * r2;
- /* Worst-case error is around 0.727 ULP. */
y = r3
* (B[1] + r * B[2] + r2 * B[3]
- + r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
- w = B[0] * r2; /* B[0] == -0.5. */
+ + r3
+ * (B[4] + r * B[5] + r2 * B[6]
+ + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
+ /* Worst-case error is around 0.507 ULP. */
+ w = r * 0x1p27;
+ double_t rhi = r + w - w;
+ double_t rlo = r - rhi;
+ w = rhi * rhi * B[0];
+ hi = r + w;
+ lo = r - hi + w;
+ lo += B[0] * rlo * (rhi + r);
+ y += lo;
+ y += hi;
/* Scale by 1/ln(10). Polynomial already contains scaling. */
- y = (y + w) + r * InvLn10;
+ y = y * InvLn10;
return eval_as_double (y);
}
@@ -109,13 +117,15 @@ log10 (double x)
/* w = log(c) + k*Ln2hi. */
w = kd * Ln2hi + logc;
+ hi = w + r;
+ lo = w - hi + r + kd * Ln2lo;
/* log10(x) = (w + r)/log(10) + (log10(1+r) - r/log(10)). */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
- y = r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4]));
/* Scale by 1/ln(10). Polynomial already contains scaling. */
- y = y + ((r + kd * Ln2lo) + w) * InvLn10;
+ y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
+ y = y * InvLn10;
return eval_as_double (y);
}
diff --git a/pl/math/log10_data.c b/pl/math/log10_data.c
index e844203..e02e9b1 100644
--- a/pl/math/log10_data.c
+++ b/pl/math/log10_data.c
@@ -14,29 +14,32 @@ const struct log10_data __log10_data = {
.ln2lo = 0x1.ef35793c76730p-45,
.invln10 = 0x1.bcb7b1526e50ep-2,
.poly1 = {
-#if LOG10_POLY1_ORDER == 10
-// relative error: 0x1.d34d5238p-63
-// in -0x1p-5 0x1.1p-5 (|log10(1+x)| > 0x1p-5 outside this interval)
--0x1.bcb7b1526e50ep-3,
-0x1.287a7636f4314p-3,
--0x1.bcb7b1526eeebp-4,
-0x1.63c62776b50e6p-4,
--0x1.287a76329b69dp-4,
-0x1.fc3f7e81f44c2p-5,
--0x1.bcb7b7893672ap-5,
-0x1.8c0fa601b4779p-5,
--0x1.64403e39d7278p-5,
+#if LOG10_POLY1_ORDER == 12
+// relative error: 0x1.c04d76cp-63
+// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
+-0x1p-1,
+0x1.5555555555577p-2,
+-0x1.ffffffffffdcbp-3,
+0x1.999999995dd0cp-3,
+-0x1.55555556745a7p-3,
+0x1.24924a344de3p-3,
+-0x1.fffffa4423d65p-4,
+0x1.c7184282ad6cap-4,
+-0x1.999eb43b068ffp-4,
+0x1.78182f7afd085p-4,
+-0x1.5521375d145cdp-4,
#endif
},
.poly = {
#if N == 128 && LOG10_POLY_ORDER == 6
-// relative error: 0x1.29fc52bp-56
+// relative error: 0x1.926199e8p-56
+// abs error: 0x1.882ff33p-65
// in -0x1.fp-9 0x1.fp-9
--0x1.bcb7b1526e50fp-3,
-0x1.287a7636c4076p-3,
--0x1.bcb7b151bffaep-4,
-0x1.63c77372810dep-4,
--0x1.287bdeec963c2p-4,
+-0x1.0000000000001p-1,
+0x1.555555551305bp-2,
+-0x1.fffffffeb459p-3,
+0x1.999b324f10111p-3,
+-0x1.55575e506c89fp-3,
#endif
},
/* Algorithm:
diff --git a/pl/math/math_config.h b/pl/math/math_config.h
index 47b192c..231d6bc 100644
--- a/pl/math/math_config.h
+++ b/pl/math/math_config.h
@@ -336,7 +336,7 @@ extern const struct logf_data
/* Data for low accuracy log10 (with 1/ln(10) included in coefficients). */
#define LOG10_TABLE_BITS 7
#define LOG10_POLY_ORDER 6
-#define LOG10_POLY1_ORDER 10
+#define LOG10_POLY1_ORDER 12
extern const struct log10_data
{
double ln2hi;
diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh
index 674d718..17b13ae 100755
--- a/pl/math/test/runulp.sh
+++ b/pl/math/test/runulp.sh
@@ -48,7 +48,7 @@ t log10f 0x1p-26 0x1p3 50000
t log10f 0x1p-4 0x1p4 50000
t log10f 0 inf 50000
-L=1.6
+L=1.15
Ldir=
t log10 0 0xffff000000000000 10000
t log10 0x1p-4 0x1p4 40000