diff options
author | Joe Ramsay <Joe.Ramsay@arm.com> | 2022-06-20 16:43:08 +0100 |
---|---|---|
committer | Joe Ramsay <joe.ramsay@arm.com> | 2022-06-20 16:43:08 +0100 |
commit | 7f8252e7a0302c08f88a30fedebe7333662f1c01 (patch) | |
tree | 6adfb3fb872efc81ee0d0a8cf50acca68f202bb4 | |
parent | 570d607f2d53dc1d416ec8500487a9e261b15bb9 (diff) | |
download | arm-optimized-routines-7f8252e7a0302c08f88a30fedebe7333662f1c01.tar.gz |
pl/math: Improve accuracy in log10
Increase polynomial order to 12, and update summation scheme to
match AOR log. New coefficients are copied from AOR log.
-rw-r--r-- | pl/math/log10_2u.c (renamed from pl/math/log10_2u1.c) | 40 | ||||
-rw-r--r-- | pl/math/log10_data.c | 39 | ||||
-rw-r--r-- | pl/math/math_config.h | 2 | ||||
-rwxr-xr-x | pl/math/test/runulp.sh | 2 |
4 files changed, 48 insertions, 35 deletions
diff --git a/pl/math/log10_2u1.c b/pl/math/log10_2u.c index 29860ab..3330389 100644 --- a/pl/math/log10_2u1.c +++ b/pl/math/log10_2u.c @@ -20,8 +20,8 @@ #define InvLn10 __log10_data.invln10 #define N (1 << LOG10_TABLE_BITS) #define OFF 0x3fe6000000000000 -#define LO asuint64 (1.0 - 0x1p-5) -#define HI asuint64 (1.0 + 0x1.1p-5) +#define LO asuint64 (1.0 - 0x1p-4) +#define HI asuint64 (1.0 + 0x1.09p-4) /* Top 16 bits of a double. */ static inline uint32_t @@ -34,17 +34,15 @@ top16 (double x) The implementation is similar to that of math/log, except that: - Polynomials are computed for log10(1+r) with r on same intervals as log. - Lookup parameters are scaled (at runtime) to switch from base e to base 10. - Max ULP error: < 1.7 ulp (nearest rounding.) - with (LOG10_POLY1_ORDER = 10, LOG10_POLY_ORDER = 6, N = 128) - Many errors above 2.08 ulp are observed across the whole range of doubles. - The greatest observed error is 2.09 ulp, at around 2.66e-127: - log10(0x1.713b77689f011p-421) got -0x1.fa4c5bacfbe41p+6 - want -0x1.fa4c5bacfbe43p+6. */ + Many errors above 1.59 ulp are observed across the whole range of doubles. + The greatest observed error is 1.61 ulp, at around 0.965: + log10(0x1.dc8710333a29bp-1) got -0x1.fee26884905a6p-6 + want -0x1.fee26884905a8p-6. */ double log10 (double x) { /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ - double_t w, z, r, r2, r3, y, invc, logc, kd; + double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo; uint64_t ix, iz, tmp; uint32_t top; int k, i; @@ -61,13 +59,23 @@ log10 (double x) r = x - 1.0; r2 = r * r; r3 = r * r2; - /* Worst-case error is around 0.727 ULP. */ y = r3 * (B[1] + r * B[2] + r2 * B[3] - + r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8]))); - w = B[0] * r2; /* B[0] == -0.5. */ + + r3 + * (B[4] + r * B[5] + r2 * B[6] + + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10]))); + /* Worst-case error is around 0.507 ULP. */ + w = r * 0x1p27; + double_t rhi = r + w - w; + double_t rlo = r - rhi; + w = rhi * rhi * B[0]; + hi = r + w; + lo = r - hi + w; + lo += B[0] * rlo * (rhi + r); + y += lo; + y += hi; /* Scale by 1/ln(10). Polynomial already contains scaling. */ - y = (y + w) + r * InvLn10; + y = y * InvLn10; return eval_as_double (y); } @@ -109,13 +117,15 @@ log10 (double x) /* w = log(c) + k*Ln2hi. */ w = kd * Ln2hi + logc; + hi = w + r; + lo = w - hi + r + kd * Ln2lo; /* log10(x) = (w + r)/log(10) + (log10(1+r) - r/log(10)). */ r2 = r * r; /* rounding error: 0x1p-54/N^2. */ - y = r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])); /* Scale by 1/ln(10). Polynomial already contains scaling. */ - y = y + ((r + kd * Ln2lo) + w) * InvLn10; + y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi; + y = y * InvLn10; return eval_as_double (y); } diff --git a/pl/math/log10_data.c b/pl/math/log10_data.c index e844203..e02e9b1 100644 --- a/pl/math/log10_data.c +++ b/pl/math/log10_data.c @@ -14,29 +14,32 @@ const struct log10_data __log10_data = { .ln2lo = 0x1.ef35793c76730p-45, .invln10 = 0x1.bcb7b1526e50ep-2, .poly1 = { -#if LOG10_POLY1_ORDER == 10 -// relative error: 0x1.d34d5238p-63 -// in -0x1p-5 0x1.1p-5 (|log10(1+x)| > 0x1p-5 outside this interval) --0x1.bcb7b1526e50ep-3, -0x1.287a7636f4314p-3, --0x1.bcb7b1526eeebp-4, -0x1.63c62776b50e6p-4, --0x1.287a76329b69dp-4, -0x1.fc3f7e81f44c2p-5, --0x1.bcb7b7893672ap-5, -0x1.8c0fa601b4779p-5, --0x1.64403e39d7278p-5, +#if LOG10_POLY1_ORDER == 12 +// relative error: 0x1.c04d76cp-63 +// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval) +-0x1p-1, +0x1.5555555555577p-2, +-0x1.ffffffffffdcbp-3, +0x1.999999995dd0cp-3, +-0x1.55555556745a7p-3, +0x1.24924a344de3p-3, +-0x1.fffffa4423d65p-4, +0x1.c7184282ad6cap-4, +-0x1.999eb43b068ffp-4, +0x1.78182f7afd085p-4, +-0x1.5521375d145cdp-4, #endif }, .poly = { #if N == 128 && LOG10_POLY_ORDER == 6 -// relative error: 0x1.29fc52bp-56 +// relative error: 0x1.926199e8p-56 +// abs error: 0x1.882ff33p-65 // in -0x1.fp-9 0x1.fp-9 --0x1.bcb7b1526e50fp-3, -0x1.287a7636c4076p-3, --0x1.bcb7b151bffaep-4, -0x1.63c77372810dep-4, --0x1.287bdeec963c2p-4, +-0x1.0000000000001p-1, +0x1.555555551305bp-2, +-0x1.fffffffeb459p-3, +0x1.999b324f10111p-3, +-0x1.55575e506c89fp-3, #endif }, /* Algorithm: diff --git a/pl/math/math_config.h b/pl/math/math_config.h index 47b192c..231d6bc 100644 --- a/pl/math/math_config.h +++ b/pl/math/math_config.h @@ -336,7 +336,7 @@ extern const struct logf_data /* Data for low accuracy log10 (with 1/ln(10) included in coefficients). */ #define LOG10_TABLE_BITS 7 #define LOG10_POLY_ORDER 6 -#define LOG10_POLY1_ORDER 10 +#define LOG10_POLY1_ORDER 12 extern const struct log10_data { double ln2hi; diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh index 674d718..17b13ae 100755 --- a/pl/math/test/runulp.sh +++ b/pl/math/test/runulp.sh @@ -48,7 +48,7 @@ t log10f 0x1p-26 0x1p3 50000 t log10f 0x1p-4 0x1p4 50000 t log10f 0 inf 50000 -L=1.6 +L=1.15 Ldir= t log10 0 0xffff000000000000 10000 t log10 0x1p-4 0x1p4 40000 |