diff options
author | Joe Ramsay <Joe.Ramsay@arm.com> | 2022-07-12 12:25:09 +0100 |
---|---|---|
committer | Joe Ramsay <joe.ramsay@arm.com> | 2022-07-12 12:25:09 +0100 |
commit | c6e5af7d30eb825f53ec7596021f7237a0ca23e3 (patch) | |
tree | be17246f896644f49f22ec3bef4d2e73c020d918 | |
parent | d082d55feea607f231358cc49a958da419fac537 (diff) | |
download | arm-optimized-routines-c6e5af7d30eb825f53ec7596021f7237a0ca23e3.tar.gz |
pl/math: Add scalar log1p
New routine uses a polynomial on reduced interval. Worst-case error is
about 1.7 ULP.
-rw-r--r-- | pl/math/include/mathlib.h | 1 | ||||
-rw-r--r-- | pl/math/log1p_2u.c | 144 | ||||
-rw-r--r-- | pl/math/log1p_data.c | 18 | ||||
-rw-r--r-- | pl/math/math_config.h | 6 | ||||
-rw-r--r-- | pl/math/test/mathbench_funcs.h | 1 | ||||
-rwxr-xr-x | pl/math/test/runulp.sh | 10 | ||||
-rw-r--r-- | pl/math/test/testcases/directed/log1p.tst | 22 | ||||
-rw-r--r-- | pl/math/test/ulp_funcs.h | 1 | ||||
-rw-r--r-- | pl/math/tools/log1p.sollya | 30 |
9 files changed, 233 insertions, 0 deletions
diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h index bd2d8c2..8a92a47 100644 --- a/pl/math/include/mathlib.h +++ b/pl/math/include/mathlib.h @@ -18,6 +18,7 @@ float log10f (float); double asinh (double); double atan2 (double, double); double log10 (double); +double log1p (double); float __s_atanf (float); float __s_atan2f (float, float); diff --git a/pl/math/log1p_2u.c b/pl/math/log1p_2u.c new file mode 100644 index 0000000..c214954 --- /dev/null +++ b/pl/math/log1p_2u.c @@ -0,0 +1,144 @@ +/* + * Double-precision log(1+x) function. + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +#define Ln2Hi 0x1.62e42fefa3800p-1 +#define Ln2Lo 0x1.ef35793c76730p-45 +#define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)). */ +#define OneMHfRt2Top \ + 0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)). */ +#define OneTop12 0x3ff +#define BottomMask 0xffffffff +#define OneMHfRt2 0x3fd2bec333018866 +#define Rt2MOne 0x3fda827999fcef32 +#define AbsMask 0x7fffffffffffffff +#define ExpM63 0x3c00 + +#define C(i) __log1p_data.coeffs[i] + +static inline double +eval_poly (double f) +{ + /* Evaluate polynomial using Estrin's method. */ + double p_01 = fma (f, C (1), C (0)); + double p_23 = fma (f, C (3), C (2)); + double p_45 = fma (f, C (5), C (4)); + double p_67 = fma (f, C (7), C (6)); + double p_89 = fma (f, C (9), C (8)); + double p_ab = fma (f, C (11), C (10)); + double p_cd = fma (f, C (13), C (12)); + double p_ef = fma (f, C (15), C (14)); + double p_gh = fma (f, C (17), C (16)); + + double f2 = f * f; + double p_03 = fma (f2, p_23, p_01); + double p_47 = fma (f2, p_67, p_45); + double p_8b = fma (f2, p_ab, p_89); + double p_cf = fma (f2, p_ef, p_cd); + double p_gi = fma (f2, C (18), p_gh); + + double f4 = f2 * f2; + double p_07 = fma (f4, p_47, p_03); + double p_8f = fma (f4, p_cf, p_8b); + + double f8 = f4 * f4; + double p_0f = fma (f8, p_8f, p_07); + + return fma (f8 * f8, p_gi, p_0f); +} + +/* log1p approximation using polynomial on reduced interval. Largest + observed errors are near the lower boundary of the region where k + is 0. + Maximum measured error: 1.7ULP. + log1p(-0x1.2e515c0f31f8p-2) got -0x1.6648c36863fc2p-2 + want -0x1.6648c36863fc4p-2. */ +double +log1p (double x) +{ + uint64_t ix = asuint64 (x); + uint64_t ia = ix & AbsMask; + uint32_t ia16 = ia >> 48; + + /* Handle special cases first. */ + if (unlikely (ia16 >= 0x7ff0 || ix >= 0xbff0000000000000 + || ix == 0x8000000000000000)) + { + if (ix == 0x8000000000000000 || ix == 0x7ff0000000000000) + { + /* x == -0 => log1p(x) = -0. + x == Inf => log1p(x) = Inf. */ + return x; + } + if (ix == 0xbff0000000000000) + { + /* x == -1 => log1p(x) = -Inf. */ + return __math_divzero (-1); + ; + } + if (ia16 >= 0x7ff0) + { + /* x == +/-NaN => log1p(x) = NaN. */ + return __math_invalid (asdouble (ia)); + } + /* x < -1 => log1p(x) = NaN. + x == -Inf => log1p(x) = NaN. */ + return __math_invalid (x); + } + + /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f + is in [sqrt(2)/2, sqrt(2)]): + log1p(x) = k*log(2) + log1p(f). + + f may not be representable exactly, so we need a correction term: + let m = round(1 + x), c = (1 + x) - m. + c << m: at very small x, log1p(x) ~ x, hence: + log(1+x) - log(m) ~ c/m. + + We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */ + + uint64_t sign = ix & ~AbsMask; + if (ia <= OneMHfRt2 || (!sign && ia <= Rt2MOne)) + { + if (unlikely (ia16 <= ExpM63)) + { + /* If exponent of x <= -63 then shortcut the polynomial and avoid + underflow by just returning x, which is exactly rounded in this + region. */ + return x; + } + /* If x is in [sqrt(2)/2 - 1, sqrt(2) - 1] then we can shortcut all the + logic below, as k = 0 and f = x and therefore representable exactly. + All we need is to return the polynomial. */ + return fma (x, eval_poly (x) * x, x); + } + + /* Obtain correctly scaled k by manipulation in the exponent. */ + double m = x + 1; + uint64_t mi = asuint64 (m); + uint32_t u = (mi >> 32) + OneMHfRt2Top; + int32_t k = (int32_t) (u >> 20) - OneTop12; + + /* Correction term c/m. */ + double cm = (x - (m - 1)) / m; + + /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ + uint32_t utop = (u & 0x000fffff) + HfRt2Top; + uint64_t u_red = ((uint64_t) utop << 32) | (mi & BottomMask); + double f = asdouble (u_red) - 1; + + /* Approximate log1p(x) on the reduced input using a polynomial. Because + log1p(0)=0 we choose an approximation of the form: + x + C0*x^2 + C1*x^3 + C2x^4 + ... + Hence approximation has the form f + f^2 * P(f) + where P(x) = C0 + C1*x + C2x^2 + ... */ + double p = fma (f, eval_poly (f) * f, f); + + double kd = k; + double y = fma (Ln2Lo, kd, cm); + return y + fma (Ln2Hi, kd, p); +} diff --git a/pl/math/log1p_data.c b/pl/math/log1p_data.c new file mode 100644 index 0000000..9380d13 --- /dev/null +++ b/pl/math/log1p_data.c @@ -0,0 +1,18 @@ +/* + * Data used in double-precision log(1+x) function. + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +/* Polynomial coefficients generated using Remez algorithm, see + log1p.sollya for details. */ +const struct log1p_data __log1p_data = { + .coeffs = {-0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2, + 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3, + -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4, + 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4, + -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5, + 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4, + -0x1.cfa7385bdb37ep-6}}; diff --git a/pl/math/math_config.h b/pl/math/math_config.h index 22654ad..5e46e2d 100644 --- a/pl/math/math_config.h +++ b/pl/math/math_config.h @@ -452,4 +452,10 @@ extern const struct asinh_data { double poly[ASINH_NCOEFFS]; } __asinh_data HIDDEN; + +#define LOG1P_NCOEFFS 19 +extern const struct log1p_data +{ + double coeffs[LOG1P_NCOEFFS]; +} __log1p_data HIDDEN; #endif diff --git a/pl/math/test/mathbench_funcs.h b/pl/math/test/mathbench_funcs.h index 9fc0e32..441c937 100644 --- a/pl/math/test/mathbench_funcs.h +++ b/pl/math/test/mathbench_funcs.h @@ -18,6 +18,7 @@ D (atan, -10.0, 10.0) D (erf, -6,6) D (erfc, -6.0, 28.0) D (log10, 0.01, 11.1) +D (log1p, -0.9, 10.0) #if WANT_VMATH F (__s_atanf, -10.0, 10.0) diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh index 3ef2d0e..72e5378 100755 --- a/pl/math/test/runulp.sh +++ b/pl/math/test/runulp.sh @@ -100,6 +100,16 @@ t asinh -1.0 -100.0 10000 t asinh 100.0 inf 50000 t asinh -100.0 -inf 10000 +L=2.0 +t log1p -10.0 10.0 10000 +t log1p 0.0 0x1p-23 50000 +t log1p 0x1p-23 0.001 50000 +t log1p 0.001 1.0 50000 +t log1p 0.0 -0x1p-23 50000 +t log1p -0x1p-23 -0.001 50000 +t log1p -0.001 -1.0 50000 +t log1p -1.0 inf 5000 + done # vector functions diff --git a/pl/math/test/testcases/directed/log1p.tst b/pl/math/test/testcases/directed/log1p.tst new file mode 100644 index 0000000..41a1896 --- /dev/null +++ b/pl/math/test/testcases/directed/log1p.tst @@ -0,0 +1,22 @@ +; log1p.tst +; +; Copyright (c) 2009-2022, Arm Limited. +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +func=log1p op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 +func=log1p op1=fff80000.00000001 result=7ff80000.00000001 errno=0 +func=log1p op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i +func=log1p op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i +func=log1p op1=fff02000.00000000 result=7ff80000.00000001 errno=0 status=i +func=log1p op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 +; Cases 6, 9 , 10, 11, 12 fail with certain versions of GLIBC and not others. +; The main reason seems to be the handling of errno and exceptions. + +func=log1p op1=00000000.00000000 result=00000000.00000000 errno=0 +func=log1p op1=80000000.00000000 result=80000000.00000000 errno=0 + +; No exception is raised with certain versions of glibc. Functions +; approximated by x near zero may not generate/implement flops and +; thus may not raise exceptions. +func=log1p op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux +func=log1p op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux diff --git a/pl/math/test/ulp_funcs.h b/pl/math/test/ulp_funcs.h index 7e8145f..739ecfd 100644 --- a/pl/math/test/ulp_funcs.h +++ b/pl/math/test/ulp_funcs.h @@ -13,6 +13,7 @@ D1 (asinh) D2 (atan2) D1 (erfc) D1 (log10) +D1 (log1p) #if WANT_VMATH F (__s_atanf, __s_atanf, atan, mpfr_atan, 1, 1, f1, 0) F (__s_atan, __s_atan, atanl, mpfr_atan, 1, 0, d1, 0) diff --git a/pl/math/tools/log1p.sollya b/pl/math/tools/log1p.sollya new file mode 100644 index 0000000..fb159b3 --- /dev/null +++ b/pl/math/tools/log1p.sollya @@ -0,0 +1,30 @@ +// polynomial for approximating log(1+x) in double precision +// +// Copyright (c) 2022, Arm Limited. +// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +deg = 20; + +a = sqrt(2)/2-1; +b = sqrt(2)-1; + +f = proc(y) { + return log(1+y); +}; + +approx = proc(poly, d) { + return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); +}; + +poly = x; +for i from 2 to deg do { + p = roundcoefficients(approx(poly,i), [|D ...|]); + poly = poly + x^i*coeff(p,0); +}; + + +print("coeffs:"); +display = hexadecimal; +for i from 2 to deg do coeff(poly,i); +print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); +print("in [",a,b,"]"); |