pl/math: Add scalar log1p

New routine uses a polynomial on reduced interval. Worst-case error is about 1.7 ULP.
author: Joe Ramsay <Joe.Ramsay@arm.com> 2022-07-12 12:25:09 +0100
committer: Joe Ramsay <joe.ramsay@arm.com> 2022-07-12 12:25:09 +0100
commit: c6e5af7d30eb825f53ec7596021f7237a0ca23e3 (patch)
tree: be17246f896644f49f22ec3bef4d2e73c020d918
parent: d082d55feea607f231358cc49a958da419fac537 (diff)
download: arm-optimized-routines-c6e5af7d30eb825f53ec7596021f7237a0ca23e3.tar.gz
9 files changed, 233 insertions, 0 deletions
diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h
index bd2d8c2..8a92a47 100644
--- a/pl/math/include/mathlib.h
+++ b/pl/math/include/mathlib.h
@@ -18,6 +18,7 @@ float log10f (float);
 double asinh (double);
 double atan2 (double, double);
 double log10 (double);
+double log1p (double);
 
 float __s_atanf (float);
 float __s_atan2f (float, float);
diff --git a/pl/math/log1p_2u.c b/pl/math/log1p_2u.c
new file mode 100644
index 0000000..c214954
--- /dev/null
+++ b/pl/math/log1p_2u.c
@@ -0,0 +1,144 @@
+/*
+ * Double-precision log(1+x) function.
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define Ln2Hi 0x1.62e42fefa3800p-1
+#define Ln2Lo 0x1.ef35793c76730p-45
+#define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)).  */
+#define OneMHfRt2Top                                                           \
+  0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)).  */
+#define OneTop12 0x3ff
+#define BottomMask 0xffffffff
+#define OneMHfRt2 0x3fd2bec333018866
+#define Rt2MOne 0x3fda827999fcef32
+#define AbsMask 0x7fffffffffffffff
+#define ExpM63 0x3c00
+
+#define C(i) __log1p_data.coeffs[i]
+
+static inline double
+eval_poly (double f)
+{
+  /* Evaluate polynomial using Estrin's method.  */
+  double p_01 = fma (f, C (1), C (0));
+  double p_23 = fma (f, C (3), C (2));
+  double p_45 = fma (f, C (5), C (4));
+  double p_67 = fma (f, C (7), C (6));
+  double p_89 = fma (f, C (9), C (8));
+  double p_ab = fma (f, C (11), C (10));
+  double p_cd = fma (f, C (13), C (12));
+  double p_ef = fma (f, C (15), C (14));
+  double p_gh = fma (f, C (17), C (16));
+
+  double f2 = f * f;
+  double p_03 = fma (f2, p_23, p_01);
+  double p_47 = fma (f2, p_67, p_45);
+  double p_8b = fma (f2, p_ab, p_89);
+  double p_cf = fma (f2, p_ef, p_cd);
+  double p_gi = fma (f2, C (18), p_gh);
+
+  double f4 = f2 * f2;
+  double p_07 = fma (f4, p_47, p_03);
+  double p_8f = fma (f4, p_cf, p_8b);
+
+  double f8 = f4 * f4;
+  double p_0f = fma (f8, p_8f, p_07);
+
+  return fma (f8 * f8, p_gi, p_0f);
+}
+
+/* log1p approximation using polynomial on reduced interval. Largest
+   observed errors are near the lower boundary of the region where k
+   is 0.
+   Maximum measured error: 1.7ULP.
+   log1p(-0x1.2e515c0f31f8p-2) got  -0x1.6648c36863fc2p-2
+			       want -0x1.6648c36863fc4p-2.  */
+double
+log1p (double x)
+{
+  uint64_t ix = asuint64 (x);
+  uint64_t ia = ix & AbsMask;
+  uint32_t ia16 = ia >> 48;
+
+  /* Handle special cases first.  */
+  if (unlikely (ia16 >= 0x7ff0 || ix >= 0xbff0000000000000
+		|| ix == 0x8000000000000000))
+    {
+      if (ix == 0x8000000000000000 || ix == 0x7ff0000000000000)
+	{
+	  /* x ==  -0 => log1p(x) =  -0.
+	     x == Inf => log1p(x) = Inf.  */
+	  return x;
+	}
+      if (ix == 0xbff0000000000000)
+	{
+	  /* x == -1 => log1p(x) = -Inf.  */
+	  return __math_divzero (-1);
+	  ;
+	}
+      if (ia16 >= 0x7ff0)
+	{
+	  /* x == +/-NaN => log1p(x) = NaN.  */
+	  return __math_invalid (asdouble (ia));
+	}
+      /* x  <      -1 => log1p(x) =  NaN.
+	 x ==    -Inf => log1p(x) =  NaN.  */
+      return __math_invalid (x);
+    }
+
+  /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f
+			   is in [sqrt(2)/2, sqrt(2)]):
+     log1p(x) = k*log(2) + log1p(f).
+
+     f may not be representable exactly, so we need a correction term:
+     let m = round(1 + x), c = (1 + x) - m.
+     c << m: at very small x, log1p(x) ~ x, hence:
+     log(1+x) - log(m) ~ c/m.
+
+     We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m.  */
+
+  uint64_t sign = ix & ~AbsMask;
+  if (ia <= OneMHfRt2 || (!sign && ia <= Rt2MOne))
+    {
+      if (unlikely (ia16 <= ExpM63))
+	{
+	  /* If exponent of x <= -63 then shortcut the polynomial and avoid
+	     underflow by just returning x, which is exactly rounded in this
+	     region.  */
+	  return x;
+	}
+      /* If x is in [sqrt(2)/2 - 1, sqrt(2) - 1] then we can shortcut all the
+	 logic below, as k = 0 and f = x and therefore representable exactly.
+	 All we need is to return the polynomial.  */
+      return fma (x, eval_poly (x) * x, x);
+    }
+
+  /* Obtain correctly scaled k by manipulation in the exponent.  */
+  double m = x + 1;
+  uint64_t mi = asuint64 (m);
+  uint32_t u = (mi >> 32) + OneMHfRt2Top;
+  int32_t k = (int32_t) (u >> 20) - OneTop12;
+
+  /* Correction term c/m.  */
+  double cm = (x - (m - 1)) / m;
+
+  /* Reduce x to f in [sqrt(2)/2, sqrt(2)].  */
+  uint32_t utop = (u & 0x000fffff) + HfRt2Top;
+  uint64_t u_red = ((uint64_t) utop << 32) | (mi & BottomMask);
+  double f = asdouble (u_red) - 1;
+
+  /* Approximate log1p(x) on the reduced input using a polynomial. Because
+     log1p(0)=0 we choose an approximation of the form:
+	x + C0*x^2 + C1*x^3 + C2x^4 + ...
+     Hence approximation has the form f + f^2 * P(f)
+	where P(x) = C0 + C1*x + C2x^2 + ...  */
+  double p = fma (f, eval_poly (f) * f, f);
+
+  double kd = k;
+  double y = fma (Ln2Lo, kd, cm);
+  return y + fma (Ln2Hi, kd, p);
+}
diff --git a/pl/math/log1p_data.c b/pl/math/log1p_data.c
new file mode 100644
index 0000000..9380d13
--- /dev/null
+++ b/pl/math/log1p_data.c
@@ -0,0 +1,18 @@
+/*
+ * Data used in double-precision log(1+x) function.
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Polynomial coefficients generated using Remez algorithm, see
+   log1p.sollya for details.  */
+const struct log1p_data __log1p_data = {
+  .coeffs = {-0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
+	     0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
+	     -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
+	     0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
+	     -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
+	     0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
+	     -0x1.cfa7385bdb37ep-6}};
diff --git a/pl/math/math_config.h b/pl/math/math_config.h
index 22654ad..5e46e2d 100644
--- a/pl/math/math_config.h
+++ b/pl/math/math_config.h
@@ -452,4 +452,10 @@ extern const struct asinh_data
 {
   double poly[ASINH_NCOEFFS];
 } __asinh_data HIDDEN;
+
+#define LOG1P_NCOEFFS 19
+extern const struct log1p_data
+{
+  double coeffs[LOG1P_NCOEFFS];
+} __log1p_data HIDDEN;
 #endif
diff --git a/pl/math/test/mathbench_funcs.h b/pl/math/test/mathbench_funcs.h
index 9fc0e32..441c937 100644
--- a/pl/math/test/mathbench_funcs.h
+++ b/pl/math/test/mathbench_funcs.h
@@ -18,6 +18,7 @@ D (atan, -10.0, 10.0)
 D (erf, -6,6)
 D (erfc, -6.0, 28.0)
 D (log10, 0.01, 11.1)
+D (log1p, -0.9, 10.0)
 
 #if WANT_VMATH
 F (__s_atanf, -10.0, 10.0)
diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh
index 3ef2d0e..72e5378 100755
--- a/pl/math/test/runulp.sh
+++ b/pl/math/test/runulp.sh
@@ -100,6 +100,16 @@ t asinh     -1.0  -100.0   10000
 t asinh    100.0     inf   50000
 t asinh   -100.0    -inf   10000
 
+L=2.0
+t log1p    -10.0     10.0  10000
+t log1p      0.0  0x1p-23  50000
+t log1p  0x1p-23    0.001  50000
+t log1p    0.001      1.0  50000
+t log1p      0.0 -0x1p-23  50000
+t log1p -0x1p-23   -0.001  50000
+t log1p   -0.001     -1.0  50000
+t log1p     -1.0      inf   5000
+
 done
 
 # vector functions
diff --git a/pl/math/test/testcases/directed/log1p.tst b/pl/math/test/testcases/directed/log1p.tst
new file mode 100644
index 0000000..41a1896
--- /dev/null
+++ b/pl/math/test/testcases/directed/log1p.tst
@@ -0,0 +1,22 @@
+; log1p.tst
+;
+; Copyright (c) 2009-2022, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=log1p op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=log1p op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=log1p op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log1p op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log1p op1=fff02000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=log1p op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+; Cases 6, 9 , 10, 11, 12 fail with certain versions of GLIBC and not others.
+; The main reason seems to be the handling of errno and exceptions.
+
+func=log1p op1=00000000.00000000 result=00000000.00000000 errno=0
+func=log1p op1=80000000.00000000 result=80000000.00000000 errno=0
+
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=log1p op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=log1p op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/ulp_funcs.h b/pl/math/test/ulp_funcs.h
index 7e8145f..739ecfd 100644
--- a/pl/math/test/ulp_funcs.h
+++ b/pl/math/test/ulp_funcs.h
@@ -13,6 +13,7 @@ D1 (asinh)
 D2 (atan2)
 D1 (erfc)
 D1 (log10)
+D1 (log1p)
 #if WANT_VMATH
 F (__s_atanf, __s_atanf, atan, mpfr_atan, 1, 1, f1, 0)
 F (__s_atan, __s_atan, atanl, mpfr_atan, 1, 0, d1, 0)
diff --git a/pl/math/tools/log1p.sollya b/pl/math/tools/log1p.sollya
new file mode 100644
index 0000000..fb159b3
--- /dev/null
+++ b/pl/math/tools/log1p.sollya
@@ -0,0 +1,30 @@
+// polynomial for approximating log(1+x) in double precision
+//
+// Copyright (c) 2022, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 20;
+
+a = sqrt(2)/2-1;
+b = sqrt(2)-1;
+
+f = proc(y) {
+  return log(1+y);
+};
+
+approx = proc(poly, d) {
+  return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+poly = x;
+for i from 2 to deg do {
+  p = roundcoefficients(approx(poly,i), [|D ...|]);
+  poly = poly + x^i*coeff(p,0);
+};
+
+
+print("coeffs:");
+display = hexadecimal;
+for i from 2 to deg do coeff(poly,i);
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
author	Joe Ramsay <Joe.Ramsay@arm.com>	2022-07-12 12:25:09 +0100
committer	Joe Ramsay <joe.ramsay@arm.com>	2022-07-12 12:25:09 +0100
commit	c6e5af7d30eb825f53ec7596021f7237a0ca23e3 (patch)
tree	be17246f896644f49f22ec3bef4d2e73c020d918
parent	d082d55feea607f231358cc49a958da419fac537 (diff)
download	arm-optimized-routines-c6e5af7d30eb825f53ec7596021f7237a0ca23e3.tar.gz