pl/math: Add scalar asinhf

asinhf depends on logf, which has been copied over from the main math directory. The only modification was to change the name logf to optr_aor_log_f32 to resolve any ambiguity with libm. Worst-case error is about 3.4 ULP, at very large input. There are 4 intervals with slightly different error behaviour, as follows: Interval Worst-case accuracy (ulp) |x| < 2^-12 0 |x| < 1 1.3 |x| < sqrt(FLT_MAX) 2.0 |x| < infinity 3.4
author: Joe Ramsay <Joe.Ramsay@arm.com> 2022-07-12 09:13:09 +0100
committer: Joe Ramsay <joe.ramsay@arm.com> 2022-07-12 09:13:09 +0100
commit: ea3ad6c20ddd99aa1ee3f86e8f4bc5fe76cee35c (patch)
tree: 4ce195532f2d8bfafa38425b7b7ea51be0e7104d
parent: d9a816bb547d3acc29e343c85dc568ff86add1c9 (diff)
download: arm-optimized-routines-ea3ad6c20ddd99aa1ee3f86e8f4bc5fe76cee35c.tar.gz
10 files changed, 228 insertions, 0 deletions
diff --git a/pl/math/asinhf_3u5.c b/pl/math/asinhf_3u5.c
new file mode 100644
index 0000000..10f9f31
--- /dev/null
+++ b/pl/math/asinhf_3u5.c
@@ -0,0 +1,77 @@
+/*
+ * Single-precision asinh(x) function.
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define AbsMask (0x7fffffff)
+#define SqrtFltMax (0x1.749e96p+10f)
+#define Ln2 (0x1.62e4p-1f)
+#define One (0x3f8)
+#define ExpM12 (0x398)
+#define QNaN (0x7fc)
+
+#define C(i) __asinhf_data.coeffs[i]
+
+float
+optr_aor_log_f32 (float);
+
+/* asinhf approximation using a variety of approaches on different intervals:
+
+   |x| < 2^-12: Return x. Function is exactly rounded in this region.
+
+   |x| < 1.0: Use custom order-8 polynomial. The largest observed
+     error in this region is 1.3ulps:
+     asinhf(0x1.f0f74cp-1) got 0x1.b88de4p-1 want 0x1.b88de2p-1.
+
+   |x| <= SqrtFltMax: Calculate the result directly using the
+     definition of asinh(x) = ln(x + sqrt(x*x + 1)). The largest
+     observed error in this region is 1.99ulps.
+     asinhf(0x1.00e358p+0) got 0x1.c4849ep-1 want 0x1.c484a2p-1.
+
+   |x| > SqrtFltMax: We cannot square x without overflow at a low
+     cost. At very large x, asinh(x) ~= ln(2x). At huge x we cannot
+     even double x without overflow, so calculate this as ln(x) +
+     ln(2). This largest observed error in this region is 3.39ulps.
+     asinhf(0x1.749e9ep+10) got 0x1.fffff8p+2 want 0x1.fffffep+2.  */
+float
+asinhf (float x)
+{
+  uint32_t ix = asuint (x);
+  uint32_t ia = ix & AbsMask;
+  uint32_t ia12 = ia >> 20;
+  float ax = asfloat (ia);
+  uint32_t sign = ix & ~AbsMask;
+
+  if (ia12 < ExpM12 || ia12 == QNaN)
+    {
+      return x;
+    }
+
+  if (ia12 < One)
+    {
+      float x2 = ax * ax;
+      float x4 = x2 * x2;
+
+      float p_01 = fmaf (ax, C (1), C (0));
+      float p_23 = fmaf (ax, C (3), C (2));
+      float p_45 = fmaf (ax, C (5), C (4));
+      float p_67 = fmaf (ax, C (7), C (6));
+
+      float p_03 = fmaf (x2, p_23, p_01);
+      float p_47 = fmaf (x2, p_67, p_45);
+
+      float p = fmaf (x4, p_47, p_03);
+      float y = fmaf (x2, p, ax);
+      return asfloat (asuint (y) | sign);
+    }
+
+  if (unlikely (ax > SqrtFltMax))
+    {
+      return asfloat (asuint (optr_aor_log_f32 (ax) + Ln2) | sign);
+    }
+
+  return asfloat (asuint (optr_aor_log_f32 (ax + sqrtf (ax * ax + 1))) | sign);
+}
diff --git a/pl/math/asinhf_data.c b/pl/math/asinhf_data.c
new file mode 100644
index 0000000..ce9b632
--- /dev/null
+++ b/pl/math/asinhf_data.c
@@ -0,0 +1,14 @@
+/*
+ * Coefficients for single-precision asinh(x) function.
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Approximate asinhf(x) directly in [2^-12, 1]. See for tools/asinhf.sollya for
+   these coeffs were generated.  */
+const struct asinhf_data __asinhf_data
+  = {.coeffs
+     = {-0x1.9b16fap-19f, -0x1.552baap-3f, -0x1.4e572ap-11f, 0x1.3a81dcp-4f,
+	0x1.65bbaap-10f, -0x1.057f1p-4f, 0x1.6c1d46p-5f, -0x1.4cafe8p-7f}};
diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h
index 0b7a745..c566d12 100644
--- a/pl/math/include/mathlib.h
+++ b/pl/math/include/mathlib.h
@@ -9,6 +9,7 @@
 #ifndef _MATHLIB_H
 #define _MATHLIB_H
 
+float asinhf (float);
 float atan2f (float, float);
 float erfcf (float);
 float erff (float);
diff --git a/pl/math/logf.c b/pl/math/logf.c
new file mode 100644
index 0000000..2962ee7
--- /dev/null
+++ b/pl/math/logf.c
@@ -0,0 +1,75 @@
+/*
+ * Single-precision log function.
+ *
+ * Copyright (c) 2017-2019, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/*
+LOGF_TABLE_BITS = 4
+LOGF_POLY_ORDER = 4
+
+ULP error: 0.818 (nearest rounding.)
+Relative error: 1.957 * 2^-26 (before rounding.)
+*/
+
+#define T __logf_data.tab
+#define A __logf_data.poly
+#define Ln2 __logf_data.ln2
+#define N (1 << LOGF_TABLE_BITS)
+#define OFF 0x3f330000
+
+float
+optr_aor_log_f32 (float x)
+{
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t z, r, r2, y, y0, invc, logc;
+  uint32_t ix, iz, tmp;
+  int k, i;
+
+  ix = asuint (x);
+#if WANT_ROUNDING
+  /* Fix sign of zero with downward rounding when x==1.  */
+  if (unlikely (ix == 0x3f800000))
+    return 0;
+#endif
+  if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
+    {
+      /* x < 0x1p-126 or inf or nan.  */
+      if (ix * 2 == 0)
+	return __math_divzerof (1);
+      if (ix == 0x7f800000) /* log(inf) == inf.  */
+	return x;
+      if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+	return __math_invalidf (x);
+      /* x is subnormal, normalize it.  */
+      ix = asuint (x * 0x1p23f);
+      ix -= 23 << 23;
+    }
+
+  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
+  k = (int32_t) tmp >> 23; /* arithmetic shift */
+  iz = ix - (tmp & 0x1ff << 23);
+  invc = T[i].invc;
+  logc = T[i].logc;
+  z = (double_t) asfloat (iz);
+
+  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
+  r = z * invc - 1;
+  y0 = logc + (double_t) k * Ln2;
+
+  /* Pipelined polynomial evaluation to approximate log1p(r).  */
+  r2 = r * r;
+  y = A[1] * r + A[2];
+  y = A[0] * r2 + y;
+  y = y * r2 + (y0 + r);
+  return eval_as_float (y);
+}
diff --git a/pl/math/math_config.h b/pl/math/math_config.h
index 231d6bc..e77170e 100644
--- a/pl/math/math_config.h
+++ b/pl/math/math_config.h
@@ -419,4 +419,10 @@ extern const struct atanf_poly_data
 {
   float poly[ATANF_POLY_NCOEFFS];
 } __atanf_poly_data HIDDEN;
+
+#define ASINHF_NCOEFFS 8
+extern const struct asinhf_data
+{
+  float coeffs[ASINHF_NCOEFFS];
+} __asinhf_data HIDDEN;
 #endif
diff --git a/pl/math/test/mathbench_funcs.h b/pl/math/test/mathbench_funcs.h
index 0f8e0ca..45ee627 100644
--- a/pl/math/test/mathbench_funcs.h
+++ b/pl/math/test/mathbench_funcs.h
@@ -5,6 +5,7 @@
  * Copyright (c) 2022, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
+F (asinhf, -10.0, 10.0)
 F (atanf, -10.0, 10.0)
 {"atan2f", 'f', 0, -10.0, 10.0, {.f = atan2f_wrap}},
 F (erfcf, -4.0, 10.0)
diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh
index 17b13ae..746562d 100755
--- a/pl/math/test/runulp.sh
+++ b/pl/math/test/runulp.sh
@@ -85,6 +85,12 @@ t atan2f   0.0        1.0  40000
 t atan2f   1.0      100.0  40000
 t atan2f   1e6       1e32  40000
 
+L=3.0
+t asinhf        0  0x1p-12  5000
+t asinhf  0x1p-12      1.0  50000
+t asinhf      1.0   0x1p11  50000
+t asinhf   0x1p11  0x1p127  20000
+
 done
 
 # vector functions
diff --git a/pl/math/test/testcases/directed/asinhf.tst b/pl/math/test/testcases/directed/asinhf.tst
new file mode 100644
index 0000000..d832056
--- /dev/null
+++ b/pl/math/test/testcases/directed/asinhf.tst
@@ -0,0 +1,18 @@
+; asinhf.tst
+;
+; Copyright (c) 2007-2022, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=asinhf op1=7fc00001 result=7fc00001 errno=0
+func=asinhf op1=ffc00001 result=7fc00001 errno=0
+func=asinhf op1=7f800001 result=7fc00001 errno=0 status=i
+func=asinhf op1=ff800001 result=7fc00001 errno=0 status=i
+func=asinhf op1=7f800000 result=7f800000 errno=0
+func=asinhf op1=ff800000 result=ff800000 errno=0
+func=asinhf op1=00000000 result=00000000 errno=0
+func=asinhf op1=80000000 result=80000000 errno=0
+; No exception is raised on certain machines (different version of glibc)
+; Same issue encountered with other function similar to x close to 0
+; Could be due to function so boring no flop is involved in some implementations
+func=asinhf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=asinhf op1=80000001 result=80000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/ulp_funcs.h b/pl/math/test/ulp_funcs.h
index 40fae51..28ef25c 100644
--- a/pl/math/test/ulp_funcs.h
+++ b/pl/math/test/ulp_funcs.h
@@ -4,6 +4,7 @@
  * Copyright (c) 2022, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
+F1 (asinh)
 F2 (atan2)
 F1 (erfc)
 F1 (erf)
diff --git a/pl/math/tools/asinhf.sollya b/pl/math/tools/asinhf.sollya
new file mode 100644
index 0000000..cbe7d62
--- /dev/null
+++ b/pl/math/tools/asinhf.sollya
@@ -0,0 +1,29 @@
+// polynomial for approximating asinh(x)
+//
+// Copyright (c) 2022, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 9;
+
+a = 0x1.0p-12;
+b = 1.0;
+
+f = proc(y) {
+  return asinh(x);
+};
+
+approx = proc(poly, d) {
+  return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+poly = x;
+for i from 2 to deg do {
+  p = roundcoefficients(approx(poly,i), [|SG ...|]);
+  poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do coeff(poly,i);
author	Joe Ramsay <Joe.Ramsay@arm.com>	2022-07-12 09:13:09 +0100
committer	Joe Ramsay <joe.ramsay@arm.com>	2022-07-12 09:13:09 +0100
commit	ea3ad6c20ddd99aa1ee3f86e8f4bc5fe76cee35c (patch)
tree	4ce195532f2d8bfafa38425b7b7ea51be0e7104d
parent	d9a816bb547d3acc29e343c85dc568ff86add1c9 (diff)
download	arm-optimized-routines-ea3ad6c20ddd99aa1ee3f86e8f4bc5fe76cee35c.tar.gz