aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoe Ramsay <Joe.Ramsay@arm.com>2022-07-12 09:13:09 +0100
committerJoe Ramsay <joe.ramsay@arm.com>2022-07-12 09:13:09 +0100
commitea3ad6c20ddd99aa1ee3f86e8f4bc5fe76cee35c (patch)
tree4ce195532f2d8bfafa38425b7b7ea51be0e7104d
parentd9a816bb547d3acc29e343c85dc568ff86add1c9 (diff)
downloadarm-optimized-routines-ea3ad6c20ddd99aa1ee3f86e8f4bc5fe76cee35c.tar.gz
pl/math: Add scalar asinhf
asinhf depends on logf, which has been copied over from the main math directory. The only modification was to change the name logf to optr_aor_log_f32 to resolve any ambiguity with libm. Worst-case error is about 3.4 ULP, at very large input. There are 4 intervals with slightly different error behaviour, as follows: Interval Worst-case accuracy (ulp) |x| < 2^-12 0 |x| < 1 1.3 |x| < sqrt(FLT_MAX) 2.0 |x| < infinity 3.4
-rw-r--r--pl/math/asinhf_3u5.c77
-rw-r--r--pl/math/asinhf_data.c14
-rw-r--r--pl/math/include/mathlib.h1
-rw-r--r--pl/math/logf.c75
-rw-r--r--pl/math/math_config.h6
-rw-r--r--pl/math/test/mathbench_funcs.h1
-rwxr-xr-xpl/math/test/runulp.sh6
-rw-r--r--pl/math/test/testcases/directed/asinhf.tst18
-rw-r--r--pl/math/test/ulp_funcs.h1
-rw-r--r--pl/math/tools/asinhf.sollya29
10 files changed, 228 insertions, 0 deletions
diff --git a/pl/math/asinhf_3u5.c b/pl/math/asinhf_3u5.c
new file mode 100644
index 0000000..10f9f31
--- /dev/null
+++ b/pl/math/asinhf_3u5.c
@@ -0,0 +1,77 @@
+/*
+ * Single-precision asinh(x) function.
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define AbsMask (0x7fffffff)
+#define SqrtFltMax (0x1.749e96p+10f)
+#define Ln2 (0x1.62e4p-1f)
+#define One (0x3f8)
+#define ExpM12 (0x398)
+#define QNaN (0x7fc)
+
+#define C(i) __asinhf_data.coeffs[i]
+
+float
+optr_aor_log_f32 (float);
+
+/* asinhf approximation using a variety of approaches on different intervals:
+
+ |x| < 2^-12: Return x. Function is exactly rounded in this region.
+
+ |x| < 1.0: Use custom order-8 polynomial. The largest observed
+ error in this region is 1.3ulps:
+ asinhf(0x1.f0f74cp-1) got 0x1.b88de4p-1 want 0x1.b88de2p-1.
+
+ |x| <= SqrtFltMax: Calculate the result directly using the
+ definition of asinh(x) = ln(x + sqrt(x*x + 1)). The largest
+ observed error in this region is 1.99ulps.
+ asinhf(0x1.00e358p+0) got 0x1.c4849ep-1 want 0x1.c484a2p-1.
+
+ |x| > SqrtFltMax: We cannot square x without overflow at a low
+ cost. At very large x, asinh(x) ~= ln(2x). At huge x we cannot
+ even double x without overflow, so calculate this as ln(x) +
+ ln(2). This largest observed error in this region is 3.39ulps.
+ asinhf(0x1.749e9ep+10) got 0x1.fffff8p+2 want 0x1.fffffep+2. */
+float
+asinhf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t ia = ix & AbsMask;
+ uint32_t ia12 = ia >> 20;
+ float ax = asfloat (ia);
+ uint32_t sign = ix & ~AbsMask;
+
+ if (ia12 < ExpM12 || ia12 == QNaN)
+ {
+ return x;
+ }
+
+ if (ia12 < One)
+ {
+ float x2 = ax * ax;
+ float x4 = x2 * x2;
+
+ float p_01 = fmaf (ax, C (1), C (0));
+ float p_23 = fmaf (ax, C (3), C (2));
+ float p_45 = fmaf (ax, C (5), C (4));
+ float p_67 = fmaf (ax, C (7), C (6));
+
+ float p_03 = fmaf (x2, p_23, p_01);
+ float p_47 = fmaf (x2, p_67, p_45);
+
+ float p = fmaf (x4, p_47, p_03);
+ float y = fmaf (x2, p, ax);
+ return asfloat (asuint (y) | sign);
+ }
+
+ if (unlikely (ax > SqrtFltMax))
+ {
+ return asfloat (asuint (optr_aor_log_f32 (ax) + Ln2) | sign);
+ }
+
+ return asfloat (asuint (optr_aor_log_f32 (ax + sqrtf (ax * ax + 1))) | sign);
+}
diff --git a/pl/math/asinhf_data.c b/pl/math/asinhf_data.c
new file mode 100644
index 0000000..ce9b632
--- /dev/null
+++ b/pl/math/asinhf_data.c
@@ -0,0 +1,14 @@
+/*
+ * Coefficients for single-precision asinh(x) function.
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Approximate asinhf(x) directly in [2^-12, 1]. See for tools/asinhf.sollya for
+ these coeffs were generated. */
+const struct asinhf_data __asinhf_data
+ = {.coeffs
+ = {-0x1.9b16fap-19f, -0x1.552baap-3f, -0x1.4e572ap-11f, 0x1.3a81dcp-4f,
+ 0x1.65bbaap-10f, -0x1.057f1p-4f, 0x1.6c1d46p-5f, -0x1.4cafe8p-7f}};
diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h
index 0b7a745..c566d12 100644
--- a/pl/math/include/mathlib.h
+++ b/pl/math/include/mathlib.h
@@ -9,6 +9,7 @@
#ifndef _MATHLIB_H
#define _MATHLIB_H
+float asinhf (float);
float atan2f (float, float);
float erfcf (float);
float erff (float);
diff --git a/pl/math/logf.c b/pl/math/logf.c
new file mode 100644
index 0000000..2962ee7
--- /dev/null
+++ b/pl/math/logf.c
@@ -0,0 +1,75 @@
+/*
+ * Single-precision log function.
+ *
+ * Copyright (c) 2017-2019, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/*
+LOGF_TABLE_BITS = 4
+LOGF_POLY_ORDER = 4
+
+ULP error: 0.818 (nearest rounding.)
+Relative error: 1.957 * 2^-26 (before rounding.)
+*/
+
+#define T __logf_data.tab
+#define A __logf_data.poly
+#define Ln2 __logf_data.ln2
+#define N (1 << LOGF_TABLE_BITS)
+#define OFF 0x3f330000
+
+float
+optr_aor_log_f32 (float x)
+{
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t z, r, r2, y, y0, invc, logc;
+ uint32_t ix, iz, tmp;
+ int k, i;
+
+ ix = asuint (x);
+#if WANT_ROUNDING
+ /* Fix sign of zero with downward rounding when x==1. */
+ if (unlikely (ix == 0x3f800000))
+ return 0;
+#endif
+ if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
+ {
+ /* x < 0x1p-126 or inf or nan. */
+ if (ix * 2 == 0)
+ return __math_divzerof (1);
+ if (ix == 0x7f800000) /* log(inf) == inf. */
+ return x;
+ if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+ return __math_invalidf (x);
+ /* x is subnormal, normalize it. */
+ ix = asuint (x * 0x1p23f);
+ ix -= 23 << 23;
+ }
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
+ k = (int32_t) tmp >> 23; /* arithmetic shift */
+ iz = ix - (tmp & 0x1ff << 23);
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = (double_t) asfloat (iz);
+
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
+ r = z * invc - 1;
+ y0 = logc + (double_t) k * Ln2;
+
+ /* Pipelined polynomial evaluation to approximate log1p(r). */
+ r2 = r * r;
+ y = A[1] * r + A[2];
+ y = A[0] * r2 + y;
+ y = y * r2 + (y0 + r);
+ return eval_as_float (y);
+}
diff --git a/pl/math/math_config.h b/pl/math/math_config.h
index 231d6bc..e77170e 100644
--- a/pl/math/math_config.h
+++ b/pl/math/math_config.h
@@ -419,4 +419,10 @@ extern const struct atanf_poly_data
{
float poly[ATANF_POLY_NCOEFFS];
} __atanf_poly_data HIDDEN;
+
+#define ASINHF_NCOEFFS 8
+extern const struct asinhf_data
+{
+ float coeffs[ASINHF_NCOEFFS];
+} __asinhf_data HIDDEN;
#endif
diff --git a/pl/math/test/mathbench_funcs.h b/pl/math/test/mathbench_funcs.h
index 0f8e0ca..45ee627 100644
--- a/pl/math/test/mathbench_funcs.h
+++ b/pl/math/test/mathbench_funcs.h
@@ -5,6 +5,7 @@
* Copyright (c) 2022, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
+F (asinhf, -10.0, 10.0)
F (atanf, -10.0, 10.0)
{"atan2f", 'f', 0, -10.0, 10.0, {.f = atan2f_wrap}},
F (erfcf, -4.0, 10.0)
diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh
index 17b13ae..746562d 100755
--- a/pl/math/test/runulp.sh
+++ b/pl/math/test/runulp.sh
@@ -85,6 +85,12 @@ t atan2f 0.0 1.0 40000
t atan2f 1.0 100.0 40000
t atan2f 1e6 1e32 40000
+L=3.0
+t asinhf 0 0x1p-12 5000
+t asinhf 0x1p-12 1.0 50000
+t asinhf 1.0 0x1p11 50000
+t asinhf 0x1p11 0x1p127 20000
+
done
# vector functions
diff --git a/pl/math/test/testcases/directed/asinhf.tst b/pl/math/test/testcases/directed/asinhf.tst
new file mode 100644
index 0000000..d832056
--- /dev/null
+++ b/pl/math/test/testcases/directed/asinhf.tst
@@ -0,0 +1,18 @@
+; asinhf.tst
+;
+; Copyright (c) 2007-2022, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=asinhf op1=7fc00001 result=7fc00001 errno=0
+func=asinhf op1=ffc00001 result=7fc00001 errno=0
+func=asinhf op1=7f800001 result=7fc00001 errno=0 status=i
+func=asinhf op1=ff800001 result=7fc00001 errno=0 status=i
+func=asinhf op1=7f800000 result=7f800000 errno=0
+func=asinhf op1=ff800000 result=ff800000 errno=0
+func=asinhf op1=00000000 result=00000000 errno=0
+func=asinhf op1=80000000 result=80000000 errno=0
+; No exception is raised on certain machines (different version of glibc)
+; Same issue encountered with other function similar to x close to 0
+; Could be due to function so boring no flop is involved in some implementations
+func=asinhf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=asinhf op1=80000001 result=80000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/ulp_funcs.h b/pl/math/test/ulp_funcs.h
index 40fae51..28ef25c 100644
--- a/pl/math/test/ulp_funcs.h
+++ b/pl/math/test/ulp_funcs.h
@@ -4,6 +4,7 @@
* Copyright (c) 2022, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
+F1 (asinh)
F2 (atan2)
F1 (erfc)
F1 (erf)
diff --git a/pl/math/tools/asinhf.sollya b/pl/math/tools/asinhf.sollya
new file mode 100644
index 0000000..cbe7d62
--- /dev/null
+++ b/pl/math/tools/asinhf.sollya
@@ -0,0 +1,29 @@
+// polynomial for approximating asinh(x)
+//
+// Copyright (c) 2022, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 9;
+
+a = 0x1.0p-12;
+b = 1.0;
+
+f = proc(y) {
+ return asinh(x);
+};
+
+approx = proc(poly, d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+poly = x;
+for i from 2 to deg do {
+ p = roundcoefficients(approx(poly,i), [|SG ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do coeff(poly,i);