aboutsummaryrefslogtreecommitdiff
path: root/pl/math
diff options
context:
space:
mode:
authorJoe Ramsay <Joe.Ramsay@arm.com>2022-11-17 10:42:20 +0000
committerJoe Ramsay <joe.ramsay@arm.com>2022-11-17 10:42:20 +0000
commit56a92ffa259a0e77411541a013951a3fa76d7062 (patch)
tree9e41522f91735592dfcc1ff3a22b6441b44d1a49 /pl/math
parentc1cf1eb0ad5fb98c4c14e8e83e00b779d1e646a2 (diff)
downloadarm-optimized-routines-56a92ffa259a0e77411541a013951a3fa76d7062.tar.gz
pl/math: Add scalar and vector/Neon sinh
New routines are based on the single-precision versions and are accurate to 3 ULP.
Diffstat (limited to 'pl/math')
-rw-r--r--pl/math/include/mathlib.h5
-rw-r--r--pl/math/s_sinh_3u.c6
-rw-r--r--pl/math/sinh_3u.c55
-rw-r--r--pl/math/test/mathbench_funcs.h2
-rwxr-xr-xpl/math/test/runulp.sh23
-rw-r--r--pl/math/test/testcases/directed/sinh.tst21
-rw-r--r--pl/math/test/ulp_funcs.h2
-rw-r--r--pl/math/test/ulp_wrappers.h1
-rw-r--r--pl/math/v_sinh_3u.c45
-rw-r--r--pl/math/vn_sinh_3u.c12
10 files changed, 171 insertions, 1 deletions
diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h
index 9ebe539..96da00a 100644
--- a/pl/math/include/mathlib.h
+++ b/pl/math/include/mathlib.h
@@ -28,6 +28,7 @@ double erfc (double);
double expm1 (double);
double log10 (double);
double log1p (double);
+double sinh (double);
float __s_asinhf (float);
float __s_atanf (float);
@@ -50,6 +51,7 @@ double __s_expm1 (double);
double __s_log10 (double);
double __s_log1p (double);
double __s_log2 (double);
+double __s_sinh (double);
#if __aarch64__
#if __GNUC__ >= 5
@@ -82,6 +84,7 @@ __f64x2_t __v_log1p (__f64x2_t);
__f32x4_t __v_log2f (__f32x4_t);
__f64x2_t __v_log2 (__f64x2_t);
__f32x4_t __v_sinhf (__f32x4_t);
+__f64x2_t __v_sinh (__f64x2_t);
__f32x4_t __v_tanf (__f32x4_t);
#if __GNUC__ >= 9 || __clang_major__ >= 8
@@ -107,6 +110,7 @@ __vpcs __f64x2_t __vn_log1p (__f64x2_t);
__vpcs __f32x4_t __vn_log2f (__f32x4_t);
__vpcs __f64x2_t __vn_log2 (__f64x2_t);
__vpcs __f32x4_t __vn_sinhf (__f32x4_t);
+__vpcs __f64x2_t __vn_sinh (__f64x2_t);
__vpcs __f32x4_t __vn_tanf (__f32x4_t);
/* Vector functions following the vector PCS using ABI names. */
@@ -129,6 +133,7 @@ __vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t);
__vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t);
__vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t);
__vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t);
__vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t);
#endif
diff --git a/pl/math/s_sinh_3u.c b/pl/math/s_sinh_3u.c
new file mode 100644
index 0000000..2c08fa1
--- /dev/null
+++ b/pl/math/s_sinh_3u.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_sinh_3u.c"
diff --git a/pl/math/sinh_3u.c b/pl/math/sinh_3u.c
new file mode 100644
index 0000000..ce3ff13
--- /dev/null
+++ b/pl/math/sinh_3u.c
@@ -0,0 +1,55 @@
+/*
+ * Double-precision sinh(x) function.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define Half 0x3fe0000000000000
+#define OFlowBound \
+ 0x40862e42fefa39f0 /* 0x1.62e42fefa39fp+9, above which using expm1 results \
+ in NaN. */
+
+double
+__exp_dd (double, double);
+
+/* Approximation for double-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The greatest observed error is 2.57 ULP:
+ __v_sinh(0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2
+ want 0x1.ab34e59d678d9p-2. */
+double
+sinh (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t iax = ix & AbsMask;
+ double ax = asdouble (iax);
+ uint64_t sign = ix & ~AbsMask;
+ double halfsign = asdouble (Half | sign);
+
+ if (unlikely (iax >= OFlowBound))
+ {
+ /* Special values and overflow. */
+ if (unlikely (iax > 0x7ff0000000000000))
+ return __math_invalidf (x);
+ /* expm1 overflows a little before sinh. We have to fill this
+ gap by using a different algorithm, in this case we use a
+ double-precision exp helper. For large x sinh(x) is dominated
+ by exp(x), however we cannot compute exp without overflow
+ either. We use the identity: exp(a) = (exp(a / 2)) ^ 2
+ to compute sinh(x) ~= (exp(|x| / 2)) ^ 2 / 2 for x > 0
+ ~= (exp(|x| / 2)) ^ 2 / -2 for x < 0. */
+ double e = __exp_dd (ax / 2, 0);
+ return (e * halfsign) * e;
+ }
+
+ /* Use expm1f to retain acceptable precision for small numbers.
+ Let t = e^(|x|) - 1. */
+ double t = expm1 (ax);
+ /* Then sinh(x) = (t + t / (t + 1)) / 2 for x > 0
+ (t + t / (t + 1)) / -2 for x < 0. */
+ return (t + t / (t + 1)) * halfsign;
+}
diff --git a/pl/math/test/mathbench_funcs.h b/pl/math/test/mathbench_funcs.h
index 9289f45..49208de 100644
--- a/pl/math/test/mathbench_funcs.h
+++ b/pl/math/test/mathbench_funcs.h
@@ -55,6 +55,7 @@ D (log1p, -0.9, 10.0)
D (log2, 0.01, 11.1)
{"powi", 'd', 0, 0.01, 11.1, {.d = powi_wrap}},
D (sin, -3.1, 3.1)
+D (sinh, -10.0, 10.0)
#if WANT_VMATH
ZVNF (asinhf, -10.0, 10.0)
@@ -74,6 +75,7 @@ ZVND (log1p, -0.9, 10.0)
ZVNF (log2f, 0.01, 11.1)
ZVND (log2, 0.01, 11.1)
ZVNF (sinhf, -10.0, 10.0)
+ZVND (sinh, -10.0, 10.0)
ZVNF (tanf, -3.1, 3.1)
{"__s_atan2f", 'f', 0, -10.0, 10.0, {.f = __s_atan2f_wrap}},
{"__s_atan2", 'd', 0, -10.0, 10.0, {.d = __s_atan2_wrap}},
diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh
index c92892b..8835db1 100755
--- a/pl/math/test/runulp.sh
+++ b/pl/math/test/runulp.sh
@@ -179,7 +179,6 @@ t coshf -0 -0x1p-63 100
t coshf -0 -0x1.5a92d8p+6 80000
t coshf -0x1.5a92d8p+6 -inf 2000
-
L=1.68
t expm1 0 0x1p-51 1000
t expm1 -0 -0x1p-51 1000
@@ -188,6 +187,14 @@ t expm1 -0x1p-51 -0x1.740bf7c0d927dp+9 100000
t expm1 0x1.63108c75a1937p+9 inf 100
t expm1 -0x1.740bf7c0d927dp+9 -inf 100
+L=2.08
+t sinh 0 0x1p-51 100
+t sinh -0 -0x1p-51 100
+t sinh 0x1p-51 0x1.62e42fefa39fp+9 100000
+t sinh -0x1p-51 -0x1.62e42fefa39fp+9 100000
+t sinh 0x1.62e42fefa39fp+9 inf 1000
+t sinh -0x1.62e42fefa39fp+9 -inf 1000
+
done
# vector functions
@@ -393,6 +400,15 @@ range_expm1='
-0x1.740bf7c0d927dp+9 -inf 100
'
+range_sinh='
+ 0 0x1p-51 100
+ -0 -0x1p-51 100
+ 0x1p-51 0x1.62e42fefa39fp+9 100000
+ -0x1p-51 -0x1.62e42fefa39fp+9 100000
+ 0x1.62e42fefa39fp+9 inf 1000
+ -0x1.62e42fefa39fp+9 -inf 1000
+'
+
range_sve_cosf='
0 0xffff0000 10000
0x1p-4 0x1p4 500000
@@ -556,6 +572,7 @@ L_expm1f=1.02
L_sinhf=1.76
L_coshf=1.89
L_expm1=1.68
+L_sinh=2.08
L_sve_cosf=1.57
L_sve_cos=1.61
@@ -638,6 +655,10 @@ expm1 __s_expm1 $runs fenv
expm1 __v_expm1 $runv fenv
expm1 __vn_expm1 $runvn fenv
expm1 _ZGVnN2v_expm1 $runvn fenv
+sinh __s_sinh $runs fenv
+sinh __v_sinh $runv fenv
+sinh __vn_sinh $runvn fenv
+sinh _ZGVnN2v_sinh $runvn fenv
atanf __s_atanf $runs
atanf __v_atanf $runv
diff --git a/pl/math/test/testcases/directed/sinh.tst b/pl/math/test/testcases/directed/sinh.tst
new file mode 100644
index 0000000..d8c7d91
--- /dev/null
+++ b/pl/math/test/testcases/directed/sinh.tst
@@ -0,0 +1,21 @@
+; sinh.tst
+;
+; Copyright 1999-2022, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=sinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=sinh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=sinh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=sinh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=sinh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=sinh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
+func=sinh op1=fff00000.00000000 result=fff00000.00000000 errno=0
+func=sinh op1=ffefffff.ffffffff result=fff00000.00000000 errno=ERANGE status=ox
+func=sinh op1=00000000.00000000 result=00000000.00000000 errno=0
+func=sinh op1=80000000.00000000 result=80000000.00000000 errno=0
+
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=sinh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=sinh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/ulp_funcs.h b/pl/math/test/ulp_funcs.h
index a6c3866..b3bd940 100644
--- a/pl/math/test/ulp_funcs.h
+++ b/pl/math/test/ulp_funcs.h
@@ -51,6 +51,7 @@ D1 (erfc)
D1 (expm1)
D1 (log10)
D1 (log1p)
+D1 (sinh)
#if WANT_VMATH
_ZVNF1 (asinh)
_ZVNF1 (atan)
@@ -71,6 +72,7 @@ _ZVND1 (log1p)
_ZVNF1 (log2)
_ZVND1 (log2)
_ZVNF1 (sinh)
+_ZVND1 (sinh)
_ZVNF1 (tan)
#if WANT_SVE_MATH
_ZSVF2 (atan2)
diff --git a/pl/math/test/ulp_wrappers.h b/pl/math/test/ulp_wrappers.h
index 18dfe13..91f8e76 100644
--- a/pl/math/test/ulp_wrappers.h
+++ b/pl/math/test/ulp_wrappers.h
@@ -135,6 +135,7 @@ ZVND1_WRAP(expm1)
ZVND1_WRAP(log10)
ZVND1_WRAP(log1p)
ZVND1_WRAP(log2)
+ZVND1_WRAP(sinh)
#if WANT_SVE_MATH
ZSVNF2_WRAP(atan2)
ZSVNF1_WRAP(atan)
diff --git a/pl/math/v_sinh_3u.c b/pl/math/v_sinh_3u.c
new file mode 100644
index 0000000..c707364
--- /dev/null
+++ b/pl/math/v_sinh_3u.c
@@ -0,0 +1,45 @@
+/*
+ * Double-precision vector sinh(x) function.
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "mathlib.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define Half 0x3fe0000000000000
+#define OFlowBound \
+ 0x40862e42fefa39f0 /* 0x1.62e42fefa39fp+9, above which using expm1 results \
+ in NaN. */
+
+#if V_SUPPORTED
+
+/* Approximation for vector double-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The greatest observed error is 2.57 ULP:
+ sinh(0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2
+ want 0x1.ab34e59d678d9p-2. */
+VPCS_ATTR v_f64_t V_NAME (sinh) (v_f64_t x)
+{
+ v_u64_t ix = v_as_u64_f64 (x);
+ v_u64_t iax = ix & AbsMask;
+ v_f64_t ax = v_as_f64_u64 (iax);
+ v_u64_t sign = ix & ~AbsMask;
+ v_f64_t halfsign = v_as_f64_u64 (sign | Half);
+
+ v_u64_t special = v_cond_u64 (iax >= OFlowBound);
+ /* Fall back to the scalar variant for all lanes if any of them should trigger
+ an exception. */
+ if (unlikely (v_any_u64 (special)))
+ return v_call_f64 (sinh, x, x, v_u64 (-1));
+
+ /* Up to the point that expm1 overflows, we can use it to calculate sinh
+ using a slight rearrangement of the definition of asinh. This allows us to
+ retain acceptable accuracy for very small inputs. */
+ v_f64_t t = V_NAME (expm1) (ax);
+ return (t + t / (t + 1)) * halfsign;
+}
+VPCS_ALIAS
+
+#endif
diff --git a/pl/math/vn_sinh_3u.c b/pl/math/vn_sinh_3u.c
new file mode 100644
index 0000000..2b68578
--- /dev/null
+++ b/pl/math/vn_sinh_3u.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_sinh.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS strong_alias (__vn_sinh, _ZGVnN2v_sinh)
+#include "v_sinh_3u.c"
+#endif