pl/math: Add scalar and vector/Neon sinh

New routines are based on the single-precision versions and are accurate to 3 ULP.
author: Joe Ramsay <Joe.Ramsay@arm.com> 2022-11-17 10:42:20 +0000
committer: Joe Ramsay <joe.ramsay@arm.com> 2022-11-17 10:42:20 +0000
commit: 56a92ffa259a0e77411541a013951a3fa76d7062 (patch)
tree: 9e41522f91735592dfcc1ff3a22b6441b44d1a49 /pl/math
parent: c1cf1eb0ad5fb98c4c14e8e83e00b779d1e646a2 (diff)
download: arm-optimized-routines-56a92ffa259a0e77411541a013951a3fa76d7062.tar.gz
10 files changed, 171 insertions, 1 deletions
diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h
index 9ebe539..96da00a 100644
--- a/pl/math/include/mathlib.h
+++ b/pl/math/include/mathlib.h
@@ -28,6 +28,7 @@ double erfc (double);
 double expm1 (double);
 double log10 (double);
 double log1p (double);
+double sinh (double);
 
 float __s_asinhf (float);
 float __s_atanf (float);
@@ -50,6 +51,7 @@ double __s_expm1 (double);
 double __s_log10 (double);
 double __s_log1p (double);
 double __s_log2 (double);
+double __s_sinh (double);
 
 #if __aarch64__
 #if __GNUC__ >= 5
@@ -82,6 +84,7 @@ __f64x2_t __v_log1p (__f64x2_t);
 __f32x4_t __v_log2f (__f32x4_t);
 __f64x2_t __v_log2 (__f64x2_t);
 __f32x4_t __v_sinhf (__f32x4_t);
+__f64x2_t __v_sinh (__f64x2_t);
 __f32x4_t __v_tanf (__f32x4_t);
 
 #if __GNUC__ >= 9 || __clang_major__ >= 8
@@ -107,6 +110,7 @@ __vpcs __f64x2_t __vn_log1p (__f64x2_t);
 __vpcs __f32x4_t __vn_log2f (__f32x4_t);
 __vpcs __f64x2_t __vn_log2 (__f64x2_t);
 __vpcs __f32x4_t __vn_sinhf (__f32x4_t);
+__vpcs __f64x2_t __vn_sinh (__f64x2_t);
 __vpcs __f32x4_t __vn_tanf (__f32x4_t);
 
 /* Vector functions following the vector PCS using ABI names.  */
@@ -129,6 +133,7 @@ __vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t);
 __vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t);
 __vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t);
 __vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t);
 __vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t);
 
 #endif
diff --git a/pl/math/s_sinh_3u.c b/pl/math/s_sinh_3u.c
new file mode 100644
index 0000000..2c08fa1
--- /dev/null
+++ b/pl/math/s_sinh_3u.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define SCALAR 1
+#include "v_sinh_3u.c"
diff --git a/pl/math/sinh_3u.c b/pl/math/sinh_3u.c
new file mode 100644
index 0000000..ce3ff13
--- /dev/null
+++ b/pl/math/sinh_3u.c
@@ -0,0 +1,55 @@
+/*
+ * Double-precision sinh(x) function.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define Half 0x3fe0000000000000
+#define OFlowBound                                                             \
+  0x40862e42fefa39f0 /* 0x1.62e42fefa39fp+9, above which using expm1 results   \
+			in NaN.  */
+
+double
+__exp_dd (double, double);
+
+/* Approximation for double-precision sinh(x) using expm1.
+   sinh(x) = (exp(x) - exp(-x)) / 2.
+   The greatest observed error is 2.57 ULP:
+   __v_sinh(0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2
+				 want 0x1.ab34e59d678d9p-2.  */
+double
+sinh (double x)
+{
+  uint64_t ix = asuint64 (x);
+  uint64_t iax = ix & AbsMask;
+  double ax = asdouble (iax);
+  uint64_t sign = ix & ~AbsMask;
+  double halfsign = asdouble (Half | sign);
+
+  if (unlikely (iax >= OFlowBound))
+    {
+      /* Special values and overflow.  */
+      if (unlikely (iax > 0x7ff0000000000000))
+	return __math_invalidf (x);
+      /* expm1 overflows a little before sinh. We have to fill this
+	 gap by using a different algorithm, in this case we use a
+	 double-precision exp helper. For large x sinh(x) is dominated
+	 by exp(x), however we cannot compute exp without overflow
+	 either. We use the identity: exp(a) = (exp(a / 2)) ^ 2
+	 to compute sinh(x) ~= (exp(|x| / 2)) ^ 2 / 2    for x > 0
+			    ~= (exp(|x| / 2)) ^ 2 / -2   for x < 0.  */
+      double e = __exp_dd (ax / 2, 0);
+      return (e * halfsign) * e;
+    }
+
+  /* Use expm1f to retain acceptable precision for small numbers.
+     Let t = e^(|x|) - 1.  */
+  double t = expm1 (ax);
+  /* Then sinh(x) = (t + t / (t + 1)) / 2   for x > 0
+		    (t + t / (t + 1)) / -2  for x < 0.  */
+  return (t + t / (t + 1)) * halfsign;
+}
diff --git a/pl/math/test/mathbench_funcs.h b/pl/math/test/mathbench_funcs.h
index 9289f45..49208de 100644
--- a/pl/math/test/mathbench_funcs.h
+++ b/pl/math/test/mathbench_funcs.h
@@ -55,6 +55,7 @@ D (log1p, -0.9, 10.0)
 D (log2, 0.01, 11.1)
 {"powi", 'd', 0, 0.01, 11.1, {.d = powi_wrap}},
 D (sin, -3.1, 3.1)
+D (sinh, -10.0, 10.0)
 
 #if WANT_VMATH
 ZVNF (asinhf, -10.0, 10.0)
@@ -74,6 +75,7 @@ ZVND (log1p, -0.9, 10.0)
 ZVNF (log2f, 0.01, 11.1)
 ZVND (log2, 0.01, 11.1)
 ZVNF (sinhf, -10.0, 10.0)
+ZVND (sinh, -10.0, 10.0)
 ZVNF (tanf, -3.1, 3.1)
 {"__s_atan2f", 'f', 0, -10.0, 10.0, {.f = __s_atan2f_wrap}},
 {"__s_atan2", 'd', 0, -10.0, 10.0, {.d = __s_atan2_wrap}},
diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh
index c92892b..8835db1 100755
--- a/pl/math/test/runulp.sh
+++ b/pl/math/test/runulp.sh
@@ -179,7 +179,6 @@ t coshf -0             -0x1p-63         100
 t coshf -0             -0x1.5a92d8p+6   80000
 t coshf -0x1.5a92d8p+6 -inf             2000
 
-
 L=1.68
 t expm1  0                     0x1p-51              1000
 t expm1 -0                    -0x1p-51              1000
@@ -188,6 +187,14 @@ t expm1 -0x1p-51              -0x1.740bf7c0d927dp+9 100000
 t expm1  0x1.63108c75a1937p+9  inf                  100
 t expm1 -0x1.740bf7c0d927dp+9 -inf                  100
 
+L=2.08
+t sinh  0                    0x1p-51             100
+t sinh -0                   -0x1p-51             100
+t sinh  0x1p-51              0x1.62e42fefa39fp+9 100000
+t sinh -0x1p-51             -0x1.62e42fefa39fp+9 100000
+t sinh  0x1.62e42fefa39fp+9  inf                 1000
+t sinh -0x1.62e42fefa39fp+9 -inf                 1000
+
 done
 
 # vector functions
@@ -393,6 +400,15 @@ range_expm1='
  -0x1.740bf7c0d927dp+9 -inf                  100
 '
 
+range_sinh='
+  0                    0x1p-51             100
+ -0                   -0x1p-51             100
+  0x1p-51              0x1.62e42fefa39fp+9 100000
+ -0x1p-51             -0x1.62e42fefa39fp+9 100000
+  0x1.62e42fefa39fp+9  inf                 1000
+ -0x1.62e42fefa39fp+9 -inf                 1000
+'
+
 range_sve_cosf='
  0    0xffff0000    10000
  0x1p-4    0x1p4    500000
@@ -556,6 +572,7 @@ L_expm1f=1.02
 L_sinhf=1.76
 L_coshf=1.89
 L_expm1=1.68
+L_sinh=2.08
 
 L_sve_cosf=1.57
 L_sve_cos=1.61
@@ -638,6 +655,10 @@ expm1  __s_expm1       $runs    fenv
 expm1  __v_expm1       $runv    fenv
 expm1  __vn_expm1      $runvn   fenv
 expm1  _ZGVnN2v_expm1  $runvn   fenv
+sinh   __s_sinh        $runs    fenv
+sinh   __v_sinh        $runv    fenv
+sinh   __vn_sinh       $runvn   fenv
+sinh   _ZGVnN2v_sinh   $runvn   fenv
 
 atanf  __s_atanf       $runs
 atanf  __v_atanf       $runv
diff --git a/pl/math/test/testcases/directed/sinh.tst b/pl/math/test/testcases/directed/sinh.tst
new file mode 100644
index 0000000..d8c7d91
--- /dev/null
+++ b/pl/math/test/testcases/directed/sinh.tst
@@ -0,0 +1,21 @@
+; sinh.tst
+;
+; Copyright 1999-2022, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=sinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=sinh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=sinh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=sinh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=sinh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=sinh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
+func=sinh op1=fff00000.00000000 result=fff00000.00000000 errno=0
+func=sinh op1=ffefffff.ffffffff result=fff00000.00000000 errno=ERANGE status=ox
+func=sinh op1=00000000.00000000 result=00000000.00000000 errno=0
+func=sinh op1=80000000.00000000 result=80000000.00000000 errno=0
+
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=sinh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=sinh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/pl/math/test/ulp_funcs.h b/pl/math/test/ulp_funcs.h
index a6c3866..b3bd940 100644
--- a/pl/math/test/ulp_funcs.h
+++ b/pl/math/test/ulp_funcs.h
@@ -51,6 +51,7 @@ D1 (erfc)
 D1 (expm1)
 D1 (log10)
 D1 (log1p)
+D1 (sinh)
 #if WANT_VMATH
 _ZVNF1 (asinh)
 _ZVNF1 (atan)
@@ -71,6 +72,7 @@ _ZVND1 (log1p)
 _ZVNF1 (log2)
 _ZVND1 (log2)
 _ZVNF1 (sinh)
+_ZVND1 (sinh)
 _ZVNF1 (tan)
 #if WANT_SVE_MATH
 _ZSVF2 (atan2)
diff --git a/pl/math/test/ulp_wrappers.h b/pl/math/test/ulp_wrappers.h
index 18dfe13..91f8e76 100644
--- a/pl/math/test/ulp_wrappers.h
+++ b/pl/math/test/ulp_wrappers.h
@@ -135,6 +135,7 @@ ZVND1_WRAP(expm1)
 ZVND1_WRAP(log10)
 ZVND1_WRAP(log1p)
 ZVND1_WRAP(log2)
+ZVND1_WRAP(sinh)
 #if WANT_SVE_MATH
 ZSVNF2_WRAP(atan2)
 ZSVNF1_WRAP(atan)
diff --git a/pl/math/v_sinh_3u.c b/pl/math/v_sinh_3u.c
new file mode 100644
index 0000000..c707364
--- /dev/null
+++ b/pl/math/v_sinh_3u.c
@@ -0,0 +1,45 @@
+/*
+ * Double-precision vector sinh(x) function.
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "mathlib.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define Half 0x3fe0000000000000
+#define OFlowBound                                                             \
+  0x40862e42fefa39f0 /* 0x1.62e42fefa39fp+9, above which using expm1 results   \
+			in NaN.  */
+
+#if V_SUPPORTED
+
+/* Approximation for vector double-precision sinh(x) using expm1.
+   sinh(x) = (exp(x) - exp(-x)) / 2.
+   The greatest observed error is 2.57 ULP:
+   sinh(0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2
+			     want 0x1.ab34e59d678d9p-2.  */
+VPCS_ATTR v_f64_t V_NAME (sinh) (v_f64_t x)
+{
+  v_u64_t ix = v_as_u64_f64 (x);
+  v_u64_t iax = ix & AbsMask;
+  v_f64_t ax = v_as_f64_u64 (iax);
+  v_u64_t sign = ix & ~AbsMask;
+  v_f64_t halfsign = v_as_f64_u64 (sign | Half);
+
+  v_u64_t special = v_cond_u64 (iax >= OFlowBound);
+  /* Fall back to the scalar variant for all lanes if any of them should trigger
+     an exception.  */
+  if (unlikely (v_any_u64 (special)))
+    return v_call_f64 (sinh, x, x, v_u64 (-1));
+
+  /* Up to the point that expm1 overflows, we can use it to calculate sinh
+     using a slight rearrangement of the definition of asinh. This allows us to
+     retain acceptable accuracy for very small inputs.  */
+  v_f64_t t = V_NAME (expm1) (ax);
+  return (t + t / (t + 1)) * halfsign;
+}
+VPCS_ALIAS
+
+#endif
diff --git a/pl/math/vn_sinh_3u.c b/pl/math/vn_sinh_3u.c
new file mode 100644
index 0000000..2b68578
--- /dev/null
+++ b/pl/math/vn_sinh_3u.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_sinh.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "include/mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS strong_alias (__vn_sinh, _ZGVnN2v_sinh)
+#include "v_sinh_3u.c"
+#endif
author	Joe Ramsay <Joe.Ramsay@arm.com>	2022-11-17 10:42:20 +0000
committer	Joe Ramsay <joe.ramsay@arm.com>	2022-11-17 10:42:20 +0000
commit	56a92ffa259a0e77411541a013951a3fa76d7062 (patch)
tree	9e41522f91735592dfcc1ff3a22b6441b44d1a49 /pl/math
parent	c1cf1eb0ad5fb98c4c14e8e83e00b779d1e646a2 (diff)
download	arm-optimized-routines-56a92ffa259a0e77411541a013951a3fa76d7062.tar.gz