diff options
author | Joe Ramsay <Joe.Ramsay@arm.com> | 2022-11-17 10:42:20 +0000 |
---|---|---|
committer | Joe Ramsay <joe.ramsay@arm.com> | 2022-11-17 10:42:20 +0000 |
commit | 56a92ffa259a0e77411541a013951a3fa76d7062 (patch) | |
tree | 9e41522f91735592dfcc1ff3a22b6441b44d1a49 /pl/math | |
parent | c1cf1eb0ad5fb98c4c14e8e83e00b779d1e646a2 (diff) | |
download | arm-optimized-routines-56a92ffa259a0e77411541a013951a3fa76d7062.tar.gz |
pl/math: Add scalar and vector/Neon sinh
New routines are based on the single-precision versions and are
accurate to 3 ULP.
Diffstat (limited to 'pl/math')
-rw-r--r-- | pl/math/include/mathlib.h | 5 | ||||
-rw-r--r-- | pl/math/s_sinh_3u.c | 6 | ||||
-rw-r--r-- | pl/math/sinh_3u.c | 55 | ||||
-rw-r--r-- | pl/math/test/mathbench_funcs.h | 2 | ||||
-rwxr-xr-x | pl/math/test/runulp.sh | 23 | ||||
-rw-r--r-- | pl/math/test/testcases/directed/sinh.tst | 21 | ||||
-rw-r--r-- | pl/math/test/ulp_funcs.h | 2 | ||||
-rw-r--r-- | pl/math/test/ulp_wrappers.h | 1 | ||||
-rw-r--r-- | pl/math/v_sinh_3u.c | 45 | ||||
-rw-r--r-- | pl/math/vn_sinh_3u.c | 12 |
10 files changed, 171 insertions, 1 deletions
diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h index 9ebe539..96da00a 100644 --- a/pl/math/include/mathlib.h +++ b/pl/math/include/mathlib.h @@ -28,6 +28,7 @@ double erfc (double); double expm1 (double); double log10 (double); double log1p (double); +double sinh (double); float __s_asinhf (float); float __s_atanf (float); @@ -50,6 +51,7 @@ double __s_expm1 (double); double __s_log10 (double); double __s_log1p (double); double __s_log2 (double); +double __s_sinh (double); #if __aarch64__ #if __GNUC__ >= 5 @@ -82,6 +84,7 @@ __f64x2_t __v_log1p (__f64x2_t); __f32x4_t __v_log2f (__f32x4_t); __f64x2_t __v_log2 (__f64x2_t); __f32x4_t __v_sinhf (__f32x4_t); +__f64x2_t __v_sinh (__f64x2_t); __f32x4_t __v_tanf (__f32x4_t); #if __GNUC__ >= 9 || __clang_major__ >= 8 @@ -107,6 +110,7 @@ __vpcs __f64x2_t __vn_log1p (__f64x2_t); __vpcs __f32x4_t __vn_log2f (__f32x4_t); __vpcs __f64x2_t __vn_log2 (__f64x2_t); __vpcs __f32x4_t __vn_sinhf (__f32x4_t); +__vpcs __f64x2_t __vn_sinh (__f64x2_t); __vpcs __f32x4_t __vn_tanf (__f32x4_t); /* Vector functions following the vector PCS using ABI names. */ @@ -129,6 +133,7 @@ __vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t); __vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t); __vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t); __vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t); +__vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t); __vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t); #endif diff --git a/pl/math/s_sinh_3u.c b/pl/math/s_sinh_3u.c new file mode 100644 index 0000000..2c08fa1 --- /dev/null +++ b/pl/math/s_sinh_3u.c @@ -0,0 +1,6 @@ +/* + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#define SCALAR 1 +#include "v_sinh_3u.c" diff --git a/pl/math/sinh_3u.c b/pl/math/sinh_3u.c new file mode 100644 index 0000000..ce3ff13 --- /dev/null +++ b/pl/math/sinh_3u.c @@ -0,0 +1,55 @@ +/* + * Double-precision sinh(x) function. + * + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" + +#define AbsMask 0x7fffffffffffffff +#define Half 0x3fe0000000000000 +#define OFlowBound \ + 0x40862e42fefa39f0 /* 0x1.62e42fefa39fp+9, above which using expm1 results \ + in NaN. */ + +double +__exp_dd (double, double); + +/* Approximation for double-precision sinh(x) using expm1. + sinh(x) = (exp(x) - exp(-x)) / 2. + The greatest observed error is 2.57 ULP: + __v_sinh(0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2 + want 0x1.ab34e59d678d9p-2. */ +double +sinh (double x) +{ + uint64_t ix = asuint64 (x); + uint64_t iax = ix & AbsMask; + double ax = asdouble (iax); + uint64_t sign = ix & ~AbsMask; + double halfsign = asdouble (Half | sign); + + if (unlikely (iax >= OFlowBound)) + { + /* Special values and overflow. */ + if (unlikely (iax > 0x7ff0000000000000)) + return __math_invalidf (x); + /* expm1 overflows a little before sinh. We have to fill this + gap by using a different algorithm, in this case we use a + double-precision exp helper. For large x sinh(x) is dominated + by exp(x), however we cannot compute exp without overflow + either. We use the identity: exp(a) = (exp(a / 2)) ^ 2 + to compute sinh(x) ~= (exp(|x| / 2)) ^ 2 / 2 for x > 0 + ~= (exp(|x| / 2)) ^ 2 / -2 for x < 0. */ + double e = __exp_dd (ax / 2, 0); + return (e * halfsign) * e; + } + + /* Use expm1f to retain acceptable precision for small numbers. + Let t = e^(|x|) - 1. */ + double t = expm1 (ax); + /* Then sinh(x) = (t + t / (t + 1)) / 2 for x > 0 + (t + t / (t + 1)) / -2 for x < 0. */ + return (t + t / (t + 1)) * halfsign; +} diff --git a/pl/math/test/mathbench_funcs.h b/pl/math/test/mathbench_funcs.h index 9289f45..49208de 100644 --- a/pl/math/test/mathbench_funcs.h +++ b/pl/math/test/mathbench_funcs.h @@ -55,6 +55,7 @@ D (log1p, -0.9, 10.0) D (log2, 0.01, 11.1) {"powi", 'd', 0, 0.01, 11.1, {.d = powi_wrap}}, D (sin, -3.1, 3.1) +D (sinh, -10.0, 10.0) #if WANT_VMATH ZVNF (asinhf, -10.0, 10.0) @@ -74,6 +75,7 @@ ZVND (log1p, -0.9, 10.0) ZVNF (log2f, 0.01, 11.1) ZVND (log2, 0.01, 11.1) ZVNF (sinhf, -10.0, 10.0) +ZVND (sinh, -10.0, 10.0) ZVNF (tanf, -3.1, 3.1) {"__s_atan2f", 'f', 0, -10.0, 10.0, {.f = __s_atan2f_wrap}}, {"__s_atan2", 'd', 0, -10.0, 10.0, {.d = __s_atan2_wrap}}, diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh index c92892b..8835db1 100755 --- a/pl/math/test/runulp.sh +++ b/pl/math/test/runulp.sh @@ -179,7 +179,6 @@ t coshf -0 -0x1p-63 100 t coshf -0 -0x1.5a92d8p+6 80000 t coshf -0x1.5a92d8p+6 -inf 2000 - L=1.68 t expm1 0 0x1p-51 1000 t expm1 -0 -0x1p-51 1000 @@ -188,6 +187,14 @@ t expm1 -0x1p-51 -0x1.740bf7c0d927dp+9 100000 t expm1 0x1.63108c75a1937p+9 inf 100 t expm1 -0x1.740bf7c0d927dp+9 -inf 100 +L=2.08 +t sinh 0 0x1p-51 100 +t sinh -0 -0x1p-51 100 +t sinh 0x1p-51 0x1.62e42fefa39fp+9 100000 +t sinh -0x1p-51 -0x1.62e42fefa39fp+9 100000 +t sinh 0x1.62e42fefa39fp+9 inf 1000 +t sinh -0x1.62e42fefa39fp+9 -inf 1000 + done # vector functions @@ -393,6 +400,15 @@ range_expm1=' -0x1.740bf7c0d927dp+9 -inf 100 ' +range_sinh=' + 0 0x1p-51 100 + -0 -0x1p-51 100 + 0x1p-51 0x1.62e42fefa39fp+9 100000 + -0x1p-51 -0x1.62e42fefa39fp+9 100000 + 0x1.62e42fefa39fp+9 inf 1000 + -0x1.62e42fefa39fp+9 -inf 1000 +' + range_sve_cosf=' 0 0xffff0000 10000 0x1p-4 0x1p4 500000 @@ -556,6 +572,7 @@ L_expm1f=1.02 L_sinhf=1.76 L_coshf=1.89 L_expm1=1.68 +L_sinh=2.08 L_sve_cosf=1.57 L_sve_cos=1.61 @@ -638,6 +655,10 @@ expm1 __s_expm1 $runs fenv expm1 __v_expm1 $runv fenv expm1 __vn_expm1 $runvn fenv expm1 _ZGVnN2v_expm1 $runvn fenv +sinh __s_sinh $runs fenv +sinh __v_sinh $runv fenv +sinh __vn_sinh $runvn fenv +sinh _ZGVnN2v_sinh $runvn fenv atanf __s_atanf $runs atanf __v_atanf $runv diff --git a/pl/math/test/testcases/directed/sinh.tst b/pl/math/test/testcases/directed/sinh.tst new file mode 100644 index 0000000..d8c7d91 --- /dev/null +++ b/pl/math/test/testcases/directed/sinh.tst @@ -0,0 +1,21 @@ +; sinh.tst +; +; Copyright 1999-2022, Arm Limited. +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +func=sinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 +func=sinh op1=fff80000.00000001 result=7ff80000.00000001 errno=0 +func=sinh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i +func=sinh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i +func=sinh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 +func=sinh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox +func=sinh op1=fff00000.00000000 result=fff00000.00000000 errno=0 +func=sinh op1=ffefffff.ffffffff result=fff00000.00000000 errno=ERANGE status=ox +func=sinh op1=00000000.00000000 result=00000000.00000000 errno=0 +func=sinh op1=80000000.00000000 result=80000000.00000000 errno=0 + +; No exception is raised with certain versions of glibc. Functions +; approximated by x near zero may not generate/implement flops and +; thus may not raise exceptions. +func=sinh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux +func=sinh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux diff --git a/pl/math/test/ulp_funcs.h b/pl/math/test/ulp_funcs.h index a6c3866..b3bd940 100644 --- a/pl/math/test/ulp_funcs.h +++ b/pl/math/test/ulp_funcs.h @@ -51,6 +51,7 @@ D1 (erfc) D1 (expm1) D1 (log10) D1 (log1p) +D1 (sinh) #if WANT_VMATH _ZVNF1 (asinh) _ZVNF1 (atan) @@ -71,6 +72,7 @@ _ZVND1 (log1p) _ZVNF1 (log2) _ZVND1 (log2) _ZVNF1 (sinh) +_ZVND1 (sinh) _ZVNF1 (tan) #if WANT_SVE_MATH _ZSVF2 (atan2) diff --git a/pl/math/test/ulp_wrappers.h b/pl/math/test/ulp_wrappers.h index 18dfe13..91f8e76 100644 --- a/pl/math/test/ulp_wrappers.h +++ b/pl/math/test/ulp_wrappers.h @@ -135,6 +135,7 @@ ZVND1_WRAP(expm1) ZVND1_WRAP(log10) ZVND1_WRAP(log1p) ZVND1_WRAP(log2) +ZVND1_WRAP(sinh) #if WANT_SVE_MATH ZSVNF2_WRAP(atan2) ZSVNF1_WRAP(atan) diff --git a/pl/math/v_sinh_3u.c b/pl/math/v_sinh_3u.c new file mode 100644 index 0000000..c707364 --- /dev/null +++ b/pl/math/v_sinh_3u.c @@ -0,0 +1,45 @@ +/* + * Double-precision vector sinh(x) function. + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "mathlib.h" + +#define AbsMask 0x7fffffffffffffff +#define Half 0x3fe0000000000000 +#define OFlowBound \ + 0x40862e42fefa39f0 /* 0x1.62e42fefa39fp+9, above which using expm1 results \ + in NaN. */ + +#if V_SUPPORTED + +/* Approximation for vector double-precision sinh(x) using expm1. + sinh(x) = (exp(x) - exp(-x)) / 2. + The greatest observed error is 2.57 ULP: + sinh(0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2 + want 0x1.ab34e59d678d9p-2. */ +VPCS_ATTR v_f64_t V_NAME (sinh) (v_f64_t x) +{ + v_u64_t ix = v_as_u64_f64 (x); + v_u64_t iax = ix & AbsMask; + v_f64_t ax = v_as_f64_u64 (iax); + v_u64_t sign = ix & ~AbsMask; + v_f64_t halfsign = v_as_f64_u64 (sign | Half); + + v_u64_t special = v_cond_u64 (iax >= OFlowBound); + /* Fall back to the scalar variant for all lanes if any of them should trigger + an exception. */ + if (unlikely (v_any_u64 (special))) + return v_call_f64 (sinh, x, x, v_u64 (-1)); + + /* Up to the point that expm1 overflows, we can use it to calculate sinh + using a slight rearrangement of the definition of asinh. This allows us to + retain acceptable accuracy for very small inputs. */ + v_f64_t t = V_NAME (expm1) (ax); + return (t + t / (t + 1)) * halfsign; +} +VPCS_ALIAS + +#endif diff --git a/pl/math/vn_sinh_3u.c b/pl/math/vn_sinh_3u.c new file mode 100644 index 0000000..2b68578 --- /dev/null +++ b/pl/math/vn_sinh_3u.c @@ -0,0 +1,12 @@ +/* + * AdvSIMD vector PCS variant of __v_sinh. + * + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#include "include/mathlib.h" +#ifdef __vpcs +#define VPCS 1 +#define VPCS_ALIAS strong_alias (__vn_sinh, _ZGVnN2v_sinh) +#include "v_sinh_3u.c" +#endif |