From 0866a19c42fd560ea8985f829b17c4b93b6a6f1b Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 5 Jan 2023 10:30:01 +0000 Subject: pl/math: Add vector/Neon acoshf New routine uses inlined log1pf helper, and is accurate to 3.1 ULP (2.8 ULP if fp exceptions are enabled). --- pl/math/include/mathlib.h | 4 +++ pl/math/s_acoshf_3u1.c | 6 +++++ pl/math/v_acoshf_3u1.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++ pl/math/vn_acoshf_3u1.c | 12 +++++++++ 4 files changed, 90 insertions(+) create mode 100644 pl/math/s_acoshf_3u1.c create mode 100644 pl/math/v_acoshf_3u1.c create mode 100644 pl/math/vn_acoshf_3u1.c diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h index 43c5ebc..dc2cef8 100644 --- a/pl/math/include/mathlib.h +++ b/pl/math/include/mathlib.h @@ -39,6 +39,7 @@ double log1p (double); double sinh (double); double tanh (double); +float __s_acoshf (float); float __s_asinhf (float); float __s_atanf (float); float __s_atan2f (float, float); @@ -82,6 +83,7 @@ typedef __attribute__((__neon_vector_type__(2))) double __f64x2_t; #endif /* Vector functions following the base PCS. */ +__f32x4_t __v_acoshf (__f32x4_t); __f32x4_t __v_asinhf (__f32x4_t); __f64x2_t __v_asinh (__f64x2_t); __f32x4_t __v_atanf (__f32x4_t); @@ -116,6 +118,7 @@ __f64x2_t __v_tanh (__f64x2_t); #define __vpcs __attribute__((__aarch64_vector_pcs__)) /* Vector functions following the vector PCS. */ +__vpcs __f32x4_t __vn_acoshf (__f32x4_t); __vpcs __f32x4_t __vn_asinhf (__f32x4_t); __vpcs __f64x2_t __vn_asinh (__f64x2_t); __vpcs __f32x4_t __vn_atanf (__f32x4_t); @@ -147,6 +150,7 @@ __vpcs __f32x4_t __vn_tanhf (__f32x4_t); __vpcs __f64x2_t __vn_tanh (__f64x2_t); /* Vector functions following the vector PCS using ABI names. */ +__vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t); __vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t); __vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t); diff --git a/pl/math/s_acoshf_3u1.c b/pl/math/s_acoshf_3u1.c new file mode 100644 index 0000000..3740666 --- /dev/null +++ b/pl/math/s_acoshf_3u1.c @@ -0,0 +1,6 @@ +/* + * Copyright (c) 2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#define SCALAR 1 +#include "v_acoshf_3u1.c" diff --git a/pl/math/v_acoshf_3u1.c b/pl/math/v_acoshf_3u1.c new file mode 100644 index 0000000..2b5aff5 --- /dev/null +++ b/pl/math/v_acoshf_3u1.c @@ -0,0 +1,68 @@ +/* + * Single-precision vector acosh(x) function. + * Copyright (c) 2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "pl_sig.h" +#include "pl_test.h" + +#define SignMask 0x80000000 +#define One 0x3f800000 +#define SquareLim 0x5f800000 /* asuint(0x1p64). */ + +#if V_SUPPORTED + +#include "v_log1pf_inline.h" + +static NOINLINE VPCS_ATTR v_f32_t +special_case (v_f32_t x, v_f32_t y, v_u32_t special) +{ + return v_call_f32 (acoshf, x, y, special); +} + +/* Vector approximation for single-precision acosh, based on log1p. Maximum + error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it + is 2.78 ULP: + __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 + want 0x1.ef9ea2p-3. + With exceptions disabled, we can compute u with a shorter dependency chain, + which gives maximum error of 3.07 ULP: + __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4 + want 0x1.fbc7f4p-4. */ + +VPCS_ATTR v_f32_t V_NAME (acoshf) (v_f32_t x) +{ + v_u32_t ix = v_as_u32_f32 (x); + v_u32_t special = v_cond_u32 ((ix - One) >= (SquareLim - One)); + +#if WANT_SIMD_EXCEPT + /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use + only xm1 to calculate u, as operating on x will trigger invalid for NaN. */ + v_f32_t xm1 = v_sel_f32 (special, v_f32 (1), x - 1); + v_f32_t u = v_fma_f32 (xm1, xm1, 2 * xm1); +#else + v_f32_t xm1 = x - 1; + v_f32_t u = xm1 * (x + 1.0f); +#endif + v_f32_t y = log1pf_inline (xm1 + v_sqrt_f32 (u)); + + if (unlikely (v_any_u32 (special))) + return special_case (x, y, special); + return y; +} +VPCS_ALIAS + +PL_SIG (V, F, 1, acosh, 1.0, 10.0) +#if WANT_SIMD_EXCEPT +PL_TEST_ULP (V_NAME (acoshf), 2.29) +#else +PL_TEST_ULP (V_NAME (acoshf), 2.58) +#endif +PL_TEST_EXPECT_FENV (V_NAME (acoshf), WANT_SIMD_EXCEPT) +PL_TEST_INTERVAL (V_NAME (acoshf), 0, 1, 500) +PL_TEST_INTERVAL (V_NAME (acoshf), 1, SquareLim, 100000) +PL_TEST_INTERVAL (V_NAME (acoshf), SquareLim, inf, 1000) +PL_TEST_INTERVAL (V_NAME (acoshf), -0, -inf, 1000) +#endif diff --git a/pl/math/vn_acoshf_3u1.c b/pl/math/vn_acoshf_3u1.c new file mode 100644 index 0000000..8c5f106 --- /dev/null +++ b/pl/math/vn_acoshf_3u1.c @@ -0,0 +1,12 @@ +/* + * AdvSIMD vector PCS variant of __v_acoshf. + * + * Copyright (c) 2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#include "include/mathlib.h" +#ifdef __vpcs +#define VPCS 1 +#define VPCS_ALIAS PL_ALIAS (__vn_acoshf, _ZGVnN4v_acoshf) +#include "v_acoshf_3u1.c" +#endif -- cgit v1.2.3