From 83a43cd7afcbdca7e192704121acfad6fa24cb67 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Fri, 15 Jul 2022 08:56:20 +0100 Subject: pl/math: Add vector/Neon asinhf The new routine uses vector log1pf, and is accurate to 2.7 ulp. --- pl/math/v_asinhf_2u7.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 pl/math/v_asinhf_2u7.c (limited to 'pl/math/v_asinhf_2u7.c') diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c new file mode 100644 index 0000000..39f7989 --- /dev/null +++ b/pl/math/v_asinhf_2u7.c @@ -0,0 +1,51 @@ +/* + * Single-precision vector asinh(x) function. + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "include/mathlib.h" + +#if V_SUPPORTED + +#define SignMask v_u32 (0x80000000) +#define One v_f32 (1.0f) +#define Ln2 v_f32 (0x1.62e43p-1f) +#define SpecialBound v_u32 (0x5f800000) /* asuint(0x1p64). */ + +static inline v_f32_t +handle_special (v_f32_t ax) +{ + return V_NAME (log1pf) (ax) + Ln2; +} + +/* Single-precision implementation of vector asinh(x), using vector log1p. + Worst-case error is 2.66 ULP, at roughly +/-0.25: + __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3. */ +VPCS_ATTR v_f32_t V_NAME (asinhf) (v_f32_t x) +{ + v_f32_t ax = v_abs_f32 (x); + v_u32_t special = v_cond_u32 (v_as_u32_f32 (ax) >= SpecialBound); + v_u32_t sign = v_as_u32_f32 (x) & SignMask; + + /* asinh(x) = log(x + sqrt(x * x + 1)). + For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). */ + v_f32_t d = One + v_sqrt_f32 (ax * ax + One); + v_f32_t y = V_NAME (log1pf) (ax + ax * ax / d); + + if (unlikely (v_any_u32 (special))) + { + /* If |x| is too large, we cannot square it at low cost without overflow. + At very large x, asinh(x) ~= log(2x) and log(x) ~= log1p(x), so we + calculate asinh(x) as log1p(x) + log(2). */ + v_f32_t y_large = V_NAME (log1pf) (ax) + Ln2; + return v_as_f32_u32 (sign + | v_as_u32_f32 (v_sel_f32 (special, y_large, y))); + } + + return v_as_f32_u32 (sign | v_as_u32_f32 (y)); +} +VPCS_ALIAS + +#endif -- cgit v1.2.3 From 3d1a87e2fe152dc52d4a624425f5b2349a4088b0 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Mon, 15 Aug 2022 11:19:25 +0100 Subject: pl/math: Audit Neon special-case handlers Prevent inlining in most cases - change to use AOR style (NOINLINE). --- pl/math/v_asinhf_2u7.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'pl/math/v_asinhf_2u7.c') diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c index 39f7989..675b8a8 100644 --- a/pl/math/v_asinhf_2u7.c +++ b/pl/math/v_asinhf_2u7.c @@ -14,12 +14,6 @@ #define Ln2 v_f32 (0x1.62e43p-1f) #define SpecialBound v_u32 (0x5f800000) /* asuint(0x1p64). */ -static inline v_f32_t -handle_special (v_f32_t ax) -{ - return V_NAME (log1pf) (ax) + Ln2; -} - /* Single-precision implementation of vector asinh(x), using vector log1p. Worst-case error is 2.66 ULP, at roughly +/-0.25: __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3. */ -- cgit v1.2.3 From 2a963bbff4f16998def16ab5c7b1c7ab92f825a8 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Tue, 6 Dec 2022 10:40:54 +0000 Subject: pl/math: Set fenv flags in Neon asinhf Routine no longer relies on vector log1pf, as this has to become more complex to deal with fenv itself. Instead we re-use a log1pf helper from Neon atanhf which does no special-case handling, instead leaving it all up to the main routine. We now just fall back to the scalar routine for special-case handling. This uncovered a mistake in asinhf's handling of NaNs, which has been fixed. --- pl/math/v_asinhf_2u7.c | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) (limited to 'pl/math/v_asinhf_2u7.c') diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c index 675b8a8..7bce7ff 100644 --- a/pl/math/v_asinhf_2u7.c +++ b/pl/math/v_asinhf_2u7.c @@ -11,34 +11,45 @@ #define SignMask v_u32 (0x80000000) #define One v_f32 (1.0f) -#define Ln2 v_f32 (0x1.62e43p-1f) -#define SpecialBound v_u32 (0x5f800000) /* asuint(0x1p64). */ +#define BigBound v_u32 (0x5f800000) /* asuint(0x1p64). */ +#define TinyBound v_u32 (0x30800000) /* asuint(0x1p-30). */ + +#include "v_log1pf_inline.h" + +static NOINLINE v_f32_t +specialcase (v_f32_t x, v_f32_t y, v_u32_t special) +{ + return v_call_f32 (asinhf, x, y, special); +} /* Single-precision implementation of vector asinh(x), using vector log1p. Worst-case error is 2.66 ULP, at roughly +/-0.25: __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3. */ VPCS_ATTR v_f32_t V_NAME (asinhf) (v_f32_t x) { - v_f32_t ax = v_abs_f32 (x); - v_u32_t special = v_cond_u32 (v_as_u32_f32 (ax) >= SpecialBound); - v_u32_t sign = v_as_u32_f32 (x) & SignMask; + v_u32_t ix = v_as_u32_f32 (x); + v_u32_t iax = ix & ~SignMask; + v_u32_t sign = ix & SignMask; + v_f32_t ax = v_as_f32_u32 (iax); + v_u32_t special = v_cond_u32 (iax >= BigBound); + +#if WANT_ERRNO + /* Sidestep tiny and large values to avoid inadvertently triggering + under/overflow. */ + special |= v_cond_u32 (iax < TinyBound); + if (unlikely (v_any_u32 (special))) + ax = v_sel_f32 (special, One, ax); +#endif /* asinh(x) = log(x + sqrt(x * x + 1)). For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). */ v_f32_t d = One + v_sqrt_f32 (ax * ax + One); - v_f32_t y = V_NAME (log1pf) (ax + ax * ax / d); + v_f32_t y = log1pf_inline (ax + ax * ax / d); + y = v_as_f32_u32 (sign | v_as_u32_f32 (y)); if (unlikely (v_any_u32 (special))) - { - /* If |x| is too large, we cannot square it at low cost without overflow. - At very large x, asinh(x) ~= log(2x) and log(x) ~= log1p(x), so we - calculate asinh(x) as log1p(x) + log(2). */ - v_f32_t y_large = V_NAME (log1pf) (ax) + Ln2; - return v_as_f32_u32 (sign - | v_as_u32_f32 (v_sel_f32 (special, y_large, y))); - } - - return v_as_f32_u32 (sign | v_as_u32_f32 (y)); + return specialcase (x, y, special); + return y; } VPCS_ALIAS -- cgit v1.2.3 From 1bca1a541cce13c352296acd5dfa16160fc27bc9 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:27:31 +0000 Subject: pl/math: Auto-generate mathbench and ulp headers Instead of maintaining three separate lists of routines, which are cumbersome and prone to merge conflicts, we provide a new macro, PL_SIG, which by some preprocessor machinery outputs the lists in the required format (macro formats have been changed very slightly to make the generation simpler). Only routines with simple signatures are handled - binary functions still need mathbench wrappers defined manually. As well, routines with non-standard references (i.e. powi/powk) still need entries and wrappers manually defined. --- pl/math/v_asinhf_2u7.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'pl/math/v_asinhf_2u7.c') diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c index 7bce7ff..18a2395 100644 --- a/pl/math/v_asinhf_2u7.c +++ b/pl/math/v_asinhf_2u7.c @@ -6,6 +6,7 @@ #include "v_math.h" #include "include/mathlib.h" +#include "pl_sig.h" #if V_SUPPORTED @@ -53,4 +54,5 @@ VPCS_ATTR v_f32_t V_NAME (asinhf) (v_f32_t x) } VPCS_ALIAS +PL_SIG (V, F, 1, asinh, -10.0, 10.0) #endif -- cgit v1.2.3 From ecb1c6f6ea7872645cb4c26514d5f64815b61a1b Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:27:39 +0000 Subject: pl/math: Move ULP limits to routine source files Introduces a new set of macros and Make rules for mechanically generating a list of ULP limits for each routine, to be consumed by runulp.sh. This removes the need to maintain long lists of thresholds in runulp.sh. --- pl/math/v_asinhf_2u7.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'pl/math/v_asinhf_2u7.c') diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c index 18a2395..812e28f 100644 --- a/pl/math/v_asinhf_2u7.c +++ b/pl/math/v_asinhf_2u7.c @@ -7,6 +7,7 @@ #include "v_math.h" #include "include/mathlib.h" #include "pl_sig.h" +#include "pl_test.h" #if V_SUPPORTED @@ -55,4 +56,5 @@ VPCS_ATTR v_f32_t V_NAME (asinhf) (v_f32_t x) VPCS_ALIAS PL_SIG (V, F, 1, asinh, -10.0, 10.0) +PL_TEST_ULP (V_NAME (asinhf), 2.17) #endif -- cgit v1.2.3 From d748e1520dd2ff5ad3574bd0827cdd882bf6bed8 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:27:57 +0000 Subject: pl/math: Move fenv expectations out of runulp.sh Introduces a new macro, similar to how ULP thresholds are now handled, that emits a list of routines which are expected to correctly trigger fenv exceptions, to be consumed by runulp.sh. All scalar routines are expected to do so. A small number of Neon routines are also expected to, dependent on WANT_ERRNO. --- pl/math/v_asinhf_2u7.c | 1 + 1 file changed, 1 insertion(+) (limited to 'pl/math/v_asinhf_2u7.c') diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c index 812e28f..32fe773 100644 --- a/pl/math/v_asinhf_2u7.c +++ b/pl/math/v_asinhf_2u7.c @@ -57,4 +57,5 @@ VPCS_ALIAS PL_SIG (V, F, 1, asinh, -10.0, 10.0) PL_TEST_ULP (V_NAME (asinhf), 2.17) +PL_TEST_EXPECT_FENV (V_NAME (asinhf), WANT_ERRNO) #endif -- cgit v1.2.3 From 202e46317ee8983516b6413066a57bd624ffa044 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:28:06 +0000 Subject: pl/math: Move test intervals to routine source files To conclude the work on simplifying the runulp.sh script, a new macro has been introduced to specify the intervals in which a routine should be tested in the routine source. This is eventually consumed by runulp.sh. --- pl/math/v_asinhf_2u7.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'pl/math/v_asinhf_2u7.c') diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c index 32fe773..79bf80f 100644 --- a/pl/math/v_asinhf_2u7.c +++ b/pl/math/v_asinhf_2u7.c @@ -58,4 +58,12 @@ VPCS_ALIAS PL_SIG (V, F, 1, asinh, -10.0, 10.0) PL_TEST_ULP (V_NAME (asinhf), 2.17) PL_TEST_EXPECT_FENV (V_NAME (asinhf), WANT_ERRNO) +PL_TEST_INTERVAL (V_NAME (asinhf), 0, 0x1p-12, 40000) +PL_TEST_INTERVAL (V_NAME (asinhf), 0x1p-12, 1.0, 40000) +PL_TEST_INTERVAL (V_NAME (asinhf), 1.0, 0x1p11, 40000) +PL_TEST_INTERVAL (V_NAME (asinhf), 0x1p11, inf, 40000) +PL_TEST_INTERVAL (V_NAME (asinhf), 0, -0x1p-12, 20000) +PL_TEST_INTERVAL (V_NAME (asinhf), -0x1p-12, -1.0, 20000) +PL_TEST_INTERVAL (V_NAME (asinhf), -1.0, -0x1p11, 20000) +PL_TEST_INTERVAL (V_NAME (asinhf), -0x1p11, -inf, 20000) #endif -- cgit v1.2.3 From d05594e6718e6d86959c823bea4f019dea878bcb Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Mon, 19 Dec 2022 12:34:51 +0000 Subject: pl/math: Replace WANT_ERRNO with WANT_SIMD_EXCEPT for Neon fenv We were previously misusing the WANT_ERRNO build flag. This is now replaced everywhere appropriate with WANT_SIMD_EXCEPT. A small number of vector routines get fp exceptions right with no modification - the tests have been updated to track this. --- pl/math/v_asinhf_2u7.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'pl/math/v_asinhf_2u7.c') diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c index 79bf80f..4710a22 100644 --- a/pl/math/v_asinhf_2u7.c +++ b/pl/math/v_asinhf_2u7.c @@ -35,7 +35,7 @@ VPCS_ATTR v_f32_t V_NAME (asinhf) (v_f32_t x) v_f32_t ax = v_as_f32_u32 (iax); v_u32_t special = v_cond_u32 (iax >= BigBound); -#if WANT_ERRNO +#if WANT_SIMD_EXCEPT /* Sidestep tiny and large values to avoid inadvertently triggering under/overflow. */ special |= v_cond_u32 (iax < TinyBound); @@ -57,7 +57,7 @@ VPCS_ALIAS PL_SIG (V, F, 1, asinh, -10.0, 10.0) PL_TEST_ULP (V_NAME (asinhf), 2.17) -PL_TEST_EXPECT_FENV (V_NAME (asinhf), WANT_ERRNO) +PL_TEST_EXPECT_FENV (V_NAME (asinhf), WANT_SIMD_EXCEPT) PL_TEST_INTERVAL (V_NAME (asinhf), 0, 0x1p-12, 40000) PL_TEST_INTERVAL (V_NAME (asinhf), 0x1p-12, 1.0, 40000) PL_TEST_INTERVAL (V_NAME (asinhf), 1.0, 0x1p11, 40000) -- cgit v1.2.3 From f0f80b8a19b2593491847ed87456694d789f6f80 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Fri, 6 Jan 2023 09:10:57 +0000 Subject: pl/math: Update copyright years All files in pl/math updated to 2023. --- pl/math/v_asinhf_2u7.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'pl/math/v_asinhf_2u7.c') diff --git a/pl/math/v_asinhf_2u7.c b/pl/math/v_asinhf_2u7.c index 4710a22..9d8c8a9 100644 --- a/pl/math/v_asinhf_2u7.c +++ b/pl/math/v_asinhf_2u7.c @@ -1,6 +1,7 @@ /* * Single-precision vector asinh(x) function. - * Copyright (c) 2022, Arm Limited. + * + * Copyright (c) 2022-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -- cgit v1.2.3