From 8a644bf15812edaba38b41ca142e8e7e328e7918 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Wed, 30 Nov 2022 09:42:41 +0000 Subject: pl/math: Add scalar and vector/Neon tanhf Both routines use simplified inline versions of expm1f, and are accurate to 2.6 ULP. --- pl/math/v_tanhf_2u6.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 pl/math/v_tanhf_2u6.c (limited to 'pl/math/v_tanhf_2u6.c') diff --git a/pl/math/v_tanhf_2u6.c b/pl/math/v_tanhf_2u6.c new file mode 100644 index 0000000..571fd8b --- /dev/null +++ b/pl/math/v_tanhf_2u6.c @@ -0,0 +1,93 @@ +/* + * Single-precision vector tanh(x) function. + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "mathlib.h" + +#if V_SUPPORTED + +#define BoringBound \ + 0x41102cb3 /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for \ + negative). */ +#define AbsMask 0x7fffffff +#define One 0x3f800000 + +#define Shift v_f32 (0x1.8p23f) +#define InvLn2 v_f32 (0x1.715476p+0f) +#define MLn2hi v_f32 (-0x1.62e4p-1f) +#define MLn2lo v_f32 (-0x1.7f7d1cp-20f) + +#define C(i) v_f32 (__expm1f_poly[i]) + +static inline v_f32_t +expm1f_inline (v_f32_t x) +{ + /* Helper routine for calculating exp(x) - 1. + Copied from v_expm1f_1u6.c, with all special-case handling removed, as + special, tiny and large values are all dealt with in the main tanhf + routine. */ + + /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ + v_f32_t j = v_fma_f32 (InvLn2, x, Shift) - Shift; + v_s32_t i = v_to_s32_f32 (j); + v_f32_t f = v_fma_f32 (j, MLn2hi, x); + f = v_fma_f32 (j, MLn2lo, f); + + /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). + Uses Estrin scheme, where the main __v_expm1f routine uses Horner. 
*/ + v_f32_t f2 = f * f; + v_f32_t p_01 = v_fma_f32 (f, C (1), C (0)); + v_f32_t p_23 = v_fma_f32 (f, C (3), C (2)); + v_f32_t p = v_fma_f32 (f2, p_23, p_01); + p = v_fma_f32 (f2 * f2, C (4), p); + p = v_fma_f32 (f2, p, f); + + /* t = 2^i. */ + v_f32_t t = v_as_f32_u32 (v_as_u32_s32 (i << 23) + One); + /* expm1(x) ~= p * t + (t - 1). */ + return v_fma_f32 (p, t, t - 1); +} + +static NOINLINE v_f32_t +special_case (v_f32_t x, v_f32_t y, v_u32_t special) +{ + return v_call_f32 (tanhf, x, y, special); +} + +/* Approximation for single-precision vector tanh(x), using a simplified version + of expm1f. The maximum error is 2.58 ULP: + __v_tanhf(0x1.fa5eep-5) got 0x1.f9ba02p-5 + want 0x1.f9ba08p-5. */ +VPCS_ATTR v_f32_t V_NAME (tanhf) (v_f32_t x) +{ + v_u32_t ix = v_as_u32_f32 (x); + v_u32_t iax = ix & AbsMask; + v_u32_t sign = ix & ~AbsMask; + v_u32_t is_boring = v_cond_u32 (iax > BoringBound); + v_f32_t boring = v_as_f32_u32 (sign | One); + +#if WANT_ERRNO + /* If errno needs to be set properly, set all special and boring lanes to 1, + which will trigger no exceptions, and fix them up later. */ + v_u32_t special = v_cond_u32 ((iax > 0x7f800000) | (iax < 0x34000000)); + ix = v_sel_u32 (is_boring, v_u32 (One), ix); + if (unlikely (v_any_u32 (special))) + ix = v_sel_u32 (special, v_u32 (One), ix); +#else + v_u32_t special = v_cond_u32 ((iax > 0x7f800000) | (iax == 0)); +#endif + + /* tanh(x) = (e^2x - 1) / (e^2x + 1). */ + v_f32_t q = expm1f_inline (2 * v_as_f32_u32 (ix)); + v_f32_t y = q / (q + 2); + y = v_sel_f32 (is_boring, boring, y); + if (unlikely (v_any_u32 (special))) + return special_case (x, y, special); + return y; +} +VPCS_ALIAS + +#endif -- cgit v1.2.3 From bc7cc9d2a762a26b2fcbf150b3fc9c6993ffa16c Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Fri, 9 Dec 2022 12:19:38 +0000 Subject: pl/math: Add polynomial helpers Add macros for simplifying polynomial evaluation using either Horner, pairwise Horner or Estrin. 
Several routines have been modified to use the new helpers. Readability is improved slightly, and we expect that this will make prototyping new routines simpler. --- pl/math/v_tanhf_2u6.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'pl/math/v_tanhf_2u6.c') diff --git a/pl/math/v_tanhf_2u6.c b/pl/math/v_tanhf_2u6.c index 571fd8b..67e4520 100644 --- a/pl/math/v_tanhf_2u6.c +++ b/pl/math/v_tanhf_2u6.c @@ -6,6 +6,7 @@ #include "v_math.h" #include "mathlib.h" +#include "estrinf.h" #if V_SUPPORTED @@ -39,10 +40,7 @@ expm1f_inline (v_f32_t x) /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). Uses Estrin scheme, where the main __v_expm1f routine uses Horner. */ v_f32_t f2 = f * f; - v_f32_t p_01 = v_fma_f32 (f, C (1), C (0)); - v_f32_t p_23 = v_fma_f32 (f, C (3), C (2)); - v_f32_t p = v_fma_f32 (f2, p_23, p_01); - p = v_fma_f32 (f2 * f2, C (4), p); + v_f32_t p = ESTRIN_4 (f, f2, f2 * f2, C); p = v_fma_f32 (f2, p, f); /* t = 2^i. */ -- cgit v1.2.3 From 1bca1a541cce13c352296acd5dfa16160fc27bc9 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:27:31 +0000 Subject: pl/math: Auto-generate mathbench and ulp headers Instead of maintaining three separate lists of routines, which are cumbersome and prone to merge conflicts, we provide a new macro, PL_SIG, which by some preprocessor machinery outputs the lists in the required format (macro formats have been changed very slightly to make the generation simpler). Only routines with simple signatures are handled - binary functions still need mathbench wrappers defined manually. Routines with non-standard references (i.e. powi/powk) also still need entries and wrappers manually defined. 
--- pl/math/v_tanhf_2u6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'pl/math/v_tanhf_2u6.c') diff --git a/pl/math/v_tanhf_2u6.c b/pl/math/v_tanhf_2u6.c index 67e4520..1196c4a 100644 --- a/pl/math/v_tanhf_2u6.c +++ b/pl/math/v_tanhf_2u6.c @@ -5,8 +5,9 @@ */ #include "v_math.h" -#include "mathlib.h" #include "estrinf.h" +#include "mathlib.h" +#include "pl_sig.h" #if V_SUPPORTED @@ -88,4 +89,5 @@ VPCS_ATTR v_f32_t V_NAME (tanhf) (v_f32_t x) } VPCS_ALIAS +PL_SIG (V, F, 1, tanh, -10.0, 10.0) #endif -- cgit v1.2.3 From ecb1c6f6ea7872645cb4c26514d5f64815b61a1b Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:27:39 +0000 Subject: pl/math: Move ULP limits to routine source files Introduces a new set of macros and Make rules for mechanically generating a list of ULP limits for each routine, to be consumed by runulp.sh. This removes the need to maintain long lists of thresholds in runulp.sh. --- pl/math/v_tanhf_2u6.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'pl/math/v_tanhf_2u6.c') diff --git a/pl/math/v_tanhf_2u6.c b/pl/math/v_tanhf_2u6.c index 1196c4a..bb86794 100644 --- a/pl/math/v_tanhf_2u6.c +++ b/pl/math/v_tanhf_2u6.c @@ -8,6 +8,7 @@ #include "estrinf.h" #include "mathlib.h" #include "pl_sig.h" +#include "pl_test.h" #if V_SUPPORTED @@ -90,4 +91,5 @@ VPCS_ATTR v_f32_t V_NAME (tanhf) (v_f32_t x) VPCS_ALIAS PL_SIG (V, F, 1, tanh, -10.0, 10.0) +PL_TEST_ULP (V_NAME (tanhf), 2.09) #endif -- cgit v1.2.3 From d748e1520dd2ff5ad3574bd0827cdd882bf6bed8 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:27:57 +0000 Subject: pl/math: Move fenv expectations out of runulp.sh Introduces a new macro, similar to how ULP thresholds are now handled, that emits a list of routines which are expected to correctly trigger fenv exceptions, to be consumed by runulp.sh. All scalar routines are expected to do so. A small number of Neon routines are also expected to, dependent on WANT_ERRNO. 
--- pl/math/v_tanhf_2u6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'pl/math/v_tanhf_2u6.c') diff --git a/pl/math/v_tanhf_2u6.c b/pl/math/v_tanhf_2u6.c index bb86794..ae87f50 100644 --- a/pl/math/v_tanhf_2u6.c +++ b/pl/math/v_tanhf_2u6.c @@ -92,4 +92,5 @@ VPCS_ALIAS PL_SIG (V, F, 1, tanh, -10.0, 10.0) PL_TEST_ULP (V_NAME (tanhf), 2.09) +PL_TEST_EXPECT_FENV (V_NAME (tanhf), WANT_ERRNO) #endif -- cgit v1.2.3 From 202e46317ee8983516b6413066a57bd624ffa044 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:28:06 +0000 Subject: pl/math: Move test intervals to routine source files To conclude the work on simplifying the runulp.sh script, a new macro has been introduced to specify the intervals in which a routine should be tested in the routine source. This is eventually consumed by runulp.sh. --- pl/math/v_tanhf_2u6.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'pl/math/v_tanhf_2u6.c') diff --git a/pl/math/v_tanhf_2u6.c b/pl/math/v_tanhf_2u6.c index ae87f50..c10be40 100644 --- a/pl/math/v_tanhf_2u6.c +++ b/pl/math/v_tanhf_2u6.c @@ -93,4 +93,10 @@ VPCS_ALIAS PL_SIG (V, F, 1, tanh, -10.0, 10.0) PL_TEST_ULP (V_NAME (tanhf), 2.09) PL_TEST_EXPECT_FENV (V_NAME (tanhf), WANT_ERRNO) +PL_TEST_INTERVAL (V_NAME (tanhf), 0, 0x1p-23, 1000) +PL_TEST_INTERVAL (V_NAME (tanhf), -0, -0x1p-23, 1000) +PL_TEST_INTERVAL (V_NAME (tanhf), 0x1p-23, 0x1.205966p+3, 100000) +PL_TEST_INTERVAL (V_NAME (tanhf), -0x1p-23, -0x1.205966p+3, 100000) +PL_TEST_INTERVAL (V_NAME (tanhf), 0x1.205966p+3, inf, 100) +PL_TEST_INTERVAL (V_NAME (tanhf), -0x1.205966p+3, -inf, 100) #endif -- cgit v1.2.3 From d05594e6718e6d86959c823bea4f019dea878bcb Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Mon, 19 Dec 2022 12:34:51 +0000 Subject: pl/math: Replace WANT_ERRNO with WANT_SIMD_EXCEPT for Neon fenv We were previously misusing the WANT_ERRNO build flag. This is now replaced everywhere appropriate with WANT_SIMD_EXCEPT. 
A small number of vector routines get fp exceptions right with no modification - the tests have been updated to track this. --- pl/math/v_tanhf_2u6.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'pl/math/v_tanhf_2u6.c') diff --git a/pl/math/v_tanhf_2u6.c b/pl/math/v_tanhf_2u6.c index c10be40..dedc085 100644 --- a/pl/math/v_tanhf_2u6.c +++ b/pl/math/v_tanhf_2u6.c @@ -69,9 +69,9 @@ VPCS_ATTR v_f32_t V_NAME (tanhf) (v_f32_t x) v_u32_t is_boring = v_cond_u32 (iax > BoringBound); v_f32_t boring = v_as_f32_u32 (sign | One); -#if WANT_ERRNO - /* If errno needs to be set properly, set all special and boring lanes to 1, - which will trigger no exceptions, and fix them up later. */ +#if WANT_SIMD_EXCEPT + /* If fp exceptions are to be triggered properly, set all special and boring + lanes to 1, which will trigger no exceptions, and fix them up later. */ v_u32_t special = v_cond_u32 ((iax > 0x7f800000) | (iax < 0x34000000)); ix = v_sel_u32 (is_boring, v_u32 (One), ix); if (unlikely (v_any_u32 (special))) @@ -92,7 +92,7 @@ VPCS_ALIAS PL_SIG (V, F, 1, tanh, -10.0, 10.0) PL_TEST_ULP (V_NAME (tanhf), 2.09) -PL_TEST_EXPECT_FENV (V_NAME (tanhf), WANT_ERRNO) +PL_TEST_EXPECT_FENV (V_NAME (tanhf), WANT_SIMD_EXCEPT) PL_TEST_INTERVAL (V_NAME (tanhf), 0, 0x1p-23, 1000) PL_TEST_INTERVAL (V_NAME (tanhf), -0, -0x1p-23, 1000) PL_TEST_INTERVAL (V_NAME (tanhf), 0x1p-23, 0x1.205966p+3, 100000) -- cgit v1.2.3 From 0a9270a27f48bea87c5bd3f0f9c759da66fb45a3 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 22 Dec 2022 16:20:22 +0000 Subject: pl/math: Fix fp exceptions in Neon sinhf and sinh Both routines previously relied on the vector expm1(f) routine exposed by the library, which depended on WANT_SIMD_EXCEPT for its fenv behaviour; however, both routines were expected to always trigger fp exceptions correctly. 
To remedy this, both routines now use an inlined helper for expm1 (reused from vector tanhf in the case of sinhf), and special-case small input as well as large when WANT_SIMD_EXCEPT is enabled. --- pl/math/v_tanhf_2u6.c | 38 ++------------------------------------ 1 file changed, 2 insertions(+), 36 deletions(-) (limited to 'pl/math/v_tanhf_2u6.c') diff --git a/pl/math/v_tanhf_2u6.c b/pl/math/v_tanhf_2u6.c index dedc085..0e7ff69 100644 --- a/pl/math/v_tanhf_2u6.c +++ b/pl/math/v_tanhf_2u6.c @@ -5,51 +5,17 @@ */ #include "v_math.h" -#include "estrinf.h" -#include "mathlib.h" #include "pl_sig.h" #include "pl_test.h" #if V_SUPPORTED +#include "v_expm1f_inline.h" + #define BoringBound \ 0x41102cb3 /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for \ negative). */ #define AbsMask 0x7fffffff -#define One 0x3f800000 - -#define Shift v_f32 (0x1.8p23f) -#define InvLn2 v_f32 (0x1.715476p+0f) -#define MLn2hi v_f32 (-0x1.62e4p-1f) -#define MLn2lo v_f32 (-0x1.7f7d1cp-20f) - -#define C(i) v_f32 (__expm1f_poly[i]) - -static inline v_f32_t -expm1f_inline (v_f32_t x) -{ - /* Helper routine for calculating exp(x) - 1. - Copied from v_expm1f_1u6.c, with all special-case handling removed, as - special, tiny and large values are all dealt with in the main tanhf - routine. */ - - /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ - v_f32_t j = v_fma_f32 (InvLn2, x, Shift) - Shift; - v_s32_t i = v_to_s32_f32 (j); - v_f32_t f = v_fma_f32 (j, MLn2hi, x); - f = v_fma_f32 (j, MLn2lo, f); - - /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). - Uses Estrin scheme, where the main __v_expm1f routine uses Horner. */ - v_f32_t f2 = f * f; - v_f32_t p = ESTRIN_4 (f, f2, f2 * f2, C); - p = v_fma_f32 (f2, p, f); - - /* t = 2^i. */ - v_f32_t t = v_as_f32_u32 (v_as_u32_s32 (i << 23) + One); - /* expm1(x) ~= p * t + (t - 1). 
*/ - return v_fma_f32 (p, t, t - 1); -} static NOINLINE v_f32_t special_case (v_f32_t x, v_f32_t y, v_u32_t special) -- cgit v1.2.3 From f0f80b8a19b2593491847ed87456694d789f6f80 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Fri, 6 Jan 2023 09:10:57 +0000 Subject: pl/math: Update copyright years All files in pl/math updated to 2023. --- pl/math/v_tanhf_2u6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'pl/math/v_tanhf_2u6.c') diff --git a/pl/math/v_tanhf_2u6.c b/pl/math/v_tanhf_2u6.c index 0e7ff69..3616611 100644 --- a/pl/math/v_tanhf_2u6.c +++ b/pl/math/v_tanhf_2u6.c @@ -1,6 +1,7 @@ /* * Single-precision vector tanh(x) function. - * Copyright (c) 2022, Arm Limited. + * + * Copyright (c) 2022-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -- cgit v1.2.3