From 8a644bf15812edaba38b41ca142e8e7e328e7918 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Wed, 30 Nov 2022 09:42:41 +0000 Subject: pl/math: Add scalar and vector/Neon tanhf Both routines use simplified inline versions of expm1f, and are accurate to 2.6 ULP. --- pl/math/tanhf_2u6.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 pl/math/tanhf_2u6.c (limited to 'pl/math/tanhf_2u6.c') diff --git a/pl/math/tanhf_2u6.c b/pl/math/tanhf_2u6.c new file mode 100644 index 0000000..9db2533 --- /dev/null +++ b/pl/math/tanhf_2u6.c @@ -0,0 +1,80 @@ +/* + * Single-precision tanh(x) function. + * + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ +#include "math_config.h" + +#define BoringBound \ + 0x41102cb3 /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for \ + negative). */ +#define AbsMask 0x7fffffff +#define One 0x3f800000 + +#define Shift (0x1.8p23f) +#define InvLn2 (0x1.715476p+0f) +#define Ln2hi (0x1.62e4p-1f) +#define Ln2lo (0x1.7f7d1cp-20f) + +#define C(i) __expm1f_poly[i] + +static inline float +expm1f_inline (float x) +{ + /* Helper routine for calculating exp(x) - 1. + Copied from expm1f_1u6.c, with several simplifications: + - No special-case handling for tiny or special values, instead return early + from the main routine. + - No special handling for large values: + - No early return for infinity. + - Simpler combination of p and t in final stage of algorithm. + - |i| < 27, so can calculate t by simpler shift-and-add, instead of + ldexpf (same as vector algorithm). */ + + /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ + float j = fmaf (InvLn2, x, Shift) - Shift; + int32_t i = j; + float f = fmaf (j, -Ln2hi, x); + f = fmaf (j, -Ln2lo, f); + + /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). + Uses Estrin scheme, where the main expm1f routine uses Horner. */ + float f2 = f * f; + float p_01 = fmaf (f, C (1), C (0)); + float p_23 = fmaf (f, C (3), C (2)); + float p = fmaf (f2, p_23, p_01); + p = fmaf (f2 * f2, C (4), p); + p = fmaf (f2, p, f); + + /* t = 2^i. */ + float t = asfloat ((i << 23) + One); + /* expm1(x) ~= p * t + (t - 1). */ + return fmaf (p, t, t - 1); +} + +/* Approximation for single-precision tanh(x), using a simplified version of + expm1f. The maximum error is 2.58 ULP: + tanhf(0x1.fa5eep-5) got 0x1.f9ba02p-5 + want 0x1.f9ba08p-5. */ +float +tanhf (float x) +{ + uint32_t ix = asuint (x); + uint32_t iax = ix & AbsMask; + uint32_t sign = ix & ~AbsMask; + + if (unlikely (iax > BoringBound)) + { + if (iax > 0x7f800000) + return __math_invalidf (x); + return asfloat (One | sign); + } + + if (unlikely (iax < 0x34000000)) + return x; + + /* tanh(x) = (e^2x - 1) / (e^2x + 1). */ + float q = expm1f_inline (2 * x); + return q / (q + 2); +} -- cgit v1.2.3 From 0d3c3cd35440d224ddbcd1496b48835443f4c7c1 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Mon, 5 Dec 2022 11:56:33 +0000 Subject: pl/math: Avoid UB in scalar tanhf The ldexp shortcut was left-shifting a signed value. We now bias the exponent first, will allows the shift to be done on an unsigned value. --- pl/math/tanhf_2u6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pl/math/tanhf_2u6.c') diff --git a/pl/math/tanhf_2u6.c b/pl/math/tanhf_2u6.c index 9db2533..145f437 100644 --- a/pl/math/tanhf_2u6.c +++ b/pl/math/tanhf_2u6.c @@ -48,7 +48,7 @@ expm1f_inline (float x) p = fmaf (f2, p, f); /* t = 2^i. */ - float t = asfloat ((i << 23) + One); + float t = asfloat ((uint32_t) (i + 127) << 23); /* expm1(x) ~= p * t + (t - 1). */ return fmaf (p, t, t - 1); } -- cgit v1.2.3 From 1bca1a541cce13c352296acd5dfa16160fc27bc9 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:27:31 +0000 Subject: pl/math: Auto-generate mathbench and ulp headers Instead of maintaining three separate lists of routines, which are cumbersome and prone to merge conflicts, we provide a new macro, PL_SIG, which by some preprocessor machinery outputs the lists in the required format (macro formats have been changed very slightly to make the generation simpler). Only routines with simple signatures are handled - binary functions still need mathbench wrappers defined manually. As well, routines with non-standard references (i.e. powi/powk) still need entries and wrappers manually defined. --- pl/math/tanhf_2u6.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'pl/math/tanhf_2u6.c') diff --git a/pl/math/tanhf_2u6.c b/pl/math/tanhf_2u6.c index 145f437..90f561f 100644 --- a/pl/math/tanhf_2u6.c +++ b/pl/math/tanhf_2u6.c @@ -5,6 +5,7 @@ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" +#include "pl_sig.h" #define BoringBound \ 0x41102cb3 /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for \ @@ -78,3 +79,5 @@ tanhf (float x) float q = expm1f_inline (2 * x); return q / (q + 2); } + +PL_SIG (S, F, 1, tanh, -10.0, 10.0) -- cgit v1.2.3 From ecb1c6f6ea7872645cb4c26514d5f64815b61a1b Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:27:39 +0000 Subject: pl/math: Move ULP limits to routine source files Introduces a new set of macros and Make rules for mechanically generating a list of ULP limits for each routine, to be consumed by runulp.sh. This removes the need to maintain long lists of thresholds in runulp.sh. --- pl/math/tanhf_2u6.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'pl/math/tanhf_2u6.c') diff --git a/pl/math/tanhf_2u6.c b/pl/math/tanhf_2u6.c index 90f561f..e6cbbd0 100644 --- a/pl/math/tanhf_2u6.c +++ b/pl/math/tanhf_2u6.c @@ -6,6 +6,7 @@ */ #include "math_config.h" #include "pl_sig.h" +#include "pl_test.h" #define BoringBound \ 0x41102cb3 /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for \ @@ -81,3 +82,4 @@ tanhf (float x) } PL_SIG (S, F, 1, tanh, -10.0, 10.0) +PL_TEST_ULP (tanhf, 2.09) -- cgit v1.2.3 From 202e46317ee8983516b6413066a57bd624ffa044 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:28:06 +0000 Subject: pl/math: Move test intervals to routine source files To conclude the work on simplifying the runulp.sh script, a new macro has been introduced to specify the intervals in which a routine should be tested in the routine source. This is eventually consumed by runulp.sh. --- pl/math/tanhf_2u6.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'pl/math/tanhf_2u6.c') diff --git a/pl/math/tanhf_2u6.c b/pl/math/tanhf_2u6.c index e6cbbd0..745e5e3 100644 --- a/pl/math/tanhf_2u6.c +++ b/pl/math/tanhf_2u6.c @@ -83,3 +83,9 @@ tanhf (float x) PL_SIG (S, F, 1, tanh, -10.0, 10.0) PL_TEST_ULP (tanhf, 2.09) +PL_TEST_INTERVAL (tanhf, 0, 0x1p-23, 1000) +PL_TEST_INTERVAL (tanhf, -0, -0x1p-23, 1000) +PL_TEST_INTERVAL (tanhf, 0x1p-23, 0x1.205966p+3, 100000) +PL_TEST_INTERVAL (tanhf, -0x1p-23, -0x1.205966p+3, 100000) +PL_TEST_INTERVAL (tanhf, 0x1.205966p+3, inf, 100) +PL_TEST_INTERVAL (tanhf, -0x1.205966p+3, -inf, 100) -- cgit v1.2.3 From f0f80b8a19b2593491847ed87456694d789f6f80 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Fri, 6 Jan 2023 09:10:57 +0000 Subject: pl/math: Update copyright years All files in pl/math updated to 2023. --- pl/math/tanhf_2u6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pl/math/tanhf_2u6.c') diff --git a/pl/math/tanhf_2u6.c b/pl/math/tanhf_2u6.c index 745e5e3..76e54a4 100644 --- a/pl/math/tanhf_2u6.c +++ b/pl/math/tanhf_2u6.c @@ -1,7 +1,7 @@ /* * Single-precision tanh(x) function. * - * Copyright (c) 2022, Arm Limited. + * Copyright (c) 2022-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "math_config.h" -- cgit v1.2.3