From 0b4165a41d49927b57daa53350d9d33b25475fac Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 21 Jul 2022 13:56:01 +0100 Subject: pl/math: Add Vector/SVE cos. An implementation based on SVE trigonometric instructions. It relies on the same range reduction as Vector/Neon cos, with a slight modification of the shift. The maximum measured error is 2.11ULPs around x = 205.522. --- pl/math/sv_cos_2u5.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 pl/math/sv_cos_2u5.c (limited to 'pl/math/sv_cos_2u5.c') diff --git a/pl/math/sv_cos_2u5.c b/pl/math/sv_cos_2u5.c new file mode 100644 index 0000000..483c73f --- /dev/null +++ b/pl/math/sv_cos_2u5.c @@ -0,0 +1,77 @@ +/* + * Double-precision SVE cos(x) function. + * + * Copyright (c) 2019-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "sv_math.h" +#if SV_SUPPORTED + +#define InvPio2 (sv_f64 (0x1.45f306dc9c882p-1)) +#define NegPio2_1 (sv_f64 (-0x1.921fb50000000p+0)) +#define NegPio2_2 (sv_f64 (-0x1.110b460000000p-26)) +#define NegPio2_3 (sv_f64 (-0x1.1a62633145c07p-54)) +/* Original shift used in Neon cos, plus 1 in the last mantissa place: + the low bits of q then hold n + 1, which gives bit #0 of q the value + expected by trigonometric instructions. */ +#define Shift (sv_f64 (0x1.8000000000001p52)) +#define RangeVal (sv_f64 (0x1p23)) +#define AbsMask (0x7fffffffffffffff) +/* Special-case path: forwards scalar cos, x, y and cmp to sv_call_f64. */ +static NOINLINE sv_f64_t +__sv_cos_specialcase (sv_f64_t x, sv_f64_t y, svbool_t cmp) +{ + return sv_call_f64 (cos, x, y, cmp); +} + +/* A fast SVE implementation of cos based on trigonometric + instructions (FTMAD, FTSSEL, FTSMUL). + Maximum measured error: 2.108 ULPs. + __sv_cos(0x1.9b0ba158c98f3p+7) got -0x1.fddd4c65c7f07p-3 + want -0x1.fddd4c65c7f05p-3. 
*/ +sv_f64_t +__sv_cos_x (sv_f64_t x, const svbool_t pg) +{ + sv_f64_t n, r, r2, y; + svbool_t cmp; + /* r = |x|; cmp marks lanes whose r, viewed as u64, is >= RangeVal's. */ + r = sv_as_f64_u64 (svand_n_u64_x (pg, sv_as_u64_f64 (x), AbsMask)); + cmp = svcmpge_u64 (pg, sv_as_u64_f64 (r), sv_as_u64_f64 (RangeVal)); + + /* n = rint(|x|/(pi/2)). */ + sv_f64_t q = sv_fma_f64_x (pg, InvPio2, r, Shift); + n = svsub_f64_x (pg, q, Shift); + + /* r = |x| - n*(pi/2) (range reduction into -pi/4 .. pi/4). */ + r = sv_fma_f64_x (pg, NegPio2_1, n, r); + r = sv_fma_f64_x (pg, NegPio2_2, n, r); + r = sv_fma_f64_x (pg, NegPio2_3, n, r); + + /* Poly approx: r2 = svtsmul (r, bits of q), then svtmad with imm 7..0. */ + r2 = svtsmul_f64 (r, sv_as_u64_f64 (q)); + y = sv_f64 (0.0); + y = svtmad_f64 (y, r2, 7); + y = svtmad_f64 (y, r2, 6); + y = svtmad_f64 (y, r2, 5); + y = svtmad_f64 (y, r2, 4); + y = svtmad_f64 (y, r2, 3); + y = svtmad_f64 (y, r2, 2); + y = svtmad_f64 (y, r2, 1); + y = svtmad_f64 (y, r2, 0); + + /* Final multiplicative factor: 1.0 or r depending on bit #0 of q. */ + sv_f64_t f = svtssel_f64 (r, sv_as_u64_f64 (q)); + /* Apply factor. */ + y = svmul_f64_x (pg, f, y); + + /* No need to pass pg to specialcase here since cmp is a subset of pg, + guaranteed by the pg-governed cmpge above. */ + if (unlikely (svptest_any (pg, cmp))) + return __sv_cos_specialcase (x, y, cmp); + return y; +} + +strong_alias (__sv_cos_x, _ZGVsMxv_cos) + +#endif -- cgit v1.2.3 From 1bca1a541cce13c352296acd5dfa16160fc27bc9 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:27:31 +0000 Subject: pl/math: Auto-generate mathbench and ulp headers Instead of maintaining three separate lists of routines, which are cumbersome and prone to merge conflicts, we provide a new macro, PL_SIG, which by some preprocessor machinery outputs the lists in the required format (macro formats have been changed very slightly to make the generation simpler). Only routines with simple signatures are handled - binary functions still need mathbench wrappers defined manually. As well, routines with non-standard references (i.e. 
powi/powk) still need entries and wrappers manually defined. --- pl/math/sv_cos_2u5.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'pl/math/sv_cos_2u5.c') diff --git a/pl/math/sv_cos_2u5.c b/pl/math/sv_cos_2u5.c index 483c73f..a19be9b 100644 --- a/pl/math/sv_cos_2u5.c +++ b/pl/math/sv_cos_2u5.c @@ -6,6 +6,8 @@ */ #include "sv_math.h" +#include "pl_sig.h" + #if SV_SUPPORTED #define InvPio2 (sv_f64 (0x1.45f306dc9c882p-1)) @@ -74,4 +76,5 @@ __sv_cos_x (sv_f64_t x, const svbool_t pg) strong_alias (__sv_cos_x, _ZGVsMxv_cos) + PL_SIG (SV, D, 1, cos, -3.1, 3.1) #endif -- cgit v1.2.3 From ecb1c6f6ea7872645cb4c26514d5f64815b61a1b Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:27:39 +0000 Subject: pl/math: Move ULP limits to routine source files Introduces a new set of macros and Make rules for mechanically generating a list of ULP limits for each routine, to be consumed by runulp.sh. This removes the need to maintain long lists of thresholds in runulp.sh. --- pl/math/sv_cos_2u5.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'pl/math/sv_cos_2u5.c') diff --git a/pl/math/sv_cos_2u5.c b/pl/math/sv_cos_2u5.c index a19be9b..146ca22 100644 --- a/pl/math/sv_cos_2u5.c +++ b/pl/math/sv_cos_2u5.c @@ -7,6 +7,7 @@ #include "sv_math.h" #include "pl_sig.h" +#include "pl_test.h" #if SV_SUPPORTED @@ -74,7 +75,8 @@ __sv_cos_x (sv_f64_t x, const svbool_t pg) return y; } -strong_alias (__sv_cos_x, _ZGVsMxv_cos) +PL_ALIAS (__sv_cos_x, _ZGVsMxv_cos) - PL_SIG (SV, D, 1, cos, -3.1, 3.1) +PL_SIG (SV, D, 1, cos, -3.1, 3.1) +PL_TEST_ULP (__sv_cos, 1.61) #endif -- cgit v1.2.3 From 202e46317ee8983516b6413066a57bd624ffa044 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:28:06 +0000 Subject: pl/math: Move test intervals to routine source files To conclude the work on simplifying the runulp.sh script, a new macro has been introduced to specify the intervals in which a routine should be tested in the routine source. 
This is eventually consumed by runulp.sh. --- pl/math/sv_cos_2u5.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'pl/math/sv_cos_2u5.c') diff --git a/pl/math/sv_cos_2u5.c b/pl/math/sv_cos_2u5.c index 146ca22..a06ab9a 100644 --- a/pl/math/sv_cos_2u5.c +++ b/pl/math/sv_cos_2u5.c @@ -79,4 +79,6 @@ PL_ALIAS (__sv_cos_x, _ZGVsMxv_cos) PL_SIG (SV, D, 1, cos, -3.1, 3.1) PL_TEST_ULP (__sv_cos, 1.61) +PL_TEST_INTERVAL (__sv_cos, 0, 0xffff0000, 10000) +PL_TEST_INTERVAL (__sv_cos, 0x1p-4, 0x1p4, 500000) #endif -- cgit v1.2.3 From f0f80b8a19b2593491847ed87456694d789f6f80 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Fri, 6 Jan 2023 09:10:57 +0000 Subject: pl/math: Update copyright years All files in pl/math updated to 2023. --- pl/math/sv_cos_2u5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pl/math/sv_cos_2u5.c') diff --git a/pl/math/sv_cos_2u5.c b/pl/math/sv_cos_2u5.c index a06ab9a..1940348 100644 --- a/pl/math/sv_cos_2u5.c +++ b/pl/math/sv_cos_2u5.c @@ -1,7 +1,7 @@ /* * Double-precision SVE cos(x) function. * - * Copyright (c) 2019-2022, Arm Limited. + * Copyright (c) 2019-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -- cgit v1.2.3