From 47eb0a883fb82fcd394353920e2cca4d0a0ffe9d Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Fri, 17 Jun 2022 11:09:34 +0100 Subject: pl/math: Add vector/Neon atan2f Successfully ran tests and benchmarks. New routine is accurate to 3 ulps. --- pl/math/v_atan2f_3u.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 pl/math/v_atan2f_3u.c (limited to 'pl/math/v_atan2f_3u.c') diff --git a/pl/math/v_atan2f_3u.c b/pl/math/v_atan2f_3u.c new file mode 100644 index 0000000..4212351 --- /dev/null +++ b/pl/math/v_atan2f_3u.c @@ -0,0 +1,78 @@ +/* + * Single-precision vector atan2(y, x) function. + * + * Copyright (c) 2021-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#if V_SUPPORTED + +#include "atanf_common.h" + +/* Useful constants. */ +#define PiOver2 v_f32 (0x1.921fb6p+0f) +#define SignMask v_u32 (0x80000000) + +/* Special cases i.e. 0, infinity and nan (fall back to scalar calls). */ +VPCS_ATTR +__attribute__ ((noinline)) static v_f32_t +specialcase (v_f32_t y, v_f32_t x, v_f32_t ret, v_u32_t cmp) +{ + return v_call2_f32 (atan2f, y, x, ret, cmp); +} + +/* Returns 1 if input is the bit representation of 0, infinity or nan. Doubling i shifts out the sign bit and the subtraction wraps a zero input round to the largest unsigned value, so one unsigned compare against twice the infinity pattern (0x7f800000) minus 1 covers all three classes - assuming v_u32_t lanes are 32-bit unsigned. */ +static inline v_u32_t +zeroinfnan (v_u32_t i) +{ + return v_cond_u32 (2 * i - 1 >= v_u32 (2 * 0x7f800000lu - 1)); +} + +/* Fast implementation of vector atan2f. Maximum observed error is + 2.95 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]: + v_atan2(0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1 + want 0x1.967f00p-1. 
*/ +VPCS_ATTR +v_f32_t V_NAME (atan2f) (v_f32_t y, v_f32_t x) +{ + v_u32_t ix = v_as_u32_f32 (x); + v_u32_t iy = v_as_u32_f32 (y); + + v_u32_t special_cases = zeroinfnan (ix) | zeroinfnan (iy); + + v_u32_t sign_x = ix & SignMask; + v_u32_t sign_y = iy & SignMask; + v_u32_t sign_xy = sign_x ^ sign_y; + + v_f32_t ax = v_abs_f32 (x); + v_f32_t ay = v_abs_f32 (y); + + v_u32_t pred_xlt0 = x < 0.0f; + v_u32_t pred_aygtax = ay > ax; + + /* Set up z for call to atanf: ay / ax, or -ax / ay where ay > ax (assuming v_sel_f32 takes its second argument where the predicate is set). */ + v_f32_t n = v_sel_f32 (pred_aygtax, -ax, ay); + v_f32_t d = v_sel_f32 (pred_aygtax, ay, ax); + v_f32_t z = v_div_f32 (n, d); + + /* Work out the correct shift: a multiple of pi/2 chosen from pred_xlt0 (-2 or 0) and pred_aygtax (optionally + 1). */ + v_f32_t shift = v_sel_f32 (pred_xlt0, v_f32 (-2.0f), v_f32 (0.0f)); + shift = v_sel_f32 (pred_aygtax, shift + 1.0f, shift); + shift *= PiOver2; + + v_f32_t ret = eval_poly (z, z, shift); + + /* Account for the signs of x and y: sign_xy is sign_x ^ sign_y. */ + ret = v_as_f32_u32 (v_as_u32_f32 (ret) ^ sign_xy); + + if (unlikely (v_any_u32 (special_cases))) + { + return specialcase (y, x, ret, special_cases); + } + + return ret; +} +VPCS_ALIAS + +#endif -- cgit v1.2.3 From 3d1a87e2fe152dc52d4a624425f5b2349a4088b0 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Mon, 15 Aug 2022 11:19:25 +0100 Subject: pl/math: Audit Neon special-case handlers Prevent inlining in most cases - change to use AOR style (NOINLINE). --- pl/math/v_atan2f_3u.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pl/math/v_atan2f_3u.c') diff --git a/pl/math/v_atan2f_3u.c b/pl/math/v_atan2f_3u.c index 4212351..dc0fbca 100644 --- a/pl/math/v_atan2f_3u.c +++ b/pl/math/v_atan2f_3u.c @@ -16,7 +16,7 @@ /* Special cases i.e. 0, infinity and nan (fall back to scalar calls). 
*/ VPCS_ATTR -__attribute__ ((noinline)) static v_f32_t +NOINLINE static v_f32_t specialcase (v_f32_t y, v_f32_t x, v_f32_t ret, v_u32_t cmp) { return v_call2_f32 (atan2f, y, x, ret, cmp); -- cgit v1.2.3 From 1bca1a541cce13c352296acd5dfa16160fc27bc9 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:27:31 +0000 Subject: pl/math: Auto-generate mathbench and ulp headers Instead of maintaining three separate lists of routines, which are cumbersome and prone to merge conflicts, we provide a new macro, PL_SIG, which by some preprocessor machinery outputs the lists in the required format (macro formats have been changed very slightly to make the generation simpler). Only routines with simple signatures are handled - binary functions still need mathbench wrappers defined manually. As well, routines with non-standard references (i.e. powi/powk) still need entries and wrappers manually defined. --- pl/math/v_atan2f_3u.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'pl/math/v_atan2f_3u.c') diff --git a/pl/math/v_atan2f_3u.c b/pl/math/v_atan2f_3u.c index dc0fbca..8c2c8f2 100644 --- a/pl/math/v_atan2f_3u.c +++ b/pl/math/v_atan2f_3u.c @@ -6,6 +6,8 @@ */ #include "v_math.h" +#include "pl_sig.h" + #if V_SUPPORTED #include "atanf_common.h" @@ -75,4 +77,6 @@ v_f32_t V_NAME (atan2f) (v_f32_t y, v_f32_t x) } VPCS_ALIAS +/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */ +PL_SIG (V, F, 2, atan2) #endif -- cgit v1.2.3 From ecb1c6f6ea7872645cb4c26514d5f64815b61a1b Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:27:39 +0000 Subject: pl/math: Move ULP limits to routine source files Introduces a new set of macros and Make rules for mechanically generating a list of ULP limits for each routine, to be consumed by runulp.sh. This removes the need to maintain long lists of thresholds in runulp.sh. 
--- pl/math/v_atan2f_3u.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'pl/math/v_atan2f_3u.c') diff --git a/pl/math/v_atan2f_3u.c b/pl/math/v_atan2f_3u.c index 8c2c8f2..3d8f9fc 100644 --- a/pl/math/v_atan2f_3u.c +++ b/pl/math/v_atan2f_3u.c @@ -7,6 +7,7 @@ #include "v_math.h" #include "pl_sig.h" +#include "pl_test.h" #if V_SUPPORTED @@ -79,4 +80,5 @@ VPCS_ALIAS /* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */ PL_SIG (V, F, 2, atan2) +PL_TEST_ULP (V_NAME (atan2f), 2.46) #endif -- cgit v1.2.3 From 202e46317ee8983516b6413066a57bd624ffa044 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Thu, 15 Dec 2022 13:28:06 +0000 Subject: pl/math: Move test intervals to routine source files To conclude the work on simplifying the runulp.sh script, a new macro has been introduced to specify the intervals in which a routine should be tested in the routine source. This is eventually consumed by runulp.sh. --- pl/math/v_atan2f_3u.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'pl/math/v_atan2f_3u.c') diff --git a/pl/math/v_atan2f_3u.c b/pl/math/v_atan2f_3u.c index 3d8f9fc..abf8f5e 100644 --- a/pl/math/v_atan2f_3u.c +++ b/pl/math/v_atan2f_3u.c @@ -81,4 +81,9 @@ VPCS_ALIAS /* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */ PL_SIG (V, F, 2, atan2) PL_TEST_ULP (V_NAME (atan2f), 2.46) +PL_TEST_INTERVAL (V_NAME (atan2f), -10.0, 10.0, 50000) +PL_TEST_INTERVAL (V_NAME (atan2f), -1.0, 1.0, 40000) +PL_TEST_INTERVAL (V_NAME (atan2f), 0.0, 1.0, 40000) +PL_TEST_INTERVAL (V_NAME (atan2f), 1.0, 100.0, 40000) +PL_TEST_INTERVAL (V_NAME (atan2f), 1e6, 1e32, 40000) #endif -- cgit v1.2.3 From f0f80b8a19b2593491847ed87456694d789f6f80 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Fri, 6 Jan 2023 09:10:57 +0000 Subject: pl/math: Update copyright years All files in pl/math updated to 2023. 
--- pl/math/v_atan2f_3u.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pl/math/v_atan2f_3u.c') diff --git a/pl/math/v_atan2f_3u.c b/pl/math/v_atan2f_3u.c index abf8f5e..5d1e6ca 100644 --- a/pl/math/v_atan2f_3u.c +++ b/pl/math/v_atan2f_3u.c @@ -1,7 +1,7 @@ /* * Single-precision vector atan2(y, x) function. * - * Copyright (c) 2021-2022, Arm Limited. + * Copyright (c) 2021-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -- cgit v1.2.3