diff options
-rw-r--r-- | pl/math/atan_2u5.c | 73 | ||||
-rw-r--r-- | pl/math/include/mathlib.h | 1 | ||||
-rw-r--r-- | pl/math/test/testcases/directed/atan.tst | 22 | ||||
-rw-r--r-- | pl/math/v_atan_2u5.c | 29 |
4 files changed, 117 insertions, 8 deletions
diff --git a/pl/math/atan_2u5.c b/pl/math/atan_2u5.c new file mode 100644 index 0000000..99fea0f --- /dev/null +++ b/pl/math/atan_2u5.c @@ -0,0 +1,73 @@ +/* + * Double-precision atan(x) function. + * + * Copyright (c) 2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "pl_sig.h" +#include "pl_test.h" +#include "atan_common.h" + +#define AbsMask 0x7fffffffffffffff +#define PiOver2 0x1.921fb54442d18p+0 +#define TinyBound 0x3e1 /* top12(asuint64(0x1p-30)). */ +#define BigBound 0x434 /* top12(asuint64(0x1p53)). */ +#define OneTop 0x3ff + +/* Fast implementation of double-precision atan. + Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using + z=1/x and shift = pi/2. Maximum observed error is 2.27 ulps: + atan(0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1 + want 0x1.9225645bdd7c3p-1. */ +double +atan (double x) +{ + uint64_t ix = asuint64 (x); + uint64_t sign = ix & ~AbsMask; + uint64_t ia = ix & AbsMask; + uint32_t ia12 = ia >> 52; + + if (unlikely (ia12 >= BigBound || ia12 < TinyBound)) + { + if (ia12 < TinyBound) + /* Avoid underflow by returning x. */ + return x; + if (ia > 0x7ff0000000000000) + /* Propagate NaN. */ + return __math_invalid (x); + /* atan(x) rounds to PiOver2 for large x. */ + return asdouble (asuint64 (PiOver2) ^ sign); + } + + double z, az, shift; + if (ia12 >= OneTop) + { + /* For x > 1, use atan(x) = pi / 2 + atan(-1 / x). */ + z = -1.0 / x; + shift = PiOver2; + /* Use absolute value only when needed (odd powers of z). */ + az = -fabs (z); + } + else + { + /* For x < 1, approximate atan(x) directly. */ + z = x; + shift = 0; + az = asdouble (ia); + } + + /* Calculate polynomial, shift + z + z^3 * P(z^2). */ + double y = eval_poly (z, az, shift); + /* Copy sign. */ + return asdouble (asuint64 (y) ^ sign); +} + +PL_SIG (S, D, 1, atan, -10.0, 10.0) +PL_TEST_ULP (atan, 1.78) +PL_TEST_INTERVAL (atan, 0, 0x1p-30, 10000) +PL_TEST_INTERVAL (atan, -0, -0x1p-30, 1000) +PL_TEST_INTERVAL (atan, 0x1p-30, 0x1p53, 900000) +PL_TEST_INTERVAL (atan, -0x1p-30, -0x1p53, 90000) +PL_TEST_INTERVAL (atan, 0x1p53, inf, 10000) +PL_TEST_INTERVAL (atan, -0x1p53, -inf, 1000) diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h index 44cbc73..05fa306 100644 --- a/pl/math/include/mathlib.h +++ b/pl/math/include/mathlib.h @@ -27,6 +27,7 @@ float tanhf (float); double acosh (double); double asinh (double); +double atan (double); double atan2 (double, double); double cbrt (double); double cosh (double); diff --git a/pl/math/test/testcases/directed/atan.tst b/pl/math/test/testcases/directed/atan.tst new file mode 100644 index 0000000..5716276 --- /dev/null +++ b/pl/math/test/testcases/directed/atan.tst @@ -0,0 +1,22 @@ +; atan.tst +; +; Copyright 1999-2022, Arm Limited. +; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + +func=atan op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 +func=atan op1=fff80000.00000001 result=7ff80000.00000001 errno=0 +func=atan op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i +func=atan op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i +func=atan op1=7ff00000.00000000 result=3ff921fb.54442d18.469 errno=0 +func=atan op1=fff00000.00000000 result=bff921fb.54442d18.469 errno=0 +func=atan op1=00000000.00000000 result=00000000.00000000 errno=0 +func=atan op1=80000000.00000000 result=80000000.00000000 errno=0 +; Inconsistent behavior was detected for the following 2 cases. +; No exception is raised with certain versions of glibc. Functions +; approximated by x near zero may not generate/implement flops and +; thus may not raise exceptions. +func=atan op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux +func=atan op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux + +func=atan op1=3ff00000.00000000 result=3fe921fb.54442d18.469 errno=0 +func=atan op1=bff00000.00000000 result=bfe921fb.54442d18.469 errno=0 diff --git a/pl/math/v_atan_2u5.c b/pl/math/v_atan_2u5.c index 43b4abd..92479ab 100644 --- a/pl/math/v_atan_2u5.c +++ b/pl/math/v_atan_2u5.c @@ -15,6 +15,8 @@ #define PiOver2 v_f64 (0x1.921fb54442d18p+0) #define AbsMask v_u64 (0x7fffffffffffffff) +#define TinyBound 0x3e1 /* top12(asuint64(0x1p-30)). */ +#define BigBound 0x434 /* top12(asuint64(0x1p53)). */ /* Fast implementation of vector atan. Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using @@ -24,11 +26,20 @@ VPCS_ATTR v_f64_t V_NAME (atan) (v_f64_t x) { - /* No need to trigger special case. Small cases, infs and nans - are supported by our approximation technique. */ + /* Small cases, infs and nans are supported by our approximation technique, + but do not set fenv flags correctly. Only trigger special case if we need + fenv. */ v_u64_t ix = v_as_u64_f64 (x); v_u64_t sign = ix & ~AbsMask; +#if WANT_SIMD_EXCEPT + v_u64_t ia12 = (ix >> 52) & 0x7ff; + v_u64_t special = v_cond_u64 (ia12 - TinyBound > BigBound - TinyBound); + /* If any lane is special, fall back to the scalar routine for all lanes. */ + if (unlikely (v_any_u64 (special))) + return v_call_f64 (atan, x, v_f64 (0), v_u64 (-1)); +#endif + /* Argument reduction: y := arctan(x) for x < 1 y := pi/2 + arctan(-1/x) for x > 1 @@ -46,16 +57,18 @@ v_f64_t V_NAME (atan) (v_f64_t x) /* y = atan(x) if x>0, -atan(-x) otherwise. */ y = v_as_f64_u64 (v_as_u64_f64 (y) ^ sign); - return y; } VPCS_ALIAS PL_SIG (V, D, 1, atan, -10.0, 10.0) PL_TEST_ULP (V_NAME (atan), 1.78) -PL_TEST_INTERVAL (V_NAME (atan), -10.0, 10.0, 50000) -PL_TEST_INTERVAL (V_NAME (atan), -1.0, 1.0, 40000) -PL_TEST_INTERVAL (V_NAME (atan), 0.0, 1.0, 40000) -PL_TEST_INTERVAL (V_NAME (atan), 1.0, 100.0, 40000) -PL_TEST_INTERVAL (V_NAME (atan), 1e6, 1e32, 40000) +PL_TEST_EXPECT_FENV (V_NAME (atan), WANT_SIMD_EXCEPT) +PL_TEST_INTERVAL (V_NAME (atan), 0, 0x1p-30, 10000) +PL_TEST_INTERVAL (V_NAME (atan), -0, -0x1p-30, 1000) +PL_TEST_INTERVAL (V_NAME (atan), 0x1p-30, 0x1p53, 900000) +PL_TEST_INTERVAL (V_NAME (atan), -0x1p-30, -0x1p53, 90000) +PL_TEST_INTERVAL (V_NAME (atan), 0x1p53, inf, 10000) +PL_TEST_INTERVAL (V_NAME (atan), -0x1p53, -inf, 1000) + #endif |