aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pl/math/atan_2u5.c73
-rw-r--r--pl/math/include/mathlib.h1
-rw-r--r--pl/math/test/testcases/directed/atan.tst22
-rw-r--r--pl/math/v_atan_2u5.c29
4 files changed, 117 insertions, 8 deletions
diff --git a/pl/math/atan_2u5.c b/pl/math/atan_2u5.c
new file mode 100644
index 0000000..99fea0f
--- /dev/null
+++ b/pl/math/atan_2u5.c
@@ -0,0 +1,73 @@
+/*
+ * Double-precision atan(x) function.
+ *
+ * Copyright (c) 2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "atan_common.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define PiOver2 0x1.921fb54442d18p+0
+#define TinyBound 0x3e1 /* top12(asuint64(0x1p-30)). */
+#define BigBound 0x434 /* top12(asuint64(0x1p53)). */
+#define OneTop 0x3ff
+
+/* Fast implementation of double-precision atan.
+ Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
+ z=1/x and shift = pi/2. Maximum observed error is 2.27 ulps:
+ atan(0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1
+ want 0x1.9225645bdd7c3p-1. */
+double
+atan (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t sign = ix & ~AbsMask;
+ uint64_t ia = ix & AbsMask;
+ uint32_t ia12 = ia >> 52;
+
+ if (unlikely (ia12 >= BigBound || ia12 < TinyBound))
+ {
+ if (ia12 < TinyBound)
+ /* Avoid underflow by returning x. */
+ return x;
+ if (ia > 0x7ff0000000000000)
+ /* Propagate NaN. */
+ return __math_invalid (x);
+ /* atan(x) rounds to PiOver2 for large x. */
+ return asdouble (asuint64 (PiOver2) ^ sign);
+ }
+
+ double z, az, shift;
+ if (ia12 >= OneTop)
+ {
+ /* For x > 1, use atan(x) = pi / 2 + atan(-1 / x). */
+ z = -1.0 / x;
+ shift = PiOver2;
+ /* Use absolute value only when needed (odd powers of z). */
+ az = -fabs (z);
+ }
+ else
+ {
+ /* For x < 1, approximate atan(x) directly. */
+ z = x;
+ shift = 0;
+ az = asdouble (ia);
+ }
+
+ /* Calculate polynomial, shift + z + z^3 * P(z^2). */
+ double y = eval_poly (z, az, shift);
+ /* Copy sign. */
+ return asdouble (asuint64 (y) ^ sign);
+}
+
+PL_SIG (S, D, 1, atan, -10.0, 10.0)
+PL_TEST_ULP (atan, 1.78)
+PL_TEST_INTERVAL (atan, 0, 0x1p-30, 10000)
+PL_TEST_INTERVAL (atan, -0, -0x1p-30, 1000)
+PL_TEST_INTERVAL (atan, 0x1p-30, 0x1p53, 900000)
+PL_TEST_INTERVAL (atan, -0x1p-30, -0x1p53, 90000)
+PL_TEST_INTERVAL (atan, 0x1p53, inf, 10000)
+PL_TEST_INTERVAL (atan, -0x1p53, -inf, 1000)
diff --git a/pl/math/include/mathlib.h b/pl/math/include/mathlib.h
index 44cbc73..05fa306 100644
--- a/pl/math/include/mathlib.h
+++ b/pl/math/include/mathlib.h
@@ -27,6 +27,7 @@ float tanhf (float);
double acosh (double);
double asinh (double);
+double atan (double);
double atan2 (double, double);
double cbrt (double);
double cosh (double);
diff --git a/pl/math/test/testcases/directed/atan.tst b/pl/math/test/testcases/directed/atan.tst
new file mode 100644
index 0000000..5716276
--- /dev/null
+++ b/pl/math/test/testcases/directed/atan.tst
@@ -0,0 +1,22 @@
+; atan.tst
+;
+; Copyright 1999-2022, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=atan op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan op1=7ff00000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan op1=fff00000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan op1=00000000.00000000 result=00000000.00000000 errno=0
+func=atan op1=80000000.00000000 result=80000000.00000000 errno=0
+; Inconsistent behavior was detected for the following 2 cases.
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=atan op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=atan op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
+
+func=atan op1=3ff00000.00000000 result=3fe921fb.54442d18.469 errno=0
+func=atan op1=bff00000.00000000 result=bfe921fb.54442d18.469 errno=0
diff --git a/pl/math/v_atan_2u5.c b/pl/math/v_atan_2u5.c
index 43b4abd..92479ab 100644
--- a/pl/math/v_atan_2u5.c
+++ b/pl/math/v_atan_2u5.c
@@ -15,6 +15,8 @@
#define PiOver2 v_f64 (0x1.921fb54442d18p+0)
#define AbsMask v_u64 (0x7fffffffffffffff)
+#define TinyBound 0x3e1 /* top12(asuint64(0x1p-30)). */
+#define BigBound 0x434 /* top12(asuint64(0x1p53)). */
/* Fast implementation of vector atan.
Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
@@ -24,11 +26,20 @@
VPCS_ATTR
v_f64_t V_NAME (atan) (v_f64_t x)
{
- /* No need to trigger special case. Small cases, infs and nans
- are supported by our approximation technique. */
+ /* Small cases, infs and nans are supported by our approximation technique,
+ but do not set fenv flags correctly. Only trigger special case if we need
+ fenv. */
v_u64_t ix = v_as_u64_f64 (x);
v_u64_t sign = ix & ~AbsMask;
+#if WANT_SIMD_EXCEPT
+ v_u64_t ia12 = (ix >> 52) & 0x7ff;
+ v_u64_t special = v_cond_u64 (ia12 - TinyBound > BigBound - TinyBound);
+ /* If any lane is special, fall back to the scalar routine for all lanes. */
+ if (unlikely (v_any_u64 (special)))
+ return v_call_f64 (atan, x, v_f64 (0), v_u64 (-1));
+#endif
+
/* Argument reduction:
y := arctan(x) for x < 1
y := pi/2 + arctan(-1/x) for x > 1
@@ -46,16 +57,18 @@ v_f64_t V_NAME (atan) (v_f64_t x)
/* y = atan(x) if x>0, -atan(-x) otherwise. */
y = v_as_f64_u64 (v_as_u64_f64 (y) ^ sign);
-
return y;
}
VPCS_ALIAS
PL_SIG (V, D, 1, atan, -10.0, 10.0)
PL_TEST_ULP (V_NAME (atan), 1.78)
-PL_TEST_INTERVAL (V_NAME (atan), -10.0, 10.0, 50000)
-PL_TEST_INTERVAL (V_NAME (atan), -1.0, 1.0, 40000)
-PL_TEST_INTERVAL (V_NAME (atan), 0.0, 1.0, 40000)
-PL_TEST_INTERVAL (V_NAME (atan), 1.0, 100.0, 40000)
-PL_TEST_INTERVAL (V_NAME (atan), 1e6, 1e32, 40000)
+PL_TEST_EXPECT_FENV (V_NAME (atan), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME (atan), 0, 0x1p-30, 10000)
+PL_TEST_INTERVAL (V_NAME (atan), -0, -0x1p-30, 1000)
+PL_TEST_INTERVAL (V_NAME (atan), 0x1p-30, 0x1p53, 900000)
+PL_TEST_INTERVAL (V_NAME (atan), -0x1p-30, -0x1p53, 90000)
+PL_TEST_INTERVAL (V_NAME (atan), 0x1p53, inf, 10000)
+PL_TEST_INTERVAL (V_NAME (atan), -0x1p53, -inf, 1000)
+
#endif