aboutsummaryrefslogtreecommitdiff
path: root/pl/math/erfcf_2u.c
diff options
context:
space:
mode:
Diffstat (limited to 'pl/math/erfcf_2u.c')
-rw-r--r--pl/math/erfcf_2u.c133
1 files changed, 133 insertions, 0 deletions
diff --git a/pl/math/erfcf_2u.c b/pl/math/erfcf_2u.c
new file mode 100644
index 0000000..5a3f9b0
--- /dev/null
+++ b/pl/math/erfcf_2u.c
@@ -0,0 +1,133 @@
+/*
+ * Single-precision erfc(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "erfcf.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define P(i) __erfcf_poly_data.poly[i]
+
+/* Approximation of erfcf for |x| > 4.0. */
+static inline float
+approx_erfcf_hi (float x, uint32_t sign, const double *coeff)
+{
+ if (sign)
+ {
+ return 2.0f;
+ }
+
+ /* Polynomial contribution. */
+ double z = (double) fabs (x);
+ float p = (float) eval_poly (z, coeff);
+ /* Gaussian contribution. */
+ float e_mx2 = (float) eval_exp_mx2 (z);
+
+ return p * e_mx2;
+}
+
+/* Approximation of erfcf for |x| < 4.0. */
+static inline float
+approx_erfcf_lo (float x, uint32_t sign, const double *coeff)
+{
+ /* Polynomial contribution. */
+ double z = (double) fabs (x);
+ float p = (float) eval_poly (z, coeff);
+ /* Gaussian contribution. */
+ float e_mx2 = (float) eval_exp_mx2 (z);
+
+ if (sign)
+ return fmaf (-p, e_mx2, 2.0f);
+ else
+ return p * e_mx2;
+}
+
+/* Top 12 bits of a float (sign and exponent bits). */
+static inline uint32_t
+abstop12 (float x)
+{
+ return (asuint (x) >> 20) & 0x7ff;
+}
+
+/* Top 12 bits of a float. */
+static inline uint32_t
+top12 (float x)
+{
+ return asuint (x) >> 20;
+}
+
+/* Fast erfcf approximation using polynomial approximation
+ multiplied by gaussian.
+ Most of the computation is carried out in double precision,
+ and is very sensitive to accuracy of polynomial and exp
+ evaluation.
+ Worst-case error is 1.968ulps, obtained for x = 2.0412941.
+ erfcf(0x1.05492p+1) got 0x1.fe10f6p-9 want 0x1.fe10f2p-9 ulp
+ err 1.46788. */
+float
+erfcf (float x)
+{
+ /* Get top words and sign. */
+ uint32_t ix = asuint (x); /* We need to compare at most 32 bits. */
+ uint32_t sign = ix >> 31;
+ uint32_t ia12 = top12 (x) & 0x7ff;
+
+ /* Handle special cases and small values with a single comparison:
+ abstop12(x)-abstop12(small) >= abstop12(INFINITY)-abstop12(small)
+
+ Special cases
+ erfcf(nan)=nan, erfcf(+inf)=0 and erfcf(-inf)=2
+
+ Errno
+ EDOM does not have to be set in case of erfcf(nan).
+ Only ERANGE may be set in case of underflow.
+
+ Small values (|x|<small)
+ |x|<0x1.0p-26 => accurate to 0.5 ULP (top12(0x1p-26) = 0x328). */
+ if (unlikely (abstop12 (x) - 0x328 >= (abstop12 (INFINITY) & 0x7f8) - 0x328))
+ {
+ if (abstop12 (x) >= 0x7f8)
+ return (float) (sign << 1) + 1.0f / x; /* Special cases. */
+ else
+ return 1.0f - x; /* Small case. */
+ }
+
+ /* Normalized numbers divided in 4 intervals
+ with bounds: 2.0, 4.0, 8.0 and 10.0. 10 was chosen as the upper bound for
+ the interesting region as it is the smallest value, representable as a
+ 12-bit integer, for which returning 0 gives <1.5 ULP. */
+ if (ia12 < 0x400)
+ {
+ return approx_erfcf_lo (x, sign, P (0));
+ }
+ if (ia12 < 0x408)
+ {
+ return approx_erfcf_lo (x, sign, P (1));
+ }
+ if (ia12 < 0x410)
+ {
+ return approx_erfcf_hi (x, sign, P (2));
+ }
+ if (ia12 < 0x412)
+ {
+ return approx_erfcf_hi (x, sign, P (3));
+ }
+ if (sign)
+ {
+ return 2.0f;
+ }
+ return __math_uflowf (0);
+}
+
+PL_SIG (S, F, 1, erfc, -4.0, 10.0)
+PL_TEST_ULP (erfcf, 1.5)
+PL_TEST_INTERVAL (erfcf, 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (erfcf, 0x1p-127, 0x1p-26, 40000)
+PL_TEST_INTERVAL (erfcf, -0x1p-127, -0x1p-26, 40000)
+PL_TEST_INTERVAL (erfcf, 0x1p-26, 0x1p5, 40000)
+PL_TEST_INTERVAL (erfcf, -0x1p-26, -0x1p3, 40000)
+PL_TEST_INTERVAL (erfcf, 0, inf, 40000)