diff options
Diffstat (limited to 'pl/math/erfcf_2u.c')
-rw-r--r-- | pl/math/erfcf_2u.c | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/pl/math/erfcf_2u.c b/pl/math/erfcf_2u.c new file mode 100644 index 0000000..5a3f9b0 --- /dev/null +++ b/pl/math/erfcf_2u.c @@ -0,0 +1,133 @@ +/* + * Single-precision erfc(x) function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "erfcf.h" +#include "math_config.h" +#include "pl_sig.h" +#include "pl_test.h" + +#define P(i) __erfcf_poly_data.poly[i] + +/* Approximation of erfcf for |x| > 4.0. */ +static inline float +approx_erfcf_hi (float x, uint32_t sign, const double *coeff) +{ + if (sign) + { + return 2.0f; + } + + /* Polynomial contribution. */ + double z = (double) fabs (x); + float p = (float) eval_poly (z, coeff); + /* Gaussian contribution. */ + float e_mx2 = (float) eval_exp_mx2 (z); + + return p * e_mx2; +} + +/* Approximation of erfcf for |x| < 4.0. */ +static inline float +approx_erfcf_lo (float x, uint32_t sign, const double *coeff) +{ + /* Polynomial contribution. */ + double z = (double) fabs (x); + float p = (float) eval_poly (z, coeff); + /* Gaussian contribution. */ + float e_mx2 = (float) eval_exp_mx2 (z); + + if (sign) + return fmaf (-p, e_mx2, 2.0f); + else + return p * e_mx2; +} + +/* Top 12 bits of a float (sign and exponent bits). */ +static inline uint32_t +abstop12 (float x) +{ + return (asuint (x) >> 20) & 0x7ff; +} + +/* Top 12 bits of a float. */ +static inline uint32_t +top12 (float x) +{ + return asuint (x) >> 20; +} + +/* Fast erfcf approximation using polynomial approximation + multiplied by gaussian. + Most of the computation is carried out in double precision, + and is very sensitive to accuracy of polynomial and exp + evaluation. + Worst-case error is 1.968ulps, obtained for x = 2.0412941. + erfcf(0x1.05492p+1) got 0x1.fe10f6p-9 want 0x1.fe10f2p-9 ulp + err 1.46788. */ +float +erfcf (float x) +{ + /* Get top words and sign. */ + uint32_t ix = asuint (x); /* We need to compare at most 32 bits. */ + uint32_t sign = ix >> 31; + uint32_t ia12 = top12 (x) & 0x7ff; + + /* Handle special cases and small values with a single comparison: + abstop12(x)-abstop12(small) >= abstop12(INFINITY)-abstop12(small) + + Special cases + erfcf(nan)=nan, erfcf(+inf)=0 and erfcf(-inf)=2 + + Errno + EDOM does not have to be set in case of erfcf(nan). + Only ERANGE may be set in case of underflow. + + Small values (|x|<small) + |x|<0x1.0p-26 => accurate to 0.5 ULP (top12(0x1p-26) = 0x328). */ + if (unlikely (abstop12 (x) - 0x328 >= (abstop12 (INFINITY) & 0x7f8) - 0x328)) + { + if (abstop12 (x) >= 0x7f8) + return (float) (sign << 1) + 1.0f / x; /* Special cases. */ + else + return 1.0f - x; /* Small case. */ + } + + /* Normalized numbers divided in 4 intervals + with bounds: 2.0, 4.0, 8.0 and 10.0. 10 was chosen as the upper bound for + the interesting region as it is the smallest value, representable as a + 12-bit integer, for which returning 0 gives <1.5 ULP. */ + if (ia12 < 0x400) + { + return approx_erfcf_lo (x, sign, P (0)); + } + if (ia12 < 0x408) + { + return approx_erfcf_lo (x, sign, P (1)); + } + if (ia12 < 0x410) + { + return approx_erfcf_hi (x, sign, P (2)); + } + if (ia12 < 0x412) + { + return approx_erfcf_hi (x, sign, P (3)); + } + if (sign) + { + return 2.0f; + } + return __math_uflowf (0); +} + +PL_SIG (S, F, 1, erfc, -4.0, 10.0) +PL_TEST_ULP (erfcf, 1.5) +PL_TEST_INTERVAL (erfcf, 0, 0xffff0000, 10000) +PL_TEST_INTERVAL (erfcf, 0x1p-127, 0x1p-26, 40000) +PL_TEST_INTERVAL (erfcf, -0x1p-127, -0x1p-26, 40000) +PL_TEST_INTERVAL (erfcf, 0x1p-26, 0x1p5, 40000) +PL_TEST_INTERVAL (erfcf, -0x1p-26, -0x1p3, 40000) +PL_TEST_INTERVAL (erfcf, 0, inf, 40000) |