diff options
Diffstat (limited to 'pl/math/erfc_4u5.c')
-rw-r--r-- | pl/math/erfc_4u5.c | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/pl/math/erfc_4u5.c b/pl/math/erfc_4u5.c new file mode 100644 index 0000000..e9af9d3 --- /dev/null +++ b/pl/math/erfc_4u5.c @@ -0,0 +1,155 @@ +/* + * Double-precision erfc(x) function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "math_config.h" +#include "pairwise_horner.h" +#include "pl_sig.h" +#include "pl_test.h" + +#define AbsMask (0x7fffffffffffffff) + +#define xint __erfc_data.interval_bounds +#define PX __erfc_data.poly + +/* Accurate exponential from optimized routines. */ +double +__exp_dd (double x, double xtail); + +static inline double +eval_poly_horner (double z, int i) +{ + double z2 = z * z; +#define C(j) PX[i][j] + return PAIRWISE_HORNER_12 (z, z2, C); +} + +/* Accurate evaluation of exp(x^2) + using compensated product (x^2 ~ x*x + e2) + and the __exp_dd(y,d) routine, that is the + computation of exp(y+d) with a small correction d<<y. */ +static inline double +eval_accurate_gaussian (double a) +{ + double e2; + double a2 = a * a; + double aa1 = -fma (0x1.0000002p27, a, -a); + aa1 = fma (0x1.0000002p27, a, aa1); + double aa2 = a - aa1; + e2 = fma (-aa1, aa1, a2); + e2 = fma (-aa1, aa2, e2); + e2 = fma (-aa2, aa1, e2); + e2 = fma (-aa2, aa2, e2); + return __exp_dd (-a2, e2); +} + +/* Approximation of erfc for |x| > 6.0. */ +static inline double +approx_erfc_hi (double x, int i) +{ + double a = fabs (x); + double z = a - xint[i]; + double p = eval_poly_horner (z, i); + double e_mx2 = eval_accurate_gaussian (a); + return p * e_mx2; +} + +static inline int +get_itv_idx (double x) +{ + /* Interval bounds are a logarithmic scale, i.e. interval n has + lower bound 2^(n/4) - 1. Use the exponent of (|x|+1)^4 to obtain + the interval index. */ + double a = asdouble (asuint64 (x) & AbsMask); + double z = a + 1.0; + z = z * z; + z = z * z; + return (asuint64 (z) >> 52) - 1023; +} + +/* Approximation of erfc for |x| < 6.0. */ +static inline double +approx_erfc_lo (double x, uint32_t sign, int i) +{ + double a = fabs (x); + double z = a - xint[i]; + double p = eval_poly_horner (z, i); + double e_mx2 = eval_accurate_gaussian (a); + if (sign) + return fma (-p, e_mx2, 2.0); + else + return p * e_mx2; +} + +/* Top 12 bits of a double (sign and exponent bits). */ +static inline uint32_t +abstop12 (double x) +{ + return (asuint64 (x) >> 52) & 0x7ff; +} + +/* Top 32 bits of a double. */ +static inline uint32_t +top32 (double x) +{ + return asuint64 (x) >> 32; +} + +/* Fast erfc implementation. + The approximation uses polynomial approximation of + exp(x^2) * erfc(x) with fixed orders on 20 intervals. + Maximum measured error is 4.05 ULPs:. + erfc(0x1.e8ebf6a2b0801p-2) got 0x1.ff84036f8f0b3p-2 + want 0x1.ff84036f8f0b7p-2. */ +double +erfc (double x) +{ + /* Get top words. */ + uint32_t ix = top32 (x); /* We need to compare at most 32 bits. */ + uint32_t ia = ix & 0x7fffffff; + uint32_t sign = ix >> 31; + + /* Handle special cases and small values with a single comparison: + abstop12(x)-abstop12(small) >= abstop12(INFINITY)-abstop12(small) + Special cases erfc(nan)=nan, erfc(+inf)=0 and erfc(-inf)=2 + Errno EDOM does not have to be set in case of erfc(nan). + Only ERANGE may be set in case of underflow. + Small values (|x|<small) + |x|<0x1.0p-56 => accurate up to 0.5 ULP (top12(0x1p-50) = 0x3c7) + |x|<0x1.0p-50 => accurate up to 1.0 ULP (top12(0x1p-50) = 0x3cd). */ + if (unlikely (abstop12 (x) - 0x3cd >= (abstop12 (INFINITY) & 0x7ff) - 0x3cd)) + { + if (abstop12 (x) >= 0x7ff) + return (double) (sign << 1) + 1.0 / x; /* special cases. */ + else + return 1.0 - x; /* small case. */ + } + else if (ia < 0x40180000) + { /* |x| < 6.0. */ + return approx_erfc_lo (x, sign, get_itv_idx (x)); + } + else if (sign) + { /* x <= -6.0. */ + return 2.0; + } + else if (ia < 0x403c0000) + { /* 6.0 <= x < 28. */ + return approx_erfc_hi (x, get_itv_idx (x)); + } + else + { /* x > 28. */ + return __math_uflow (0); + } +} + +PL_SIG (S, D, 1, erfc, -6.0, 28.0) +PL_TEST_ULP (erfc, 3.56) +PL_TEST_INTERVAL (erfc, 0, 0xffff0000, 10000) +PL_TEST_INTERVAL (erfc, 0x1p-1022, 0x1p-26, 40000) +PL_TEST_INTERVAL (erfc, -0x1p-1022, -0x1p-26, 40000) +PL_TEST_INTERVAL (erfc, 0x1p-26, 0x1p5, 40000) +PL_TEST_INTERVAL (erfc, -0x1p-26, -0x1p3, 40000) +PL_TEST_INTERVAL (erfc, 0, inf, 40000) |