/* * Double-precision vector erfc(x) function. * * Copyright (c) 2019-2023, Arm Limited. * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #include "v_math.h" #include "horner.h" #include "math_config.h" #include "pl_sig.h" #include "pl_test.h" #if V_SUPPORTED /* Accurate exponential (vector variant of exp_dd). */ v_f64_t V_NAME (exp_tail) (v_f64_t, v_f64_t); #define One v_f64 (1.0) #define AbsMask v_u64 (0x7fffffffffffffff) #define Scale v_f64 (0x1.0000002p27) /* Coeffs for polynomial approximation on [0x1.0p-28., 31.]. */ #define PX __v_erfc_data.poly #define xint __v_erfc_data.interval_bounds /* Special cases (fall back to scalar calls). */ VPCS_ATTR NOINLINE static v_f64_t specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp) { return v_call_f64 (erfc, x, y, cmp); } /* A structure to perform look-up in coeffs and other parameter tables. */ struct entry { v_f64_t P[ERFC_POLY_ORDER + 1]; v_f64_t xi; }; static inline struct entry lookup (v_u64_t i) { struct entry e; #ifdef SCALAR for (int j = 0; j <= ERFC_POLY_ORDER; ++j) e.P[j] = PX[i][j]; e.xi = xint[i]; #else for (int j = 0; j <= ERFC_POLY_ORDER; ++j) { e.P[j][0] = PX[i[0]][j]; e.P[j][1] = PX[i[1]][j]; } e.xi[0] = xint[i[0]]; e.xi[1] = xint[i[1]]; #endif return e; } /* Accurate evaluation of exp(x^2) using compensated product (x^2 ~ x*x + e2) and custom exp(y+d) routine for small corrections d<> 63) << 62); /* Use 12-bit for small, nan and inf case detection. */ atop = (ix >> 52) & 0x7ff; cmp = v_cond_u64 (atop - v_u64 (0x3cd) >= v_u64 (0x7ff - 0x3cd)); struct entry dat; /* All entries of the vector are out of bounds, take a short path. Use smallest possible number above 28 representable in 12 bits. */ v_u64_t out_of_bounds = v_cond_u64 (atop >= v_u64 (0x404)); /* Use sign to produce either 0 if x > 0, 2 otherwise. */ if (v_all_u64 (out_of_bounds) && likely (v_any_u64 (~cmp))) return fac; /* erfc(|x|) = P(|x|-x_i)*exp(-x^2). */ v_f64_t a = v_abs_f64 (x); /* Interval bounds are a logarithmic scale, i.e. interval n has lower bound 2^(n/4) - 1. Use the exponent of (|x|+1)^4 to obtain the interval index. */ v_f64_t xp1 = a + v_f64 (1.0); xp1 = xp1 * xp1; xp1 = xp1 * xp1; v_u64_t ixp1 = v_as_u64_f64 (xp1); i = (ixp1 >> 52) - v_u64 (1023); /* Index cannot exceed number of polynomials. */ #ifdef SCALAR i = i <= (ERFC_NUM_INTERVALS) ? i : ERFC_NUM_INTERVALS; #else i = (v_u64_t){i[0] <= ERFC_NUM_INTERVALS ? i[0] : ERFC_NUM_INTERVALS, i[1] <= ERFC_NUM_INTERVALS ? i[1] : ERFC_NUM_INTERVALS}; #endif /* Get coeffs of i-th polynomial. */ dat = lookup (i); /* Evaluate Polynomial: P(|x|-x_i). */ z = a - dat.xi; #define C(i) dat.P[i] p = HORNER_12 (z, C); /* Evaluate Gaussian: exp(-x^2). */ v_f64_t e = v_eval_gauss (a); /* Copy sign. */ sign = v_as_u64_f64 (x) & ~AbsMask; p = v_as_f64_u64 (v_as_u64_f64 (p) ^ sign); /* Assemble result as 2.0 - p * e if x < 0, p * e otherwise. */ y = v_fma_f64 (p, e, fac); /* No need to fix value of y if x is out of bound, as P[ERFC_NUM_INTERVALS]=0. */ if (unlikely (v_any_u64 (cmp))) return specialcase (x, y, cmp); return y; } VPCS_ALIAS PL_SIG (V, D, 1, erfc, -6.0, 28.0) PL_TEST_ULP (V_NAME (erfc), 3.15) PL_TEST_INTERVAL (V_NAME (erfc), 0, 0xffff0000, 10000) PL_TEST_INTERVAL (V_NAME (erfc), 0x1p-1022, 0x1p-26, 40000) PL_TEST_INTERVAL (V_NAME (erfc), -0x1p-1022, -0x1p-26, 40000) PL_TEST_INTERVAL (V_NAME (erfc), 0x1p-26, 0x1p5, 40000) PL_TEST_INTERVAL (V_NAME (erfc), -0x1p-26, -0x1p3, 40000) PL_TEST_INTERVAL (V_NAME (erfc), 0, inf, 40000) #endif