diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 00:57:30 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 00:57:30 +0000 |
commit | 0edd6499aaed16bf45de92bb0ad1c729486ce6f4 (patch) | |
tree | b6182e391304fb3a42c51d482dcf671f540f2363 /pl/math/v_erf_2u.c | |
parent | f2e7d2de0fe4c2bddb59992ba401391f38627a1e (diff) | |
parent | 172d24a7ae67ee7bae413d5a8618f1b5edc002be (diff) | |
download | arm-optimized-routines-0edd6499aaed16bf45de92bb0ad1c729486ce6f4.tar.gz |
Snap for 10447354 from 172d24a7ae67ee7bae413d5a8618f1b5edc002be to mainline-cellbroadcast-releaseaml_cbr_341710000aml_cbr_341610000aml_cbr_341510010aml_cbr_341410010aml_cbr_341311010aml_cbr_341110000aml_cbr_341011000aml_cbr_340914000android14-mainline-cellbroadcast-release
Change-Id: I8753ae14d61308952964b5f87c7e48044f60727c
Diffstat (limited to 'pl/math/v_erf_2u.c')
-rw-r--r-- | pl/math/v_erf_2u.c | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/pl/math/v_erf_2u.c b/pl/math/v_erf_2u.c new file mode 100644 index 0000000..1d7ddbb --- /dev/null +++ b/pl/math/v_erf_2u.c @@ -0,0 +1,116 @@ +/* + * Double-precision vector erf(x) function. + * + * Copyright (c) 2019-2023, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "v_math.h" +#include "include/mathlib.h" +#include "math_config.h" +#include "pl_sig.h" +#include "pl_test.h" + +#if V_SUPPORTED + +#define AbsMask v_u64 (0x7fffffffffffffff) +#define AbsXMax v_f64 (0x1.8p+2) +#define Scale v_f64 (0x1p+3) + +/* Special cases (fall back to scalar calls). */ +VPCS_ATTR +NOINLINE static v_f64_t +specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp) +{ + return v_call_f64 (erf, x, y, cmp); +} + +/* A structure to perform look-up in coeffs and other parameter tables. */ +struct entry +{ + v_f64_t P[V_ERF_NCOEFFS]; + v_f64_t shift; +}; + +static inline struct entry +lookup (v_u64_t i) +{ + struct entry e; +#ifdef SCALAR + for (int j = 0; j < V_ERF_NCOEFFS; ++j) + e.P[j] = __v_erf_data.coeffs[j][i]; + e.shift = __v_erf_data.shifts[i]; +#else + for (int j = 0; j < V_ERF_NCOEFFS; ++j) + { + e.P[j][0] = __v_erf_data.coeffs[j][i[0]]; + e.P[j][1] = __v_erf_data.coeffs[j][i[1]]; + } + e.shift[0] = __v_erf_data.shifts[i[0]]; + e.shift[1] = __v_erf_data.shifts[i[1]]; +#endif + return e; +} + +/* Optimized double precision vector error function erf. Maximum + observed error is 1.75 ULP, in [0.110, 0.111]: + verf(0x1.c5e0c2d5d0543p-4) got 0x1.fe0ed62a54987p-4 + want 0x1.fe0ed62a54985p-4. */ +VPCS_ATTR +v_f64_t V_NAME (erf) (v_f64_t x) +{ + /* Handle both inf/nan as well as small values (|x|<2^-28) + If any condition in the lane is true then a loop over + scalar calls will be performed. */ + v_u64_t ix = v_as_u64_f64 (x); + v_u64_t atop = (ix >> 48) & v_u64 (0x7fff); + v_u64_t special_case + = v_cond_u64 (atop - v_u64 (0x3e30) >= v_u64 (0x7ff0 - 0x3e30)); + + /* Get sign and absolute value. */ + v_u64_t sign = v_as_u64_f64 (x) & ~AbsMask; + v_f64_t a = v_min_f64 (v_abs_f64 (x), AbsXMax); + + /* Compute index by truncating 8 * a with a=|x| saturated to 6.0. */ + +#ifdef SCALAR + v_u64_t i = v_trunc_u64 (a * Scale); +#else + v_u64_t i = vcvtq_n_u64_f64 (a, 3); +#endif + /* Get polynomial coefficients and shift parameter using lookup. */ + struct entry dat = lookup (i); + + /* Evaluate polynomial on transformed argument. */ + v_f64_t z = v_fma_f64 (a, Scale, dat.shift); + + v_f64_t r1 = v_fma_f64 (z, dat.P[1], dat.P[0]); + v_f64_t r2 = v_fma_f64 (z, dat.P[3], dat.P[2]); + v_f64_t r3 = v_fma_f64 (z, dat.P[5], dat.P[4]); + v_f64_t r4 = v_fma_f64 (z, dat.P[7], dat.P[6]); + v_f64_t r5 = v_fma_f64 (z, dat.P[9], dat.P[8]); + + v_f64_t z2 = z * z; + v_f64_t y = v_fma_f64 (z2, r5, r4); + y = v_fma_f64 (z2, y, r3); + y = v_fma_f64 (z2, y, r2); + y = v_fma_f64 (z2, y, r1); + + /* y=erf(x) if x>0, -erf(-x) otherwise. */ + y = v_as_f64_u64 (v_as_u64_f64 (y) ^ sign); + + if (unlikely (v_any_u64 (special_case))) + return specialcase (x, y, special_case); + return y; +} +VPCS_ALIAS + +PL_SIG (V, D, 1, erf, -6.0, 6.0) +PL_TEST_ULP (V_NAME (erf), 1.26) +PL_TEST_INTERVAL (V_NAME (erf), 0, 0xffff0000, 10000) +PL_TEST_INTERVAL (V_NAME (erf), 0x1p-127, 0x1p-26, 40000) +PL_TEST_INTERVAL (V_NAME (erf), -0x1p-127, -0x1p-26, 40000) +PL_TEST_INTERVAL (V_NAME (erf), 0x1p-26, 0x1p3, 40000) +PL_TEST_INTERVAL (V_NAME (erf), -0x1p-26, -0x1p3, 40000) +PL_TEST_INTERVAL (V_NAME (erf), 0, inf, 40000) +#endif |