diff options
Diffstat (limited to 'math/v_cosf.c')
-rw-r--r-- | math/v_cosf.c | 76 |
1 files changed, 76 insertions, 0 deletions
diff --git a/math/v_cosf.c b/math/v_cosf.c new file mode 100644 index 0000000..150294b --- /dev/null +++ b/math/v_cosf.c @@ -0,0 +1,76 @@ +/* + * Single-precision vector cos function. + * + * Copyright (c) 2019, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +#include "mathlib.h" +#include "v_math.h" +#if V_SUPPORTED + +static const float Poly[] = { + /* 1.886 ulp error */ + 0x1.5b2e76p-19f, + -0x1.9f42eap-13f, + 0x1.110df4p-7f, + -0x1.555548p-3f, +}; +#define Pi1 v_f32 (0x1.921fb6p+1f) +#define Pi2 v_f32 (-0x1.777a5cp-24f) +#define Pi3 v_f32 (-0x1.ee59dap-49f) +#define A3 v_f32 (Poly[3]) +#define A5 v_f32 (Poly[2]) +#define A7 v_f32 (Poly[1]) +#define A9 v_f32 (Poly[0]) +#define RangeVal v_f32 (0x1p20f) +#define InvPi v_f32 (0x1.45f306p-2f) +#define Shift v_f32 (0x1.8p+23f) +#define AbsMask v_u32 (0x7fffffff) +#define HalfPi v_f32 (0x1.921fb6p0f) + +VPCS_ATTR +static v_f32_t +specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp) +{ + /* Fall back to scalar code. */ + return v_call_f32 (cosf, x, y, cmp); +} + +VPCS_ATTR +v_f32_t +V_NAME(cosf) (v_f32_t x) +{ + v_f32_t n, r, r2, y; + v_u32_t odd, cmp; + + r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask); + cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal)); + + /* n = rint((|x|+pi/2)/pi) - 0.5 */ + n = v_fma_f32 (InvPi, r + HalfPi, Shift); + odd = v_as_u32_f32 (n) << 31; + n -= Shift; + n -= v_f32 (0.5f); + + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2) */ + r = v_fma_f32 (-Pi1, n, r); + r = v_fma_f32 (-Pi2, n, r); + r = v_fma_f32 (-Pi3, n, r); + + /* y = sin(r) */ + r2 = r * r; + y = v_fma_f32 (A9, r2, A7); + y = v_fma_f32 (y, r2, A5); + y = v_fma_f32 (y, r2, A3); + y = v_fma_f32 (y * r2, r, r); + + /* sign fix */ + y = v_as_f32_u32 (v_as_u32_f32 (y) ^ odd); + + if (unlikely (v_any_u32 (cmp))) + return specialcase (x, y, cmp); + return y; +} +VPCS_ALIAS +#endif |