diff options
author | Joe Ramsay <Joe.Ramsay@arm.com> | 2022-12-07 15:31:03 +0000 |
---|---|---|
committer | Joe Ramsay <joe.ramsay@arm.com> | 2022-12-07 15:31:03 +0000 |
commit | a5e45e4e299f5fe6b51601694cc3cb066a20723a (patch) | |
tree | 06f65610043a940b20a2aa3aaeda1d0686b075e7 /math/v_sinf.c | |
parent | 7d205b8787a4462d6e605ee826edf2666f899a34 (diff) | |
download | arm-optimized-routines-a5e45e4e299f5fe6b51601694cc3cb066a20723a.tar.gz |
math: Set fenv exceptions for several Neon routines
In most cases, we mask lanes which should not trigger exceptions with
a neutral value, then let the existing special-case handler fix them
up later. For exp and exp2 we replace the more complex special-case
handler with a simple scalar fallback. All new behaviour is tested in
runulp.sh, with a new option to pass -f to the run line. We also
extend the fenv testing to Neon log and logf, which already triggered
exceptions correctly. New behaviour is mostly hidden behind a new
config setting, WANT_SIMD_EXCEPT.
Diffstat (limited to 'math/v_sinf.c')
-rw-r--r-- | math/v_sinf.c | 19 |
1 files changed, 16 insertions, 3 deletions
diff --git a/math/v_sinf.c b/math/v_sinf.c
index d2e18b5..ee6ed9a 100644
--- a/math/v_sinf.c
+++ b/math/v_sinf.c
@@ -24,6 +24,7 @@ static const float Poly[] = {
 #define A7 v_f32 (Poly[1])
 #define A9 v_f32 (Poly[0])
 #define RangeVal v_f32 (0x1p20f)
+#define TinyBound v_f32 (0x1p-61f)
 #define InvPi v_f32 (0x1.45f306p-2f)
 #define Shift v_f32 (0x1.8p+23f)
 #define AbsMask v_u32 (0x7fffffff)
@@ -41,11 +42,23 @@ v_f32_t
 V_NAME(sinf) (v_f32_t x)
 {
   v_f32_t n, r, r2, y;
-  v_u32_t sign, odd, cmp;
+  v_u32_t sign, odd, cmp, ir;
 
-  r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask);
+  ir = v_as_u32_f32 (x) & AbsMask;
+  r = v_as_f32_u32 (ir);
   sign = v_as_u32_f32 (x) & ~AbsMask;
-  cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal));
+
+#if WANT_SIMD_EXCEPT
+  cmp = v_cond_u32 ((ir - v_as_u32_f32 (TinyBound)
+                     >= v_as_u32_f32 (RangeVal) - v_as_u32_f32 (TinyBound)));
+  if (unlikely (v_any_u32 (cmp)))
+    /* If fenv exceptions are to be triggered correctly, set any special lanes
+       to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by
+       specialcase later. */
+    r = v_sel_f32 (cmp, v_f32 (1), r);
+#else
+  cmp = v_cond_u32 (ir >= v_as_u32_f32 (RangeVal));
+#endif
 
   /* n = rint(|x|/pi) */
   n = v_fma_f32 (InvPi, r, Shift);