aboutsummaryrefslogtreecommitdiff
path: root/math/v_sinf.c
diff options
context:
space:
mode:
author Joe Ramsay <Joe.Ramsay@arm.com> 2022-12-07 15:31:03 +0000
committer Joe Ramsay <joe.ramsay@arm.com> 2022-12-07 15:31:03 +0000
commit a5e45e4e299f5fe6b51601694cc3cb066a20723a (patch)
tree 06f65610043a940b20a2aa3aaeda1d0686b075e7 /math/v_sinf.c
parent 7d205b8787a4462d6e605ee826edf2666f899a34 (diff)
download arm-optimized-routines-a5e45e4e299f5fe6b51601694cc3cb066a20723a.tar.gz
math: Set fenv exceptions for several Neon routines
In most cases, we mask lanes which should not trigger exceptions with a neutral value, then let the existing special-case handler fix them up later. For exp and exp2 we replace the more complex special-case handler with a simple scalar fallback. All new behaviour is tested in runulp.sh, with a new option to pass -f to the run line. We also extend the fenv testing to Neon log and logf, which already triggered exceptions correctly. New behaviour is mostly hidden behind a new config setting, WANT_SIMD_EXCEPT.
Diffstat (limited to 'math/v_sinf.c')
-rw-r--r-- math/v_sinf.c 19
1 files changed, 16 insertions, 3 deletions
diff --git a/math/v_sinf.c b/math/v_sinf.c
index d2e18b5..ee6ed9a 100644
--- a/math/v_sinf.c
+++ b/math/v_sinf.c
@@ -24,6 +24,7 @@ static const float Poly[] = {
#define A7 v_f32 (Poly[1])
#define A9 v_f32 (Poly[0])
#define RangeVal v_f32 (0x1p20f)
+#define TinyBound v_f32 (0x1p-61f)
#define InvPi v_f32 (0x1.45f306p-2f)
#define Shift v_f32 (0x1.8p+23f)
#define AbsMask v_u32 (0x7fffffff)
@@ -41,11 +42,23 @@ v_f32_t
V_NAME(sinf) (v_f32_t x)
{
v_f32_t n, r, r2, y;
- v_u32_t sign, odd, cmp;
+ v_u32_t sign, odd, cmp, ir;
- r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask);
+ ir = v_as_u32_f32 (x) & AbsMask;
+ r = v_as_f32_u32 (ir);
sign = v_as_u32_f32 (x) & ~AbsMask;
- cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal));
+
+#if WANT_SIMD_EXCEPT
+ cmp = v_cond_u32 ((ir - v_as_u32_f32 (TinyBound)
+ >= v_as_u32_f32 (RangeVal) - v_as_u32_f32 (TinyBound)));
+ if (unlikely (v_any_u32 (cmp)))
+ /* If fenv exceptions are to be triggered correctly, set any special lanes
+ to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by
+ specialcase later. */
+ r = v_sel_f32 (cmp, v_f32 (1), r);
+#else
+ cmp = v_cond_u32 (ir >= v_as_u32_f32 (RangeVal));
+#endif
/* n = rint(|x|/pi) */
n = v_fma_f32 (InvPi, r, Shift);