aboutsummaryrefslogtreecommitdiff
path: root/pl/math/v_tanhf_2u6.c
diff options
context:
space:
mode:
author Joe Ramsay <Joe.Ramsay@arm.com> 2022-12-22 16:20:22 +0000
committer Joe Ramsay <joe.ramsay@arm.com> 2022-12-22 16:20:22 +0000
commit 0a9270a27f48bea87c5bd3f0f9c759da66fb45a3 (patch)
tree 7a9eb5555187c92f1e1a49bd155241a1b41f4764 /pl/math/v_tanhf_2u6.c
parent 3bfa7bd49c5576d5b1f9e6a79e3d3a15fe3823bc (diff)
download arm-optimized-routines-0a9270a27f48bea87c5bd3f0f9c759da66fb45a3.tar.gz
pl/math: Fix fp exceptions in Neon sinhf and sinh
Both routines previously relied on the vector expm1(f) routine exposed by the library, whose fenv behaviour depended on WANT_SIMD_EXCEPT; however, both routines were expected to always trigger fp exceptions correctly. To remedy this, both routines now use an inlined helper for expm1 (reused from vector tanhf in the case of sinhf), and special-case small as well as large inputs when WANT_SIMD_EXCEPT is enabled.
Diffstat (limited to 'pl/math/v_tanhf_2u6.c')
-rw-r--r--pl/math/v_tanhf_2u6.c38
1 files changed, 2 insertions, 36 deletions
diff --git a/pl/math/v_tanhf_2u6.c b/pl/math/v_tanhf_2u6.c
index dedc085..0e7ff69 100644
--- a/pl/math/v_tanhf_2u6.c
+++ b/pl/math/v_tanhf_2u6.c
@@ -5,51 +5,17 @@
*/
#include "v_math.h"
-#include "estrinf.h"
-#include "mathlib.h"
#include "pl_sig.h"
#include "pl_test.h"
#if V_SUPPORTED
+#include "v_expm1f_inline.h"
+
#define BoringBound \
0x41102cb3 /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for \
negative). */
#define AbsMask 0x7fffffff
-#define One 0x3f800000
-
-#define Shift v_f32 (0x1.8p23f)
-#define InvLn2 v_f32 (0x1.715476p+0f)
-#define MLn2hi v_f32 (-0x1.62e4p-1f)
-#define MLn2lo v_f32 (-0x1.7f7d1cp-20f)
-
-#define C(i) v_f32 (__expm1f_poly[i])
-
-static inline v_f32_t
-expm1f_inline (v_f32_t x)
-{
- /* Helper routine for calculating exp(x) - 1.
- Copied from v_expm1f_1u6.c, with all special-case handling removed, as
- special, tiny and large values are all dealt with in the main tanhf
- routine. */
-
- /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
- v_f32_t j = v_fma_f32 (InvLn2, x, Shift) - Shift;
- v_s32_t i = v_to_s32_f32 (j);
- v_f32_t f = v_fma_f32 (j, MLn2hi, x);
- f = v_fma_f32 (j, MLn2lo, f);
-
- /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f).
- Uses Estrin scheme, where the main __v_expm1f routine uses Horner. */
- v_f32_t f2 = f * f;
- v_f32_t p = ESTRIN_4 (f, f2, f2 * f2, C);
- p = v_fma_f32 (f2, p, f);
-
- /* t = 2^i. */
- v_f32_t t = v_as_f32_u32 (v_as_u32_s32 (i << 23) + One);
- /* expm1(x) ~= p * t + (t - 1). */
- return v_fma_f32 (p, t, t - 1);
-}
static NOINLINE v_f32_t
special_case (v_f32_t x, v_f32_t y, v_u32_t special)