aboutsummaryrefslogtreecommitdiff
path: root/math/v_expf_1u.c
diff options
context:
space:
mode:
author Szabolcs Nagy <szabolcs.nagy@arm.com> 2019-10-14 15:21:28 +0100
committer Szabolcs Nagy <szabolcs.nagy@arm.com> 2019-11-05 18:38:51 +0000
commit 69170e15cc2a10d7883e2fb40fb4437052febc13 (patch)
tree 80a149cd724906606532751fab4ff870ad750930 /math/v_expf_1u.c
parent 65464ec60ba73d113c8bd1632708014fa82dc9c6 (diff)
download arm-optimized-routines-69170e15cc2a10d7883e2fb40fb4437052febc13.tar.gz
Add vector exp2f
Same design as in expf. The worst-case errors of __v_exp2f and __v_exp2f_1u are 1.96 ulp and 0.88 ulp respectively. It is not clear whether the round/convert instructions or the add/subtract Shift trick is better: the latter seems more consistently faster for expf and the former for exp2f, so both options are kept in the code for now.
Diffstat (limited to 'math/v_expf_1u.c')
-rw-r--r-- math/v_expf_1u.c 8
1 files changed, 8 insertions, 0 deletions
diff --git a/math/v_expf_1u.c b/math/v_expf_1u.c
index 37d3d1e..023bd24 100644
--- a/math/v_expf_1u.c
+++ b/math/v_expf_1u.c
@@ -51,11 +51,19 @@ V_NAME(expf_1u) (v_f32_t x)
/* exp(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)]
x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
+#if 1
z = v_fma_f32 (x, InvLn2, Shift);
n = z - Shift;
r = v_fma_f32 (n, -Ln2hi, x);
r = v_fma_f32 (n, -Ln2lo, r);
e = v_as_u32_f32 (z) << 23;
+#else
+ z = x * InvLn2;
+ n = v_round_f32 (z);
+ r = v_fma_f32 (n, -Ln2hi, x);
+ r = v_fma_f32 (n, -Ln2lo, r);
+ e = v_as_u32_s32 (v_round_s32 (z)) << 23;
+#endif
scale = v_as_f32_u32 (e + v_u32 (0x3f800000));
absn = v_abs_f32 (n);
cmp = v_cond_u32 (absn > v_f32 (126.0f));