diff options
author | Joe Ramsay <Joe.Ramsay@arm.com> | 2023-11-28 13:56:00 +0000 |
---|---|---|
committer | Joe Ramsay <joe.ramsay@arm.com> | 2023-11-28 13:56:00 +0000 |
commit | da1d55372f09cfa0f927aa96a35df428557648fa (patch) | |
tree | 3385ce846dbf7d470939f4fb4ec950cc28d8b9aa | |
parent | 0f1ced13df1a24da0d16a2da5638c1ef25832a36 (diff) | |
download | arm-optimized-routines-da1d55372f09cfa0f927aa96a35df428557648fa.tar.gz |
pl/math: Small improvements to Neon exp10f
Use abs instead of mask, zerofy instead of VSEL with one, fix
indentation.
-rw-r--r-- | pl/math/v_exp10f_2u4.c | 12 |
1 files changed, 5 insertions, 7 deletions
diff --git a/pl/math/v_exp10f_2u4.c b/pl/math/v_exp10f_2u4.c
index f5e6399..0e91bec 100644
--- a/pl/math/v_exp10f_2u4.c
+++ b/pl/math/v_exp10f_2u4.c
@@ -57,9 +57,9 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
 
 #else
 
-# define SpecialBound 126.0f /* rint (log2 (2^127 / (1 + sqrt (2)))). */
-# define SpecialOffset v_u32 (0x82000000)
-# define SpecialBias v_u32 (0x7f000000)
+# define SpecialBound 126.0f /* rint (log2 (2^127 / (1 + sqrt (2)))). */
+# define SpecialOffset v_u32 (0x82000000)
+# define SpecialBias v_u32 (0x7f000000)
 
 static float32x4_t VPCS_ATTR NOINLINE
 special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
@@ -90,15 +90,13 @@ float32x4_t VPCS_ATTR V_NAME_F1 (exp10) (float32x4_t x)
 #if WANT_SIMD_EXCEPT
   /* asuint(x) - TinyBound >= BigBound - TinyBound. */
   uint32x4_t cmp = vcgeq_u32 (
-      vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)),
-                 TinyBound),
-      Thres);
+      vsubq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (x)), TinyBound), Thres);
   float32x4_t xm = x;
   /* If any lanes are special, mask them with 1 and retain a copy of x to allow
      special case handler to fix special lanes later. This is only necessary if
      fenv exceptions are to be triggered correctly. */
   if (unlikely (v_any_u32 (cmp)))
-    x = vbslq_f32 (cmp, v_f32 (1), x);
+    x = v_zerofy_f32 (x, cmp);
 #endif
 
   /* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)),