diff options
author | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2019-10-14 15:21:28 +0100 |
---|---|---|
committer | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2019-11-05 18:38:51 +0000 |
commit | 69170e15cc2a10d7883e2fb40fb4437052febc13 (patch) | |
tree | 80a149cd724906606532751fab4ff870ad750930 /math/include | |
parent | 65464ec60ba73d113c8bd1632708014fa82dc9c6 (diff) | |
download | arm-optimized-routines-69170e15cc2a10d7883e2fb40fb4437052febc13.tar.gz |
Add vector exp2f
Same design as in expf. The worst-case errors of __v_exp2f and __v_exp2f_1u
are 1.96 and 0.88 ULP, respectively.
It is not clear whether the round/convert instructions or adding and
subtracting Shift (+- Shift) is the better approach. The latter seems more
consistently faster for expf and the former for exp2f, but both options are
kept in the code for now.
Diffstat (limited to 'math/include')
-rw-r--r-- | math/include/mathlib.h | 7 |
1 file changed, 7 insertions, 0 deletions
diff --git a/math/include/mathlib.h b/math/include/mathlib.h
index 5112068..254954a 100644
--- a/math/include/mathlib.h
+++ b/math/include/mathlib.h
@@ -28,6 +28,8 @@ float __s_sinf (float);
 float __s_cosf (float);
 float __s_expf (float);
 float __s_expf_1u (float);
+float __s_exp2f (float);
+float __s_exp2f_1u (float);
 float __s_logf (float);
 float __s_powf (float, float);
 double __s_sin (double);
@@ -51,6 +53,8 @@ __f32x4_t __v_sinf (__f32x4_t);
 __f32x4_t __v_cosf (__f32x4_t);
 __f32x4_t __v_expf (__f32x4_t);
 __f32x4_t __v_expf_1u (__f32x4_t);
+__f32x4_t __v_exp2f (__f32x4_t);
+__f32x4_t __v_exp2f_1u (__f32x4_t);
 __f32x4_t __v_logf (__f32x4_t);
 __f32x4_t __v_powf (__f32x4_t, __f32x4_t);
 __f64x2_t __v_sin (__f64x2_t);
@@ -66,6 +70,8 @@ __vpcs __f32x4_t __vn_sinf (__f32x4_t);
 __vpcs __f32x4_t __vn_cosf (__f32x4_t);
 __vpcs __f32x4_t __vn_expf (__f32x4_t);
 __vpcs __f32x4_t __vn_expf_1u (__f32x4_t);
+__vpcs __f32x4_t __vn_exp2f (__f32x4_t);
+__vpcs __f32x4_t __vn_exp2f_1u (__f32x4_t);
 __vpcs __f32x4_t __vn_logf (__f32x4_t);
 __vpcs __f32x4_t __vn_powf (__f32x4_t, __f32x4_t);
 __vpcs __f64x2_t __vn_sin (__f64x2_t);
@@ -77,6 +83,7 @@ __vpcs __f64x2_t __vn_log (__f64x2_t);
 __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
 __vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
 __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);