aboutsummaryrefslogtreecommitdiff
path: root/math/include
diff options
context:
space:
mode:
author Szabolcs Nagy <szabolcs.nagy@arm.com> 2019-10-14 15:21:28 +0100
committer Szabolcs Nagy <szabolcs.nagy@arm.com> 2019-11-05 18:38:51 +0000
commit 69170e15cc2a10d7883e2fb40fb4437052febc13 (patch)
tree 80a149cd724906606532751fab4ff870ad750930 /math/include
parent 65464ec60ba73d113c8bd1632708014fa82dc9c6 (diff)
download arm-optimized-routines-69170e15cc2a10d7883e2fb40fb4437052febc13.tar.gz
Add vector exp2f
Same design as in expf. The worst-case errors of __v_exp2f and __v_exp2f_1u are 1.96 and 0.88 ulp, respectively. It is not clear whether the round/convert instructions or +- Shift is better: the latter seems more consistently faster for expf and the former for exp2f, so both options are kept in the code for now.
Diffstat (limited to 'math/include')
-rw-r--r-- math/include/mathlib.h 7
1 files changed, 7 insertions, 0 deletions
diff --git a/math/include/mathlib.h b/math/include/mathlib.h
index 5112068..254954a 100644
--- a/math/include/mathlib.h
+++ b/math/include/mathlib.h
@@ -28,6 +28,8 @@ float __s_sinf (float);
float __s_cosf (float);
float __s_expf (float);
float __s_expf_1u (float);
+float __s_exp2f (float);
+float __s_exp2f_1u (float);
float __s_logf (float);
float __s_powf (float, float);
double __s_sin (double);
@@ -51,6 +53,8 @@ __f32x4_t __v_sinf (__f32x4_t);
__f32x4_t __v_cosf (__f32x4_t);
__f32x4_t __v_expf (__f32x4_t);
__f32x4_t __v_expf_1u (__f32x4_t);
+__f32x4_t __v_exp2f (__f32x4_t);
+__f32x4_t __v_exp2f_1u (__f32x4_t);
__f32x4_t __v_logf (__f32x4_t);
__f32x4_t __v_powf (__f32x4_t, __f32x4_t);
__f64x2_t __v_sin (__f64x2_t);
@@ -66,6 +70,8 @@ __vpcs __f32x4_t __vn_sinf (__f32x4_t);
__vpcs __f32x4_t __vn_cosf (__f32x4_t);
__vpcs __f32x4_t __vn_expf (__f32x4_t);
__vpcs __f32x4_t __vn_expf_1u (__f32x4_t);
+__vpcs __f32x4_t __vn_exp2f (__f32x4_t);
+__vpcs __f32x4_t __vn_exp2f_1u (__f32x4_t);
__vpcs __f32x4_t __vn_logf (__f32x4_t);
__vpcs __f32x4_t __vn_powf (__f32x4_t, __f32x4_t);
__vpcs __f64x2_t __vn_sin (__f64x2_t);
@@ -77,6 +83,7 @@ __vpcs __f64x2_t __vn_log (__f64x2_t);
__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);