aboutsummaryrefslogtreecommitdiff
path: root/math/include/mathlib.h
diff options
context:
space:
mode:
authorSzabolcs Nagy <szabolcs.nagy@arm.com>2019-08-09 16:24:59 +0100
committerSzabolcs Nagy <szabolcs.nagy@arm.com>2019-10-14 11:58:53 +0100
commitba75d0a0d8235119eef1305f0d9a2142fa4b11e0 (patch)
tree2e5fe02ff810134efff2ad534b0a8c2e61e075c0 /math/include/mathlib.h
parentc5cba8528da13fe0d647dbd0f80d0cf21434b224 (diff)
downloadarm-optimized-routines-ba75d0a0d8235119eef1305f0d9a2142fa4b11e0.tar.gz
Add vector powf
Essentially, the scalar powf algorithm is used for each element in the vector, just inlined for better scheduling and simpler special-case handling. The log polynomial is smaller, as less accuracy is enough. Worst-case error is 2.6 ulp.
Diffstat (limited to 'math/include/mathlib.h')
-rw-r--r--math/include/mathlib.h4
1 file changed, 4 insertions, 0 deletions
diff --git a/math/include/mathlib.h b/math/include/mathlib.h
index 405cf4a..1788502 100644
--- a/math/include/mathlib.h
+++ b/math/include/mathlib.h
@@ -29,6 +29,7 @@ float __s_cosf (float);
float __s_expf (float);
float __s_expf_1u (float);
float __s_logf (float);
+float __s_powf (float, float);
double __s_exp (double);
#if __aarch64__
@@ -48,6 +49,7 @@ __f32x4_t __v_cosf (__f32x4_t);
__f32x4_t __v_expf (__f32x4_t);
__f32x4_t __v_expf_1u (__f32x4_t);
__f32x4_t __v_logf (__f32x4_t);
+__f32x4_t __v_powf (__f32x4_t, __f32x4_t);
__f64x2_t __v_exp (__f64x2_t);
#if __GNUC__ >= 9 || __clang_major__ >= 8
@@ -59,6 +61,7 @@ __vpcs __f32x4_t __vn_cosf (__f32x4_t);
__vpcs __f32x4_t __vn_expf (__f32x4_t);
__vpcs __f32x4_t __vn_expf_1u (__f32x4_t);
__vpcs __f32x4_t __vn_logf (__f32x4_t);
+__vpcs __f32x4_t __vn_powf (__f32x4_t, __f32x4_t);
__vpcs __f64x2_t __vn_exp (__f64x2_t);
/* Vector functions following the vector PCS using ABI names. */
@@ -66,6 +69,7 @@ __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
__vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
#endif
#endif