diff options
author | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2020-01-10 15:10:45 +0000 |
---|---|---|
committer | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2020-01-14 12:47:48 +0000 |
commit | a807c9bba198cc89ddd6cb177442e1e297c935b3 (patch) | |
tree | 2d8578dfdc4d6d45257a4518f8244396e876cd7b /math/test | |
parent | 099350affd8bd6eebf9d1e067b102530740b7270 (diff) | |
download | arm-optimized-routines-a807c9bba198cc89ddd6cb177442e1e297c935b3.tar.gz |
math: add vector pow
This implementation is a wrapper around the scalar pow with appropriate
call abi. As such it is not expected to be faster than scalar calls,
the new double prec vector pow symbols are provided for completeness.
Diffstat (limited to 'math/test')
-rw-r--r-- | math/test/mathbench.c | 28 | ||||
-rwxr-xr-x | math/test/runulp.sh | 15 | ||||
-rw-r--r-- | math/test/ulp.c | 7 |
3 files changed, 50 insertions, 0 deletions
diff --git a/math/test/mathbench.c b/math/test/mathbench.c index 8d3ff1d..33ceda3 100644 --- a/math/test/mathbench.c +++ b/math/test/mathbench.c @@ -128,6 +128,18 @@ xy_Z_powf (v_float x) { return _ZGVnN4vv_powf (x, x); } + +__vpcs static v_double +xy__vn_pow (v_double x) +{ + return __vn_pow (x, x); +} + +__vpcs static v_double +xy_Z_pow (v_double x) +{ + return _ZGVnN2vv_pow (x, x); +} #endif static v_float @@ -135,6 +147,12 @@ xy__v_powf (v_float x) { return __v_powf (x, x); } + +static v_double +xy__v_pow (v_double x) +{ + return __v_pow (x, x); +} #endif static float @@ -142,6 +160,12 @@ xy__s_powf (float x) { return __s_powf (x, x); } + +static double +xy__s_pow (double x) +{ + return __s_pow (x, x); +} #endif static double @@ -256,6 +280,7 @@ D (__s_sin, -3.1, 3.1) D (__s_cos, -3.1, 3.1) D (__s_exp, -9.9, 9.9) D (__s_log, 0.01, 11.1) +{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}}, F (__s_expf, -9.9, 9.9) F (__s_expf_1u, -9.9, 9.9) F (__s_exp2f, -9.9, 9.9) @@ -270,6 +295,7 @@ VD (__v_sin, -3.1, 3.1) VD (__v_cos, -3.1, 3.1) VD (__v_exp, -9.9, 9.9) VD (__v_log, 0.01, 11.1) +{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}}, VF (__v_dummyf, 1.0, 2.0) VF (__v_expf, -9.9, 9.9) VF (__v_expf_1u, -9.9, 9.9) @@ -285,6 +311,8 @@ VND (__vn_exp, -9.9, 9.9) VND (_ZGVnN2v_exp, -9.9, 9.9) VND (__vn_log, 0.01, 11.1) VND (_ZGVnN2v_log, 0.01, 11.1) +{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}}, +{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}}, VND (__vn_sin, -3.1, 3.1) VND (_ZGVnN2v_sin, -3.1, 3.1) VND (__vn_cos, -3.1, 3.1) diff --git a/math/test/runulp.sh b/math/test/runulp.sh index 44393b8..ea524ca 100755 --- a/math/test/runulp.sh +++ b/math/test/runulp.sh @@ -110,6 +110,15 @@ range_exp=' -633.3 -777.3 10000 ' +range_pow=' + 0x1p-1 0x1p1 x 0x1p-10 0x1p10 50000 + 0x1p-1 0x1p1 x -0x1p-10 -0x1p10 50000 + 0x1p-500 0x1p500 x 0x1p-1 0x1p1 50000 + 0x1p-500 0x1p500 x -0x1p-1 -0x1p1 50000 + 0x1.ep-1 0x1.1p0 x 0x1p8 0x1p16 50000 + 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p16 50000 +' + range_expf=' 0 0xffff0000 10000 0x1p-14 0x1p8 500000 @@ -143,6 +152,7 @@ range_powf=' # error limits L_exp=1.9 +L_pow=0.05 L_expf=1.49 L_expf_1u=0.4 L_exp2f=1.49 @@ -173,6 +183,11 @@ exp __v_exp $runv exp __vn_exp $runvn exp _ZGVnN2v_exp $runvn +pow __s_pow $runs +pow __v_pow $runv +pow __vn_pow $runvn +pow _ZGVnN2vv_pow $runvn + expf __s_expf $runs expf __v_expf $runv expf __vn_expf $runvn diff --git a/math/test/ulp.c b/math/test/ulp.c index b746080..444bbca 100644 --- a/math/test/ulp.c +++ b/math/test/ulp.c @@ -240,6 +240,7 @@ static double v_sin(double x) { return __v_sin(argd(x))[0]; } static double v_cos(double x) { return __v_cos(argd(x))[0]; } static double v_exp(double x) { return __v_exp(argd(x))[0]; } static double v_log(double x) { return __v_log(argd(x))[0]; } +static double v_pow(double x, double y) { return __v_pow(argd(x),argd(y))[0]; } #ifdef __vpcs static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; } static float vn_cosf(float x) { return __vn_cosf(argf(x))[0]; } @@ -253,6 +254,7 @@ static double vn_sin(double x) { return __vn_sin(argd(x))[0]; } static double vn_cos(double x) { return __vn_cos(argd(x))[0]; } static double vn_exp(double x) { return __vn_exp(argd(x))[0]; } static double vn_log(double x) { return __vn_log(argd(x))[0]; } +static double vn_pow(double x, double y) { return __vn_pow(argd(x),argd(y))[0]; } static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; } static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; } static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; } @@ -263,6 +265,7 @@ static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; } static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; } static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; } static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; } +static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; } #endif #endif @@ -334,6 +337,7 @@ static const struct fun fun[] = { F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0) F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0) F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0) + F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0) #if __aarch64__ F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1) F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1) @@ -347,6 +351,7 @@ static const struct fun fun[] = { F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1) F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1) F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1) + F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1) #ifdef __vpcs F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1) F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1) @@ -360,6 +365,7 @@ static const struct fun fun[] = { F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1) F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1) F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1) + F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1) F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1) F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1) F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1) @@ -370,6 +376,7 @@ static const struct fun fun[] = { F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1) F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1) F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1) + F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1) #endif #endif #endif |