 math/include/mathlib.h |  4 ++++
 math/s_pow.c           |  6 ++++++
 math/test/mathbench.c  | 28 ++++++++++++++++++++++++++++
 math/test/runulp.sh    | 15 +++++++++++++++
 math/test/ulp.c        |  7 +++++++
 math/v_math.h          | 30 ++++++++++++++++++++++++++++++
 math/v_pow.c           | 27 +++++++++++++++++++++++++++
 math/vn_pow.c          | 12 ++++++++++++
 8 files changed, 129 insertions(+), 0 deletions(-)
diff --git a/math/include/mathlib.h b/math/include/mathlib.h
index 254954a..4493008 100644
--- a/math/include/mathlib.h
+++ b/math/include/mathlib.h
@@ -36,6 +36,7 @@ double __s_sin (double);
 double __s_cos (double);
 double __s_exp (double);
 double __s_log (double);
+double __s_pow (double, double);
 
 #if __aarch64__
 #if __GNUC__ >= 5
@@ -61,6 +62,7 @@ __f64x2_t __v_sin (__f64x2_t);
 __f64x2_t __v_cos (__f64x2_t);
 __f64x2_t __v_exp (__f64x2_t);
 __f64x2_t __v_log (__f64x2_t);
+__f64x2_t __v_pow (__f64x2_t, __f64x2_t);
 
 #if __GNUC__ >= 9 || __clang_major__ >= 8
 #define __vpcs __attribute__((__aarch64_vector_pcs__))
@@ -78,6 +80,7 @@ __vpcs __f64x2_t __vn_sin (__f64x2_t);
 __vpcs __f64x2_t __vn_cos (__f64x2_t);
 __vpcs __f64x2_t __vn_exp (__f64x2_t);
 __vpcs __f64x2_t __vn_log (__f64x2_t);
+__vpcs __f64x2_t __vn_pow (__f64x2_t, __f64x2_t);
 
 /* Vector functions following the vector PCS using ABI names.  */
 __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
@@ -90,6 +93,7 @@ __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
 
 #endif
 #endif
diff --git a/math/s_pow.c b/math/s_pow.c
new file mode 100644
index 0000000..2e34c9f
--- /dev/null
+++ b/math/s_pow.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_pow.c"
diff --git a/math/test/mathbench.c b/math/test/mathbench.c
index 8d3ff1d..33ceda3 100644
--- a/math/test/mathbench.c
+++ b/math/test/mathbench.c
@@ -128,6 +128,18 @@ xy_Z_powf (v_float x)
 {
   return _ZGVnN4vv_powf (x, x);
 }
+
+__vpcs static v_double
+xy__vn_pow (v_double x)
+{
+  return __vn_pow (x, x);
+}
+
+__vpcs static v_double
+xy_Z_pow (v_double x)
+{
+  return _ZGVnN2vv_pow (x, x);
+}
 #endif
 
 static v_float
@@ -135,6 +147,12 @@ xy__v_powf (v_float x)
 {
   return __v_powf (x, x);
 }
+
+static v_double
+xy__v_pow (v_double x)
+{
+  return __v_pow (x, x);
+}
 #endif
 
 static float
@@ -142,6 +160,12 @@ xy__s_powf (float x)
 {
   return __s_powf (x, x);
 }
+
+static double
+xy__s_pow (double x)
+{
+  return __s_pow (x, x);
+}
 #endif
 
 static double
@@ -256,6 +280,7 @@ D (__s_sin, -3.1, 3.1)
 D (__s_cos, -3.1, 3.1)
 D (__s_exp, -9.9, 9.9)
 D (__s_log, 0.01, 11.1)
+{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
 F (__s_expf, -9.9, 9.9)
 F (__s_expf_1u, -9.9, 9.9)
 F (__s_exp2f, -9.9, 9.9)
@@ -270,6 +295,7 @@ VD (__v_sin, -3.1, 3.1)
 VD (__v_cos, -3.1, 3.1)
 VD (__v_exp, -9.9, 9.9)
 VD (__v_log, 0.01, 11.1)
+{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
 VF (__v_dummyf, 1.0, 2.0)
 VF (__v_expf, -9.9, 9.9)
 VF (__v_expf_1u, -9.9, 9.9)
@@ -285,6 +311,8 @@ VND (__vn_exp, -9.9, 9.9)
 VND (_ZGVnN2v_exp, -9.9, 9.9)
 VND (__vn_log, 0.01, 11.1)
 VND (_ZGVnN2v_log, 0.01, 11.1)
+{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
+{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
 VND (__vn_sin, -3.1, 3.1)
 VND (_ZGVnN2v_sin, -3.1, 3.1)
 VND (__vn_cos, -3.1, 3.1)
diff --git a/math/test/runulp.sh b/math/test/runulp.sh
index 44393b8..ea524ca 100755
--- a/math/test/runulp.sh
+++ b/math/test/runulp.sh
@@ -110,6 +110,15 @@ range_exp='
  -633.3 -777.3 10000
 '
 
+range_pow='
+ 0x1p-1 0x1p1 x 0x1p-10 0x1p10 50000
+ 0x1p-1 0x1p1 x -0x1p-10 -0x1p10 50000
+ 0x1p-500 0x1p500 x 0x1p-1 0x1p1 50000
+ 0x1p-500 0x1p500 x -0x1p-1 -0x1p1 50000
+ 0x1.ep-1 0x1.1p0 x 0x1p8 0x1p16 50000
+ 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p16 50000
+'
+
 range_expf='
  0 0xffff0000 10000
  0x1p-14 0x1p8 500000
@@ -143,6 +152,7 @@ range_powf='
 
 # error limits
 L_exp=1.9
+L_pow=0.05
 L_expf=1.49
 L_expf_1u=0.4
 L_exp2f=1.49
@@ -173,6 +183,11 @@ exp __v_exp $runv
 exp __vn_exp $runvn
 exp _ZGVnN2v_exp $runvn
 
+pow __s_pow $runs
+pow __v_pow $runv
+pow __vn_pow $runvn
+pow _ZGVnN2vv_pow $runvn
+
 expf __s_expf $runs
 expf __v_expf $runv
 expf __vn_expf $runvn
diff --git a/math/test/ulp.c b/math/test/ulp.c
index b746080..444bbca 100644
--- a/math/test/ulp.c
+++ b/math/test/ulp.c
@@ -240,6 +240,7 @@ static double v_sin(double x) { return __v_sin(argd(x))[0]; }
 static double v_cos(double x) { return __v_cos(argd(x))[0]; }
 static double v_exp(double x) { return __v_exp(argd(x))[0]; }
 static double v_log(double x) { return __v_log(argd(x))[0]; }
+static double v_pow(double x, double y) { return __v_pow(argd(x),argd(y))[0]; }
 #ifdef __vpcs
 static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; }
 static float vn_cosf(float x) { return __vn_cosf(argf(x))[0]; }
@@ -253,6 +254,7 @@ static double vn_sin(double x) { return __vn_sin(argd(x))[0]; }
 static double vn_cos(double x) { return __vn_cos(argd(x))[0]; }
 static double vn_exp(double x) { return __vn_exp(argd(x))[0]; }
 static double vn_log(double x) { return __vn_log(argd(x))[0]; }
+static double vn_pow(double x, double y) { return __vn_pow(argd(x),argd(y))[0]; }
 static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; }
 static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; }
 static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; }
@@ -263,6 +265,7 @@ static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; }
 static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; }
 static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; }
 static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; }
+static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; }
 #endif
 #endif
 
@@ -334,6 +337,7 @@ static const struct fun fun[] = {
  F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0)
  F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0)
  F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0)
+ F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0)
 #if __aarch64__
  F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1)
  F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1)
@@ -347,6 +351,7 @@ static const struct fun fun[] = {
  F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1)
  F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1)
  F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1)
 #ifdef __vpcs
  F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1)
  F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1)
@@ -360,6 +365,7 @@ static const struct fun fun[] = {
  F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1)
  F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1)
  F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1)
  F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1)
  F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1)
  F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1)
@@ -370,6 +376,7 @@ static const struct fun fun[] = {
  F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1)
  F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1)
  F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1)
 #endif
 #endif
 #endif
diff --git a/math/v_math.h b/math/v_math.h
index 0861e98..3db22e5 100644
--- a/math/v_math.h
+++ b/math/v_math.h
@@ -249,6 +249,11 @@ v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
 {
   return f (x1, x2);
 }
+static inline int
+v_lanes64 (void)
+{
+  return 1;
+}
 static inline v_f64_t
 v_f64 (f64_t x)
 {
@@ -264,6 +269,16 @@ v_s64 (s64_t x)
 {
   return x;
 }
+static inline f64_t
+v_get_f64 (v_f64_t x, int i)
+{
+  return x;
+}
+static inline void
+v_set_f64 (v_f64_t *x, int i, f64_t v)
+{
+  *x = v;
+}
 /* true if any elements of a v_cond result is non-zero.  */
 static inline int
 v_any_u64 (v_u64_t x)
@@ -506,6 +521,11 @@ v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
 		  p[2] ? f (x1[2], x2[2]) : y[2],
 		  p[3] ? f (x1[3], x2[3]) : y[3]};
 }
+static inline int
+v_lanes64 (void)
+{
+  return 2;
+}
 static inline v_f64_t
 v_f64 (f64_t x)
 {
@@ -521,6 +541,16 @@ v_s64 (s64_t x)
 {
   return (v_s64_t){x, x};
 }
+static inline f64_t
+v_get_f64 (v_f64_t x, int i)
+{
+  return x[i];
+}
+static inline void
+v_set_f64 (v_f64_t *x, int i, f64_t v)
+{
+  (*x)[i] = v;
+}
 /* true if any elements of a v_cond result is non-zero.  */
 static inline int
 v_any_u64 (v_u64_t x)
diff --git a/math/v_pow.c b/math/v_pow.c
new file mode 100644
index 0000000..a209d57
--- /dev/null
+++ b/math/v_pow.c
@@ -0,0 +1,27 @@
+/*
+ * Double-precision vector pow function.
+ *
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#if V_SUPPORTED
+
+VPCS_ATTR
+v_f64_t
+V_NAME(pow) (v_f64_t x, v_f64_t y)
+{
+  v_f64_t z;
+  for (int lane = 0; lane < v_lanes64 (); lane++)
+    {
+      f64_t sx = v_get_f64 (x, lane);
+      f64_t sy = v_get_f64 (y, lane);
+      f64_t sz = pow (sx, sy);
+      v_set_f64 (&z, lane, sz);
+    }
+  return z;
+}
+VPCS_ALIAS
+#endif
diff --git a/math/vn_pow.c b/math/vn_pow.c
new file mode 100644
index 0000000..2609501
--- /dev/null
+++ b/math/vn_pow.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_pow.
+ *
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#include "mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS strong_alias (__vn_pow, _ZGVnN2vv_pow)
+#include "v_pow.c"
+#endif