-rw-r--r--  math/include/mathlib.h |  4
-rw-r--r--  math/s_pow.c           |  6
-rw-r--r--  math/test/mathbench.c  | 28
-rwxr-xr-x  math/test/runulp.sh    | 15
-rw-r--r--  math/test/ulp.c        |  7
-rw-r--r--  math/v_math.h          | 30
-rw-r--r--  math/v_pow.c           | 27
-rw-r--r--  math/vn_pow.c          | 12
8 files changed, 129 insertions, 0 deletions
diff --git a/math/include/mathlib.h b/math/include/mathlib.h
index 254954a..4493008 100644
--- a/math/include/mathlib.h
+++ b/math/include/mathlib.h
@@ -36,6 +36,7 @@ double __s_sin (double);
double __s_cos (double);
double __s_exp (double);
double __s_log (double);
+double __s_pow (double, double);
#if __aarch64__
#if __GNUC__ >= 5
@@ -61,6 +62,7 @@ __f64x2_t __v_sin (__f64x2_t);
__f64x2_t __v_cos (__f64x2_t);
__f64x2_t __v_exp (__f64x2_t);
__f64x2_t __v_log (__f64x2_t);
+__f64x2_t __v_pow (__f64x2_t, __f64x2_t);
#if __GNUC__ >= 9 || __clang_major__ >= 8
#define __vpcs __attribute__((__aarch64_vector_pcs__))
@@ -78,6 +80,7 @@ __vpcs __f64x2_t __vn_sin (__f64x2_t);
__vpcs __f64x2_t __vn_cos (__f64x2_t);
__vpcs __f64x2_t __vn_exp (__f64x2_t);
__vpcs __f64x2_t __vn_log (__f64x2_t);
+__vpcs __f64x2_t __vn_pow (__f64x2_t, __f64x2_t);
/* Vector functions following the vector PCS using ABI names. */
__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
@@ -90,6 +93,7 @@ __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
#endif
#endif
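
The header change exposes pow at all three levels: scalar __s_pow, the plain-PCS vector __v_pow, and the vector-PCS __vn_pow together with its ABI name _ZGVnN2vv_pow. The mangled name follows the AArch64 vector function ABI: 'n' selects AdvSIMD, 'N' means unmasked, '2' is the lane count, and 'vv' marks two vector parameters (the unary routines above carry a single 'v'). A minimal caller sketch, assuming an AArch64 build where mathlib.h defines __f64x2_t as a GNU vector type; demo and its values are hypothetical:

    #include "mathlib.h"

    /* Hypothetical demo: compute {2^3, 3^2} in one 2-lane call; both
       results are exact, so the equality checks are safe.  */
    static int
    demo (void)
    {
      __f64x2_t x = {2.0, 3.0};
      __f64x2_t y = {3.0, 2.0};
      __f64x2_t z = __v_pow (x, y);
      return z[0] == 8.0 && z[1] == 9.0;
    }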
diff --git a/math/s_pow.c b/math/s_pow.c
new file mode 100644
index 0000000..2e34c9f
--- /dev/null
+++ b/math/s_pow.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_pow.c"
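
The scalar variant is the vector source recompiled with SCALAR defined: on that path v_math.h collapses the vector types to plain scalars and the lane helpers to no-ops, so the lane loop in v_pow.c executes exactly once. Roughly (an abridged paraphrase of v_math.h's SCALAR branch, not the verbatim header):

    typedef double f64_t;
    typedef f64_t v_f64_t;       /* a 1-lane "vector" is just a double */
    #define V_NAME(x) __s_##x    /* V_NAME (pow) -> __s_pow */
    /* VPCS_ATTR and VPCS_ALIAS expand to nothing in this build.  */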
diff --git a/math/test/mathbench.c b/math/test/mathbench.c
index 8d3ff1d..33ceda3 100644
--- a/math/test/mathbench.c
+++ b/math/test/mathbench.c
@@ -128,6 +128,18 @@ xy_Z_powf (v_float x)
{
return _ZGVnN4vv_powf (x, x);
}
+
+__vpcs static v_double
+xy__vn_pow (v_double x)
+{
+ return __vn_pow (x, x);
+}
+
+__vpcs static v_double
+xy_Z_pow (v_double x)
+{
+ return _ZGVnN2vv_pow (x, x);
+}
#endif
static v_float
@@ -135,6 +147,12 @@ xy__v_powf (v_float x)
{
return __v_powf (x, x);
}
+
+static v_double
+xy__v_pow (v_double x)
+{
+ return __v_pow (x, x);
+}
#endif
static float
@@ -142,6 +160,12 @@ xy__s_powf (float x)
{
return __s_powf (x, x);
}
+
+static double
+xy__s_pow (double x)
+{
+ return __s_pow (x, x);
+}
#endif
static double
@@ -256,6 +280,7 @@ D (__s_sin, -3.1, 3.1)
D (__s_cos, -3.1, 3.1)
D (__s_exp, -9.9, 9.9)
D (__s_log, 0.01, 11.1)
+{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
F (__s_expf, -9.9, 9.9)
F (__s_expf_1u, -9.9, 9.9)
F (__s_exp2f, -9.9, 9.9)
@@ -270,6 +295,7 @@ VD (__v_sin, -3.1, 3.1)
VD (__v_cos, -3.1, 3.1)
VD (__v_exp, -9.9, 9.9)
VD (__v_log, 0.01, 11.1)
+{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
VF (__v_dummyf, 1.0, 2.0)
VF (__v_expf, -9.9, 9.9)
VF (__v_expf_1u, -9.9, 9.9)
@@ -285,6 +311,8 @@ VND (__vn_exp, -9.9, 9.9)
VND (_ZGVnN2v_exp, -9.9, 9.9)
VND (__vn_log, 0.01, 11.1)
VND (_ZGVnN2v_log, 0.01, 11.1)
+{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
+{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
VND (__vn_sin, -3.1, 3.1)
VND (_ZGVnN2v_sin, -3.1, 3.1)
VND (__vn_cos, -3.1, 3.1)
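
Because mathbench's D/VD/VND helper macros only wrap unary functions, pow gets xy_* adapters that pass the same input as both operands (i.e. they benchmark x^x), plus hand-written table entries. The fields are the printed name, a type tag ('d' for double), a variant tag (0 scalar, 'v' vector, 'n' vector PCS), the sample interval, and the function pointer in the matching union member, mirroring what the unary macro produces (approximate expansion; see mathbench.c for the real definition):

    #define VND(f, lo, hi) {#f, 'd', 'n', lo, hi, {.vnd = f}},
    /* VND (__vn_log, 0.01, 11.1) thus becomes
       {"__vn_log", 'd', 'n', 0.01, 11.1, {.vnd = __vn_log}},
       which is why the binary pow entries are spelled out by hand.  */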
diff --git a/math/test/runulp.sh b/math/test/runulp.sh
index 44393b8..ea524ca 100755
--- a/math/test/runulp.sh
+++ b/math/test/runulp.sh
@@ -110,6 +110,15 @@ range_exp='
-633.3 -777.3 10000
'
+range_pow='
+ 0x1p-1 0x1p1 x 0x1p-10 0x1p10 50000
+ 0x1p-1 0x1p1 x -0x1p-10 -0x1p10 50000
+ 0x1p-500 0x1p500 x 0x1p-1 0x1p1 50000
+ 0x1p-500 0x1p500 x -0x1p-1 -0x1p1 50000
+ 0x1.ep-1 0x1.1p0 x 0x1p8 0x1p16 50000
+ 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p16 50000
+'
+
range_expf='
0 0xffff0000 10000
0x1p-14 0x1p8 500000
@@ -143,6 +152,7 @@ range_powf='
# error limits
L_exp=1.9
+L_pow=0.05
L_expf=1.49
L_expf_1u=0.4
L_exp2f=1.49
@@ -173,6 +183,11 @@ exp __v_exp $runv
exp __vn_exp $runvn
exp _ZGVnN2v_exp $runvn
+pow __s_pow $runs
+pow __v_pow $runv
+pow __vn_pow $runvn
+pow _ZGVnN2vv_pow $runvn
+
expf __s_expf $runs
expf __v_expf $runv
expf __vn_expf $runvn
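
range_pow uses the harness's bivariate syntax: each line reads 'lo hi x lo2 hi2 n', i.e. n random samples with the first argument drawn from [lo, hi] and the second from [lo2, hi2]. The six lines target the hard regions: moderate bases with small and with large exponents of either sign, extreme bases with exponents near 1, and bases just around 1.0 (0x1.ep-1 to 0x1.1p0) paired with huge exponents, where intermediate precision matters most. A sketch of the kind of command a range line turns into (flags are illustrative; ulp's usage text is authoritative):

    ./ulp -e 0.05 __v_pow 0x1p-1 0x1p1 x 0x1p-10 0x1p10 50000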
diff --git a/math/test/ulp.c b/math/test/ulp.c
index b746080..444bbca 100644
--- a/math/test/ulp.c
+++ b/math/test/ulp.c
@@ -240,6 +240,7 @@ static double v_sin(double x) { return __v_sin(argd(x))[0]; }
static double v_cos(double x) { return __v_cos(argd(x))[0]; }
static double v_exp(double x) { return __v_exp(argd(x))[0]; }
static double v_log(double x) { return __v_log(argd(x))[0]; }
+static double v_pow(double x, double y) { return __v_pow(argd(x),argd(y))[0]; }
#ifdef __vpcs
static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; }
static float vn_cosf(float x) { return __vn_cosf(argf(x))[0]; }
@@ -253,6 +254,7 @@ static double vn_sin(double x) { return __vn_sin(argd(x))[0]; }
static double vn_cos(double x) { return __vn_cos(argd(x))[0]; }
static double vn_exp(double x) { return __vn_exp(argd(x))[0]; }
static double vn_log(double x) { return __vn_log(argd(x))[0]; }
+static double vn_pow(double x, double y) { return __vn_pow(argd(x),argd(y))[0]; }
static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; }
static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; }
static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; }
@@ -263,6 +265,7 @@ static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; }
static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; }
static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; }
static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; }
+static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; }
#endif
#endif
@@ -334,6 +337,7 @@ static const struct fun fun[] = {
F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0)
F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0)
F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0)
+ F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0)
#if __aarch64__
F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1)
F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1)
@@ -347,6 +351,7 @@ static const struct fun fun[] = {
F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1)
F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1)
F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1)
#ifdef __vpcs
F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1)
F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1)
@@ -360,6 +365,7 @@ static const struct fun fun[] = {
F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1)
F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1)
F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1)
F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1)
F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1)
F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1)
@@ -370,6 +376,7 @@ static const struct fun fun[] = {
F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1)
F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1)
F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1)
#endif
#endif
#endif
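
Each new wrapper broadcasts its scalar arguments with argd and checks lane 0 of the result, which suffices because every lane computes the same value. The table rows then bind each routine to its reference implementations; a gloss of the fields, inferred from how the rows vary (the authoritative meaning is the F macro in ulp.c):

    /* F (_ZGVnN2vv_pow,  symbol under test, used for lookup/reporting
          Z_pow,          wrapper actually invoked on each sample
          powl,           long double reference
          mpfr_pow,       MPFR reference for the exact result
          2,              arity: two arguments
          0,              0 = double precision, 1 = single precision
          d2,             signature tag: (double, double) -> double
          1)              set on the vector-backed entries above  */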
diff --git a/math/v_math.h b/math/v_math.h
index 0861e98..3db22e5 100644
--- a/math/v_math.h
+++ b/math/v_math.h
@@ -249,6 +249,11 @@ v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
return f (x1, x2);
}
+static inline int
+v_lanes64 (void)
+{
+ return 1;
+}
static inline v_f64_t
v_f64 (f64_t x)
{
@@ -264,6 +269,16 @@ v_s64 (s64_t x)
{
return x;
}
+static inline f64_t
+v_get_f64 (v_f64_t x, int i)
+{
+ return x;
+}
+static inline void
+v_set_f64 (v_f64_t *x, int i, f64_t v)
+{
+ *x = v;
+}
/* true if any element of a v_cond result is non-zero. */
static inline int
v_any_u64 (v_u64_t x)
@@ -506,6 +521,11 @@ v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
p[2] ? f (x1[2], x2[2]) : y[2], p[3] ? f (x1[3], x2[3]) : y[3]};
}
+static inline int
+v_lanes64 (void)
+{
+ return 2;
+}
static inline v_f64_t
v_f64 (f64_t x)
{
@@ -521,6 +541,16 @@ v_s64 (s64_t x)
{
return (v_s64_t){x, x};
}
+static inline f64_t
+v_get_f64 (v_f64_t x, int i)
+{
+ return x[i];
+}
+static inline void
+v_set_f64 (v_f64_t *x, int i, f64_t v)
+{
+ (*x)[i] = v;
+}
/* true if any element of a v_cond result is non-zero. */
static inline int
v_any_u64 (v_u64_t x)
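
The new helpers are deliberately trivial: the scalar branch reports one lane and ignores the index, the AdvSIMD branch reports two lanes and indexes the vector. Together they let one loop body express "apply f to every lane" without knowing the build's vector width. A minimal sketch of the pattern (not part of the patch); v_pow.c below is exactly this shape with a binary f:

    static inline v_f64_t
    v_map_f64 (f64_t (*f) (f64_t), v_f64_t x)
    {
      v_f64_t y;
      for (int i = 0; i < v_lanes64 (); i++)
        v_set_f64 (&y, i, f (v_get_f64 (x, i)));
      return y;
    }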
diff --git a/math/v_pow.c b/math/v_pow.c
new file mode 100644
index 0000000..a209d57
--- /dev/null
+++ b/math/v_pow.c
@@ -0,0 +1,27 @@
+/*
+ * Double-precision vector pow function.
+ *
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#if V_SUPPORTED
+
+VPCS_ATTR
+v_f64_t
+V_NAME(pow) (v_f64_t x, v_f64_t y)
+{
+ v_f64_t z;
+ for (int lane = 0; lane < v_lanes64 (); lane++)
+ {
+ f64_t sx = v_get_f64 (x, lane);
+ f64_t sy = v_get_f64 (y, lane);
+ f64_t sz = pow (sx, sy);
+ v_set_f64 (&z, lane, sz);
+ }
+ return z;
+}
+VPCS_ALIAS
+#endif
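
This is a placeholder rather than a vectorized algorithm: every lane falls back to scalar pow, so accuracy and special-case handling are inherited wholesale, which is why runulp.sh can afford the tight 0.05 ULP limit above. One source then yields all three entry points via v_math.h's macros (abridged, assuming the macro scheme sketched earlier):

    /* SCALAR=1 (s_pow.c):  f64_t   __s_pow (f64_t, f64_t)      1 lane
       plain vector build:  v_f64_t __v_pow (v_f64_t, v_f64_t)  2 lanes
       VPCS=1 (vn_pow.c):   __vpcs __vn_pow, aliased to
                            _ZGVnN2vv_pow                       2 lanes  */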
diff --git a/math/vn_pow.c b/math/vn_pow.c
new file mode 100644
index 0000000..2609501
--- /dev/null
+++ b/math/vn_pow.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_pow.
+ *
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#include "mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS strong_alias (__vn_pow, _ZGVnN2vv_pow)
+#include "v_pow.c"
+#endif
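
strong_alias makes _ZGVnN2vv_pow a second name for __vn_pow's definition, so one function body serves both the internal symbol and the ABI-mangled one. The macro comes from the project's own headers; the usual GCC idiom behind such a macro looks like this (an assumption, shown for orientation only):

    /* Emit aliasname as an additional symbol for name's definition.  */
    #define strong_alias(name, aliasname) \
      extern __typeof (name) aliasname __attribute__ ((alias (#name)));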