 math/include/mathlib.h |  4 ++++
 math/s_pow.c           |  6 ++++++
 math/test/mathbench.c  | 28 ++++++++++++++++++++++++++++
 math/test/runulp.sh    | 15 +++++++++++++++
 math/test/ulp.c        |  7 +++++++
 math/v_math.h          | 30 ++++++++++++++++++++++++++++++
 math/v_pow.c           | 27 +++++++++++++++++++++++++++
 math/vn_pow.c          | 12 ++++++++++++
 8 files changed, 129 insertions(+), 0 deletions(-)
diff --git a/math/include/mathlib.h b/math/include/mathlib.h
index 254954a..4493008 100644
--- a/math/include/mathlib.h
+++ b/math/include/mathlib.h
@@ -36,6 +36,7 @@ double __s_sin (double);
 double __s_cos (double);
 double __s_exp (double);
 double __s_log (double);
+double __s_pow (double, double);
 
 #if __aarch64__
 #if __GNUC__ >= 5
@@ -61,6 +62,7 @@ __f64x2_t __v_sin (__f64x2_t);
 __f64x2_t __v_cos (__f64x2_t);
 __f64x2_t __v_exp (__f64x2_t);
 __f64x2_t __v_log (__f64x2_t);
+__f64x2_t __v_pow (__f64x2_t, __f64x2_t);
 
 #if __GNUC__ >= 9 || __clang_major__ >= 8
 #define __vpcs __attribute__((__aarch64_vector_pcs__))
@@ -78,6 +80,7 @@ __vpcs __f64x2_t __vn_sin (__f64x2_t);
 __vpcs __f64x2_t __vn_cos (__f64x2_t);
 __vpcs __f64x2_t __vn_exp (__f64x2_t);
 __vpcs __f64x2_t __vn_log (__f64x2_t);
+__vpcs __f64x2_t __vn_pow (__f64x2_t, __f64x2_t);
 
 /* Vector functions following the vector PCS using ABI names.  */
 __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
@@ -90,6 +93,7 @@ __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
 __vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
 
 #endif
 #endif
diff --git a/math/s_pow.c b/math/s_pow.c
new file mode 100644
index 0000000..2e34c9f
--- /dev/null
+++ b/math/s_pow.c
@@ -0,0 +1,6 @@
+/*
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#define SCALAR 1
+#include "v_pow.c"
diff --git a/math/test/mathbench.c b/math/test/mathbench.c
index 8d3ff1d..33ceda3 100644
--- a/math/test/mathbench.c
+++ b/math/test/mathbench.c
@@ -128,6 +128,18 @@ xy_Z_powf (v_float x)
 {
   return _ZGVnN4vv_powf (x, x);
 }
+
+__vpcs static v_double
+xy__vn_pow (v_double x)
+{
+  return __vn_pow (x, x);
+}
+
+__vpcs static v_double
+xy_Z_pow (v_double x)
+{
+  return _ZGVnN2vv_pow (x, x);
+}
 #endif
 
 static v_float
@@ -135,6 +147,12 @@ xy__v_powf (v_float x)
 {
   return __v_powf (x, x);
 }
+
+static v_double
+xy__v_pow (v_double x)
+{
+  return __v_pow (x, x);
+}
 #endif
 
 static float
@@ -142,6 +160,12 @@ xy__s_powf (float x)
 {
   return __s_powf (x, x);
 }
+
+static double
+xy__s_pow (double x)
+{
+  return __s_pow (x, x);
+}
 #endif
 
 static double
@@ -256,6 +280,7 @@ D (__s_sin, -3.1, 3.1)
 D (__s_cos, -3.1, 3.1)
 D (__s_exp, -9.9, 9.9)
 D (__s_log, 0.01, 11.1)
+{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
 F (__s_expf, -9.9, 9.9)
 F (__s_expf_1u, -9.9, 9.9)
 F (__s_exp2f, -9.9, 9.9)
@@ -270,6 +295,7 @@ VD (__v_sin, -3.1, 3.1)
 VD (__v_cos, -3.1, 3.1)
 VD (__v_exp, -9.9, 9.9)
 VD (__v_log, 0.01, 11.1)
+{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
 VF (__v_dummyf, 1.0, 2.0)
 VF (__v_expf, -9.9, 9.9)
 VF (__v_expf_1u, -9.9, 9.9)
@@ -285,6 +311,8 @@ VND (__vn_exp, -9.9, 9.9)
 VND (_ZGVnN2v_exp, -9.9, 9.9)
 VND (__vn_log, 0.01, 11.1)
 VND (_ZGVnN2v_log, 0.01, 11.1)
+{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
+{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
 VND (__vn_sin, -3.1, 3.1)
 VND (_ZGVnN2v_sin, -3.1, 3.1)
 VND (__vn_cos, -3.1, 3.1)
diff --git a/math/test/runulp.sh b/math/test/runulp.sh
index 44393b8..ea524ca 100755
--- a/math/test/runulp.sh
+++ b/math/test/runulp.sh
@@ -110,6 +110,15 @@ range_exp='
  -633.3 -777.3 10000
 '
 
+range_pow='
+ 0x1p-1 0x1p1 x 0x1p-10 0x1p10 50000
+ 0x1p-1 0x1p1 x -0x1p-10 -0x1p10 50000
+ 0x1p-500 0x1p500 x 0x1p-1 0x1p1 50000
+ 0x1p-500 0x1p500 x -0x1p-1 -0x1p1 50000
+ 0x1.ep-1 0x1.1p0 x 0x1p8 0x1p16 50000
+ 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p16 50000
+'
+
 range_expf='
  0 0xffff0000 10000
  0x1p-14 0x1p8 500000
@@ -143,6 +152,7 @@ range_powf='
 
 # error limits
 L_exp=1.9
+L_pow=0.05
 L_expf=1.49
 L_expf_1u=0.4
 L_exp2f=1.49
@@ -173,6 +183,11 @@ exp __v_exp $runv
 exp __vn_exp $runvn
 exp _ZGVnN2v_exp $runvn
 
+pow __s_pow $runs
+pow __v_pow $runv
+pow __vn_pow $runvn
+pow _ZGVnN2vv_pow $runvn
+
 expf __s_expf $runs
 expf __v_expf $runv
 expf __vn_expf $runvn
diff --git a/math/test/ulp.c b/math/test/ulp.c
index b746080..444bbca 100644
--- a/math/test/ulp.c
+++ b/math/test/ulp.c
@@ -240,6 +240,7 @@ static double v_sin(double x) { return __v_sin(argd(x))[0]; }
 static double v_cos(double x) { return __v_cos(argd(x))[0]; }
 static double v_exp(double x) { return __v_exp(argd(x))[0]; }
 static double v_log(double x) { return __v_log(argd(x))[0]; }
+static double v_pow(double x, double y) { return __v_pow(argd(x),argd(y))[0]; }
 #ifdef __vpcs
 static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; }
 static float vn_cosf(float x) { return __vn_cosf(argf(x))[0]; }
@@ -253,6 +254,7 @@ static double vn_sin(double x) { return __vn_sin(argd(x))[0]; }
 static double vn_cos(double x) { return __vn_cos(argd(x))[0]; }
 static double vn_exp(double x) { return __vn_exp(argd(x))[0]; }
 static double vn_log(double x) { return __vn_log(argd(x))[0]; }
+static double vn_pow(double x, double y) { return __vn_pow(argd(x),argd(y))[0]; }
 static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; }
 static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; }
 static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; }
@@ -263,6 +265,7 @@ static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; }
 static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; }
 static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; }
 static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; }
+static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; }
 #endif
 #endif
 
@@ -334,6 +337,7 @@ static const struct fun fun[] = {
  F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0)
  F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0)
  F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0)
+ F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0)
 #if __aarch64__
  F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1)
  F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1)
@@ -347,6 +351,7 @@ static const struct fun fun[] = {
  F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1)
  F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1)
  F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1)
 #ifdef __vpcs
  F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1)
  F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1)
@@ -360,6 +365,7 @@ static const struct fun fun[] = {
  F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1)
  F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1)
  F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1)
  F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1)
  F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1)
  F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1)
@@ -370,6 +376,7 @@ static const struct fun fun[] = {
  F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1)
  F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1)
  F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1)
+ F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1)
 #endif
 #endif
 #endif
diff --git a/math/v_math.h b/math/v_math.h
index 0861e98..3db22e5 100644
--- a/math/v_math.h
+++ b/math/v_math.h
@@ -249,6 +249,11 @@ v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
 {
   return f (x1, x2);
 }
+static inline int
+v_lanes64 (void)
+{
+  return 1;
+}
 static inline v_f64_t
 v_f64 (f64_t x)
 {
@@ -264,6 +269,16 @@ v_s64 (s64_t x)
 {
   return x;
 }
+static inline f64_t
+v_get_f64 (v_f64_t x, int i)
+{
+  return x;
+}
+static inline void
+v_set_f64 (v_f64_t *x, int i, f64_t v)
+{
+  *x = v;
+}
 /* true if any elements of a v_cond result is non-zero.  */
 static inline int
 v_any_u64 (v_u64_t x)
@@ -506,6 +521,11 @@ v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
 		  p[2] ? f (x1[2], x2[2]) : y[2],
 		  p[3] ? f (x1[3], x2[3]) : y[3]};
 }
+static inline int
+v_lanes64 (void)
+{
+  return 2;
+}
 static inline v_f64_t
 v_f64 (f64_t x)
 {
@@ -521,6 +541,16 @@ v_s64 (s64_t x)
 {
   return (v_s64_t){x, x};
 }
+static inline f64_t
+v_get_f64 (v_f64_t x, int i)
+{
+  return x[i];
+}
+static inline void
+v_set_f64 (v_f64_t *x, int i, f64_t v)
+{
+  (*x)[i] = v;
+}
 /* true if any elements of a v_cond result is non-zero.  */
 static inline int
 v_any_u64 (v_u64_t x)
diff --git a/math/v_pow.c b/math/v_pow.c
new file mode 100644
index 0000000..a209d57
--- /dev/null
+++ b/math/v_pow.c
@@ -0,0 +1,27 @@
+/*
+ * Double-precision vector pow function.
+ *
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#if V_SUPPORTED
+
+VPCS_ATTR
+v_f64_t
+V_NAME(pow) (v_f64_t x, v_f64_t y)
+{
+  v_f64_t z;
+  for (int lane = 0; lane < v_lanes64 (); lane++)
+    {
+      f64_t sx = v_get_f64 (x, lane);
+      f64_t sy = v_get_f64 (y, lane);
+      f64_t sz = pow (sx, sy);
+      v_set_f64 (&z, lane, sz);
+    }
+  return z;
+}
+VPCS_ALIAS
+#endif
diff --git a/math/vn_pow.c b/math/vn_pow.c
new file mode 100644
index 0000000..2609501
--- /dev/null
+++ b/math/vn_pow.c
@@ -0,0 +1,12 @@
+/*
+ * AdvSIMD vector PCS variant of __v_pow.
+ *
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#include "mathlib.h"
+#ifdef __vpcs
+#define VPCS 1
+#define VPCS_ALIAS strong_alias (__vn_pow, _ZGVnN2vv_pow)
+#include "v_pow.c"
+#endif