diff options
author | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2020-01-10 15:10:45 +0000 |
---|---|---|
committer | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2020-01-14 12:47:48 +0000 |
commit | a807c9bba198cc89ddd6cb177442e1e297c935b3 (patch) | |
tree | 2d8578dfdc4d6d45257a4518f8244396e876cd7b /math | |
parent | 099350affd8bd6eebf9d1e067b102530740b7270 (diff) | |
download | arm-optimized-routines-a807c9bba198cc89ddd6cb177442e1e297c935b3.tar.gz |
math: add vector pow
This implementation is a wrapper around the scalar pow with the appropriate
call ABI. As such it is not expected to be faster than scalar calls;
the new double-precision vector pow symbols are provided for completeness.
Diffstat (limited to 'math')
-rw-r--r-- | math/include/mathlib.h | 4 | ||||
-rw-r--r-- | math/s_pow.c | 6 | ||||
-rw-r--r-- | math/test/mathbench.c | 28 | ||||
-rwxr-xr-x | math/test/runulp.sh | 15 | ||||
-rw-r--r-- | math/test/ulp.c | 7 | ||||
-rw-r--r-- | math/v_math.h | 30 | ||||
-rw-r--r-- | math/v_pow.c | 27 | ||||
-rw-r--r-- | math/vn_pow.c | 12 |
8 files changed, 129 insertions, 0 deletions
diff --git a/math/include/mathlib.h b/math/include/mathlib.h index 254954a..4493008 100644 --- a/math/include/mathlib.h +++ b/math/include/mathlib.h @@ -36,6 +36,7 @@ double __s_sin (double); double __s_cos (double); double __s_exp (double); double __s_log (double); +double __s_pow (double, double); #if __aarch64__ #if __GNUC__ >= 5 @@ -61,6 +62,7 @@ __f64x2_t __v_sin (__f64x2_t); __f64x2_t __v_cos (__f64x2_t); __f64x2_t __v_exp (__f64x2_t); __f64x2_t __v_log (__f64x2_t); +__f64x2_t __v_pow (__f64x2_t, __f64x2_t); #if __GNUC__ >= 9 || __clang_major__ >= 8 #define __vpcs __attribute__((__aarch64_vector_pcs__)) @@ -78,6 +80,7 @@ __vpcs __f64x2_t __vn_sin (__f64x2_t); __vpcs __f64x2_t __vn_cos (__f64x2_t); __vpcs __f64x2_t __vn_exp (__f64x2_t); __vpcs __f64x2_t __vn_log (__f64x2_t); +__vpcs __f64x2_t __vn_pow (__f64x2_t, __f64x2_t); /* Vector functions following the vector PCS using ABI names. */ __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t); @@ -90,6 +93,7 @@ __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t); +__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t); #endif #endif diff --git a/math/s_pow.c b/math/s_pow.c new file mode 100644 index 0000000..2e34c9f --- /dev/null +++ b/math/s_pow.c @@ -0,0 +1,6 @@ +/* + * Copyright (c) 2020, Arm Limited. 
+ * SPDX-License-Identifier: MIT + */ +#define SCALAR 1 +#include "v_pow.c" diff --git a/math/test/mathbench.c b/math/test/mathbench.c index 8d3ff1d..33ceda3 100644 --- a/math/test/mathbench.c +++ b/math/test/mathbench.c @@ -128,6 +128,18 @@ xy_Z_powf (v_float x) { return _ZGVnN4vv_powf (x, x); } + +__vpcs static v_double +xy__vn_pow (v_double x) +{ + return __vn_pow (x, x); +} + +__vpcs static v_double +xy_Z_pow (v_double x) +{ + return _ZGVnN2vv_pow (x, x); +} #endif static v_float @@ -135,6 +147,12 @@ xy__v_powf (v_float x) { return __v_powf (x, x); } + +static v_double +xy__v_pow (v_double x) +{ + return __v_pow (x, x); +} #endif static float @@ -142,6 +160,12 @@ xy__s_powf (float x) { return __s_powf (x, x); } + +static double +xy__s_pow (double x) +{ + return __s_pow (x, x); +} #endif static double @@ -256,6 +280,7 @@ D (__s_sin, -3.1, 3.1) D (__s_cos, -3.1, 3.1) D (__s_exp, -9.9, 9.9) D (__s_log, 0.01, 11.1) +{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}}, F (__s_expf, -9.9, 9.9) F (__s_expf_1u, -9.9, 9.9) F (__s_exp2f, -9.9, 9.9) @@ -270,6 +295,7 @@ VD (__v_sin, -3.1, 3.1) VD (__v_cos, -3.1, 3.1) VD (__v_exp, -9.9, 9.9) VD (__v_log, 0.01, 11.1) +{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}}, VF (__v_dummyf, 1.0, 2.0) VF (__v_expf, -9.9, 9.9) VF (__v_expf_1u, -9.9, 9.9) @@ -285,6 +311,8 @@ VND (__vn_exp, -9.9, 9.9) VND (_ZGVnN2v_exp, -9.9, 9.9) VND (__vn_log, 0.01, 11.1) VND (_ZGVnN2v_log, 0.01, 11.1) +{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}}, +{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}}, VND (__vn_sin, -3.1, 3.1) VND (_ZGVnN2v_sin, -3.1, 3.1) VND (__vn_cos, -3.1, 3.1) diff --git a/math/test/runulp.sh b/math/test/runulp.sh index 44393b8..ea524ca 100755 --- a/math/test/runulp.sh +++ b/math/test/runulp.sh @@ -110,6 +110,15 @@ range_exp=' -633.3 -777.3 10000 ' +range_pow=' + 0x1p-1 0x1p1 x 0x1p-10 0x1p10 50000 + 0x1p-1 0x1p1 x -0x1p-10 -0x1p10 50000 + 0x1p-500 0x1p500 x 0x1p-1 0x1p1 50000 + 0x1p-500 0x1p500 x -0x1p-1 
-0x1p1 50000 + 0x1.ep-1 0x1.1p0 x 0x1p8 0x1p16 50000 + 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p16 50000 +' + range_expf=' 0 0xffff0000 10000 0x1p-14 0x1p8 500000 @@ -143,6 +152,7 @@ range_powf=' # error limits L_exp=1.9 +L_pow=0.05 L_expf=1.49 L_expf_1u=0.4 L_exp2f=1.49 @@ -173,6 +183,11 @@ exp __v_exp $runv exp __vn_exp $runvn exp _ZGVnN2v_exp $runvn +pow __s_pow $runs +pow __v_pow $runv +pow __vn_pow $runvn +pow _ZGVnN2vv_pow $runvn + expf __s_expf $runs expf __v_expf $runv expf __vn_expf $runvn diff --git a/math/test/ulp.c b/math/test/ulp.c index b746080..444bbca 100644 --- a/math/test/ulp.c +++ b/math/test/ulp.c @@ -240,6 +240,7 @@ static double v_sin(double x) { return __v_sin(argd(x))[0]; } static double v_cos(double x) { return __v_cos(argd(x))[0]; } static double v_exp(double x) { return __v_exp(argd(x))[0]; } static double v_log(double x) { return __v_log(argd(x))[0]; } +static double v_pow(double x, double y) { return __v_pow(argd(x),argd(y))[0]; } #ifdef __vpcs static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; } static float vn_cosf(float x) { return __vn_cosf(argf(x))[0]; } @@ -253,6 +254,7 @@ static double vn_sin(double x) { return __vn_sin(argd(x))[0]; } static double vn_cos(double x) { return __vn_cos(argd(x))[0]; } static double vn_exp(double x) { return __vn_exp(argd(x))[0]; } static double vn_log(double x) { return __vn_log(argd(x))[0]; } +static double vn_pow(double x, double y) { return __vn_pow(argd(x),argd(y))[0]; } static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; } static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; } static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; } @@ -263,6 +265,7 @@ static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; } static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; } static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; } static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; } +static double Z_pow(double x, double y) { 
return _ZGVnN2vv_pow(argd(x),argd(y))[0]; } #endif #endif @@ -334,6 +337,7 @@ static const struct fun fun[] = { F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0) F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0) F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0) + F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0) #if __aarch64__ F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1) F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1) @@ -347,6 +351,7 @@ static const struct fun fun[] = { F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1) F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1) F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1) + F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1) #ifdef __vpcs F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1) F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1) @@ -360,6 +365,7 @@ static const struct fun fun[] = { F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1) F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1) F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1) + F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1) F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1) F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1) F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1) @@ -370,6 +376,7 @@ static const struct fun fun[] = { F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1) F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1) F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1) + F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1) #endif #endif #endif diff --git a/math/v_math.h b/math/v_math.h index 0861e98..3db22e5 100644 --- a/math/v_math.h +++ b/math/v_math.h @@ -249,6 +249,11 @@ v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y, return f (x1, x2); } +static inline int +v_lanes64 (void) +{ + return 1; +} static inline v_f64_t v_f64 (f64_t x) { @@ -264,6 +269,16 @@ v_s64 (s64_t x) { return x; } +static inline f64_t +v_get_f64 (v_f64_t x, int i) +{ + return x; +} +static inline void +v_set_f64 (v_f64_t *x, int i, 
f64_t v) +{ + *x = v; +} /* true if any elements of a v_cond result is non-zero. */ static inline int v_any_u64 (v_u64_t x) @@ -506,6 +521,11 @@ v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y, p[2] ? f (x1[2], x2[2]) : y[2], p[3] ? f (x1[3], x2[3]) : y[3]}; } +static inline int +v_lanes64 (void) +{ + return 2; +} static inline v_f64_t v_f64 (f64_t x) { @@ -521,6 +541,16 @@ v_s64 (s64_t x) { return (v_s64_t){x, x}; } +static inline f64_t +v_get_f64 (v_f64_t x, int i) +{ + return x[i]; +} +static inline void +v_set_f64 (v_f64_t *x, int i, f64_t v) +{ + (*x)[i] = v; +} /* true if any elements of a v_cond result is non-zero. */ static inline int v_any_u64 (v_u64_t x) diff --git a/math/v_pow.c b/math/v_pow.c new file mode 100644 index 0000000..a209d57 --- /dev/null +++ b/math/v_pow.c @@ -0,0 +1,27 @@ +/* + * Double-precision vector pow function. + * + * Copyright (c) 2020, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +#include "mathlib.h" +#include "v_math.h" +#if V_SUPPORTED + +VPCS_ATTR +v_f64_t +V_NAME(pow) (v_f64_t x, v_f64_t y) +{ + v_f64_t z; + for (int lane = 0; lane < v_lanes64 (); lane++) + { + f64_t sx = v_get_f64 (x, lane); + f64_t sy = v_get_f64 (y, lane); + f64_t sz = pow (sx, sy); + v_set_f64 (&z, lane, sz); + } + return z; +} +VPCS_ALIAS +#endif diff --git a/math/vn_pow.c b/math/vn_pow.c new file mode 100644 index 0000000..2609501 --- /dev/null +++ b/math/vn_pow.c @@ -0,0 +1,12 @@ +/* + * AdvSIMD vector PCS variant of __v_pow. + * + * Copyright (c) 2020, Arm Limited. + * SPDX-License-Identifier: MIT + */ +#include "mathlib.h" +#ifdef __vpcs +#define VPCS 1 +#define VPCS_ALIAS strong_alias (__vn_pow, _ZGVnN2vv_pow) +#include "v_pow.c" +#endif |