diff options
author | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2020-01-10 15:10:45 +0000 |
---|---|---|
committer | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2020-01-14 12:47:48 +0000 |
commit | a807c9bba198cc89ddd6cb177442e1e297c935b3 (patch) | |
tree | 2d8578dfdc4d6d45257a4518f8244396e876cd7b /math/test | |
parent | 099350affd8bd6eebf9d1e067b102530740b7270 (diff) | |
download | arm-optimized-routines-a807c9bba198cc89ddd6cb177442e1e297c935b3.tar.gz |
math: add vector pow
This implementation is a wrapper around the scalar pow with appropriate
call abi. As such it is not expected to be faster than scalar calls,
the new double prec vector pow symbols are provided for completeness.
Diffstat (limited to 'math/test')
-rw-r--r-- | math/test/mathbench.c | 28 | ||||
-rwxr-xr-x | math/test/runulp.sh | 15 | ||||
-rw-r--r-- | math/test/ulp.c | 7 |
3 files changed, 50 insertions, 0 deletions
diff --git a/math/test/mathbench.c b/math/test/mathbench.c index 8d3ff1d..33ceda3 100644 --- a/math/test/mathbench.c +++ b/math/test/mathbench.c @@ -128,6 +128,18 @@ xy_Z_powf (v_float x) { return _ZGVnN4vv_powf (x, x); } + +__vpcs static v_double +xy__vn_pow (v_double x) +{ + return __vn_pow (x, x); +} + +__vpcs static v_double +xy_Z_pow (v_double x) +{ + return _ZGVnN2vv_pow (x, x); +} #endif static v_float @@ -135,6 +147,12 @@ xy__v_powf (v_float x) { return __v_powf (x, x); } + +static v_double +xy__v_pow (v_double x) +{ + return __v_pow (x, x); +} #endif static float @@ -142,6 +160,12 @@ xy__s_powf (float x) { return __s_powf (x, x); } + +static double +xy__s_pow (double x) +{ + return __s_pow (x, x); +} #endif static double @@ -256,6 +280,7 @@ D (__s_sin, -3.1, 3.1) D (__s_cos, -3.1, 3.1) D (__s_exp, -9.9, 9.9) D (__s_log, 0.01, 11.1) +{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}}, F (__s_expf, -9.9, 9.9) F (__s_expf_1u, -9.9, 9.9) F (__s_exp2f, -9.9, 9.9) @@ -270,6 +295,7 @@ VD (__v_sin, -3.1, 3.1) VD (__v_cos, -3.1, 3.1) VD (__v_exp, -9.9, 9.9) VD (__v_log, 0.01, 11.1) +{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}}, VF (__v_dummyf, 1.0, 2.0) VF (__v_expf, -9.9, 9.9) VF (__v_expf_1u, -9.9, 9.9) @@ -285,6 +311,8 @@ VND (__vn_exp, -9.9, 9.9) VND (_ZGVnN2v_exp, -9.9, 9.9) VND (__vn_log, 0.01, 11.1) VND (_ZGVnN2v_log, 0.01, 11.1) +{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}}, +{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}}, VND (__vn_sin, -3.1, 3.1) VND (_ZGVnN2v_sin, -3.1, 3.1) VND (__vn_cos, -3.1, 3.1) diff --git a/math/test/runulp.sh b/math/test/runulp.sh index 44393b8..ea524ca 100755 --- a/math/test/runulp.sh +++ b/math/test/runulp.sh @@ -110,6 +110,15 @@ range_exp=' -633.3 -777.3 10000 ' +range_pow=' + 0x1p-1 0x1p1 x 0x1p-10 0x1p10 50000 + 0x1p-1 0x1p1 x -0x1p-10 -0x1p10 50000 + 0x1p-500 0x1p500 x 0x1p-1 0x1p1 50000 + 0x1p-500 0x1p500 x -0x1p-1 -0x1p1 50000 + 0x1.ep-1 0x1.1p0 x 0x1p8 0x1p16 50000 + 0x1.ep-1 0x1.1p0 x -0x1p8 -0x1p16 50000 +' + range_expf=' 0 0xffff0000 10000 0x1p-14 0x1p8 500000 @@ -143,6 +152,7 @@ range_powf=' # error limits L_exp=1.9 +L_pow=0.05 L_expf=1.49 L_expf_1u=0.4 L_exp2f=1.49 @@ -173,6 +183,11 @@ exp __v_exp $runv exp __vn_exp $runvn exp _ZGVnN2v_exp $runvn +pow __s_pow $runs +pow __v_pow $runv +pow __vn_pow $runvn +pow _ZGVnN2vv_pow $runvn + expf __s_expf $runs expf __v_expf $runv expf __vn_expf $runvn diff --git a/math/test/ulp.c b/math/test/ulp.c index b746080..444bbca 100644 --- a/math/test/ulp.c +++ b/math/test/ulp.c @@ -240,6 +240,7 @@ static double v_sin(double x) { return __v_sin(argd(x))[0]; } static double v_cos(double x) { return __v_cos(argd(x))[0]; } static double v_exp(double x) { return __v_exp(argd(x))[0]; } static double v_log(double x) { return __v_log(argd(x))[0]; } +static double v_pow(double x, double y) { return __v_pow(argd(x),argd(y))[0]; } #ifdef __vpcs static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; } static float vn_cosf(float x) { return __vn_cosf(argf(x))[0]; } @@ -253,6 +254,7 @@ static double vn_sin(double x) { return __vn_sin(argd(x))[0]; } static double vn_cos(double x) { return __vn_cos(argd(x))[0]; } static double vn_exp(double x) { return __vn_exp(argd(x))[0]; } static double vn_log(double x) { return __vn_log(argd(x))[0]; } +static double vn_pow(double x, double y) { return __vn_pow(argd(x),argd(y))[0]; } static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; } static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; } static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; } @@ -263,6 +265,7 @@ static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; } static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; } static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; } static double Z_log(double x) { return _ZGVnN2v_log(argd(x))[0]; } +static double Z_pow(double x, double y) { return _ZGVnN2vv_pow(argd(x),argd(y))[0]; } #endif #endif @@ -334,6 +337,7 @@ static const struct fun fun[] = { F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0) F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0) F (__s_log, __s_log, logl, mpfr_log, 1, 0, d1, 0) + F (__s_pow, __s_pow, powl, mpfr_pow, 2, 0, d2, 0) #if __aarch64__ F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1) F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1) @@ -347,6 +351,7 @@ static const struct fun fun[] = { F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1) F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1) F (__v_log, v_log, logl, mpfr_log, 1, 0, d1, 1) + F (__v_pow, v_pow, powl, mpfr_pow, 2, 0, d2, 1) #ifdef __vpcs F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1) F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1) @@ -360,6 +365,7 @@ static const struct fun fun[] = { F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1) F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1) F (__vn_log, vn_log, logl, mpfr_log, 1, 0, d1, 1) + F (__vn_pow, vn_pow, powl, mpfr_pow, 2, 0, d2, 1) F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1) F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1) F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1) @@ -370,6 +376,7 @@ static const struct fun fun[] = { F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1) F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1) F (_ZGVnN2v_log, Z_log, logl, mpfr_log, 1, 0, d1, 1) + F (_ZGVnN2vv_pow, Z_pow, powl, mpfr_pow, 2, 0, d2, 1) #endif #endif #endif |