diff options
-rw-r--r-- | math/include/mathlib.h | 7 | ||||
-rw-r--r-- | math/s_exp2f.c | 6 | ||||
-rw-r--r-- | math/s_exp2f_1u.c | 6 | ||||
-rw-r--r-- | math/test/mathbench.c | 7 | ||||
-rwxr-xr-x | math/test/runulp.sh | 13 | ||||
-rw-r--r-- | math/test/ulp.c | 12 | ||||
-rw-r--r-- | math/v_exp2f.c | 78 | ||||
-rw-r--r-- | math/v_exp2f_1u.c | 75 | ||||
-rw-r--r-- | math/v_expf.c | 8 | ||||
-rw-r--r-- | math/v_expf_1u.c | 8 | ||||
-rw-r--r-- | math/vn_exp2f.c | 12 | ||||
-rw-r--r-- | math/vn_exp2f_1u.c | 11 |
12 files changed, 243 insertions, 0 deletions
diff --git a/math/include/mathlib.h b/math/include/mathlib.h index 5112068..254954a 100644 --- a/math/include/mathlib.h +++ b/math/include/mathlib.h @@ -28,6 +28,8 @@ float __s_sinf (float); float __s_cosf (float); float __s_expf (float); float __s_expf_1u (float); +float __s_exp2f (float); +float __s_exp2f_1u (float); float __s_logf (float); float __s_powf (float, float); double __s_sin (double); @@ -51,6 +53,8 @@ __f32x4_t __v_sinf (__f32x4_t); __f32x4_t __v_cosf (__f32x4_t); __f32x4_t __v_expf (__f32x4_t); __f32x4_t __v_expf_1u (__f32x4_t); +__f32x4_t __v_exp2f (__f32x4_t); +__f32x4_t __v_exp2f_1u (__f32x4_t); __f32x4_t __v_logf (__f32x4_t); __f32x4_t __v_powf (__f32x4_t, __f32x4_t); __f64x2_t __v_sin (__f64x2_t); @@ -66,6 +70,8 @@ __vpcs __f32x4_t __vn_sinf (__f32x4_t); __vpcs __f32x4_t __vn_cosf (__f32x4_t); __vpcs __f32x4_t __vn_expf (__f32x4_t); __vpcs __f32x4_t __vn_expf_1u (__f32x4_t); +__vpcs __f32x4_t __vn_exp2f (__f32x4_t); +__vpcs __f32x4_t __vn_exp2f_1u (__f32x4_t); __vpcs __f32x4_t __vn_logf (__f32x4_t); __vpcs __f32x4_t __vn_powf (__f32x4_t, __f32x4_t); __vpcs __f64x2_t __vn_sin (__f64x2_t); @@ -77,6 +83,7 @@ __vpcs __f64x2_t __vn_log (__f64x2_t); __vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t); +__vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t); __vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t); diff --git a/math/s_exp2f.c b/math/s_exp2f.c new file mode 100644 index 0000000..df7dfd6 --- /dev/null +++ b/math/s_exp2f.c @@ -0,0 +1,6 @@ +/* + * Copyright (c) 2019, Arm Limited. + * SPDX-License-Identifier: MIT + */ +#define SCALAR 1 +#include "v_exp2f.c" diff --git a/math/s_exp2f_1u.c b/math/s_exp2f_1u.c new file mode 100644 index 0000000..5e3852b --- /dev/null +++ b/math/s_exp2f_1u.c @@ -0,0 +1,6 @@ +/* + * Copyright (c) 2019, Arm Limited. + * SPDX-License-Identifier: MIT + */ +#define SCALAR 1 +#include "v_exp2f_1u.c" diff --git a/math/test/mathbench.c b/math/test/mathbench.c index 0736e8f..5ec0b0a 100644 --- a/math/test/mathbench.c +++ b/math/test/mathbench.c @@ -226,6 +226,8 @@ F (dummyf, 1.0, 2.0) F (expf, -9.9, 9.9) F (__s_expf, -9.9, 9.9) F (__s_expf_1u, -9.9, 9.9) +F (__s_exp2f, -9.9, 9.9) +F (__s_exp2f_1u, -9.9, 9.9) F (exp2f, -9.9, 9.9) F (logf, 0.01, 11.1) F (__s_logf, 0.01, 11.1) @@ -263,6 +265,8 @@ VD (__v_log, 0.01, 11.1) VF (__v_dummyf, 1.0, 2.0) VF (__v_expf, -9.9, 9.9) VF (__v_expf_1u, -9.9, 9.9) +VF (__v_exp2f, -9.9, 9.9) +VF (__v_exp2f_1u, -9.9, 9.9) VF (__v_logf, 0.01, 11.1) {"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}}, VF (__v_sinf, -3.1, 3.1) @@ -281,6 +285,9 @@ VNF (__vn_dummyf, 1.0, 2.0) VNF (__vn_expf, -9.9, 9.9) VNF (_ZGVnN4v_expf, -9.9, 9.9) VNF (__vn_expf_1u, -9.9, 9.9) +VNF (__vn_exp2f, -9.9, 9.9) +VNF (_ZGVnN4v_exp2f, -9.9, 9.9) +VNF (__vn_exp2f_1u, -9.9, 9.9) VNF (__vn_logf, 0.01, 11.1) VNF (_ZGVnN4v_logf, 0.01, 11.1) {"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}}, diff --git a/math/test/runulp.sh b/math/test/runulp.sh index bbe17e1..229757b 100755 --- a/math/test/runulp.sh +++ b/math/test/runulp.sh @@ -115,6 +115,8 @@ range_expf=' ' range_expf_1u="$range_expf" +range_exp2f="$range_expf" +range_exp2f_1u="$range_expf" range_logf=' 0 0xffff0000 10000 @@ -141,6 +143,8 @@ range_powf=' L_exp=1.9 L_expf=1.49 L_expf_1u=0.4 +L_exp2f=1.49 +L_exp2f_1u=0.4 L_logf=2.9 L_sinf=1.4 L_cosf=1.4 @@ -176,6 +180,15 @@ expf_1u __s_expf_1u 1 expf_1u __v_expf_1u $runv expf_1u __vn_expf_1u $runvn +exp2f __s_exp2f 1 +exp2f __v_exp2f $runv +exp2f __vn_exp2f $runvn +exp2f _ZGVnN4v_exp2f $runvn + +exp2f_1u __s_exp2f_1u 1 +exp2f_1u __v_exp2f_1u $runv +exp2f_1u __vn_exp2f_1u $runvn + logf __s_logf 1 logf __v_logf $runv logf __vn_logf $runvn diff --git a/math/test/ulp.c b/math/test/ulp.c index 60d89bf..ffe3501 100644 --- a/math/test/ulp.c +++ b/math/test/ulp.c @@ -227,6 +227,8 @@ static float v_sinf(float x) { return __v_sinf(argf(x))[0]; } static float v_cosf(float x) { return __v_cosf(argf(x))[0]; } static float v_expf_1u(float x) { return __v_expf_1u(argf(x))[0]; } static float v_expf(float x) { return __v_expf(argf(x))[0]; } +static float v_exp2f_1u(float x) { return __v_exp2f_1u(argf(x))[0]; } +static float v_exp2f(float x) { return __v_exp2f(argf(x))[0]; } static float v_logf(float x) { return __v_logf(argf(x))[0]; } static float v_powf(float x, float y) { return __v_powf(argf(x),argf(y))[0]; } static double v_sin(double x) { return __v_sin(argd(x))[0]; } @@ -238,6 +240,8 @@ static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; } static float vn_cosf(float x) { return __vn_cosf(argf(x))[0]; } static float vn_expf_1u(float x) { return __vn_expf_1u(argf(x))[0]; } static float vn_expf(float x) { return __vn_expf(argf(x))[0]; } +static float vn_exp2f_1u(float x) { return __vn_exp2f_1u(argf(x))[0]; } +static float vn_exp2f(float x) { return __vn_exp2f(argf(x))[0]; } static float vn_logf(float x) { return __vn_logf(argf(x))[0]; } static float vn_powf(float x, float y) { return __vn_powf(argf(x),argf(y))[0]; } static double vn_sin(double x) { return __vn_sin(argd(x))[0]; } @@ -247,6 +251,7 @@ static double vn_log(double x) { return __vn_log(argd(x))[0]; } static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; } static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; } static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; } +static float Z_exp2f(float x) { return _ZGVnN4v_exp2f(argf(x))[0]; } static float Z_logf(float x) { return _ZGVnN4v_logf(argf(x))[0]; } static float Z_powf(float x, float y) { return _ZGVnN4vv_powf(argf(x),argf(y))[0]; } static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; } @@ -315,6 +320,8 @@ static const struct fun fun[] = { F (__s_cosf, __s_cosf, cos, mpfr_cos, 1, 1, f1, 0) F (__s_expf_1u, __s_expf_1u, exp, mpfr_exp, 1, 1, f1, 0) F (__s_expf, __s_expf, exp, mpfr_exp, 1, 1, f1, 0) + F (__s_exp2f_1u, __s_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 0) + F (__s_exp2f, __s_exp2f, exp2, mpfr_exp2, 1, 1, f1, 0) F (__s_powf, __s_powf, pow, mpfr_pow, 2, 1, f2, 0) F (__s_logf, __s_logf, log, mpfr_log, 1, 1, f1, 0) F (__s_sin, __s_sin, sinl, mpfr_sin, 1, 0, d1, 0) @@ -326,6 +333,8 @@ static const struct fun fun[] = { F (__v_cosf, v_cosf, cos, mpfr_cos, 1, 1, f1, 1) F (__v_expf_1u, v_expf_1u, exp, mpfr_exp, 1, 1, f1, 1) F (__v_expf, v_expf, exp, mpfr_exp, 1, 1, f1, 1) + F (__v_exp2f_1u, v_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1) + F (__v_exp2f, v_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) F (__v_logf, v_logf, log, mpfr_log, 1, 1, f1, 1) F (__v_powf, v_powf, pow, mpfr_pow, 2, 1, f2, 1) F (__v_sin, v_sin, sinl, mpfr_sin, 1, 0, d1, 1) @@ -337,6 +346,8 @@ static const struct fun fun[] = { F (__vn_cosf, vn_cosf, cos, mpfr_cos, 1, 1, f1, 1) F (__vn_expf_1u, vn_expf_1u, exp, mpfr_exp, 1, 1, f1, 1) F (__vn_expf, vn_expf, exp, mpfr_exp, 1, 1, f1, 1) + F (__vn_exp2f_1u, vn_exp2f_1u, exp2, mpfr_exp2, 1, 1, f1, 1) + F (__vn_exp2f, vn_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) F (__vn_logf, vn_logf, log, mpfr_log, 1, 1, f1, 1) F (__vn_powf, vn_powf, pow, mpfr_pow, 2, 1, f2, 1) F (__vn_sin, vn_sin, sinl, mpfr_sin, 1, 0, d1, 1) @@ -346,6 +357,7 @@ static const struct fun fun[] = { F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1) F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1) F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1) + F (_ZGVnN4v_exp2f, Z_exp2f, exp2, mpfr_exp2, 1, 1, f1, 1) F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1) F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1) F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1) diff --git a/math/v_exp2f.c b/math/v_exp2f.c new file mode 100644 index 0000000..e3ea5af --- /dev/null +++ b/math/v_exp2f.c @@ -0,0 +1,78 @@ +/* + * Single-precision vector 2^x function. + * + * Copyright (c) 2019, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +#include "mathlib.h" +#include "v_math.h" +#if V_SUPPORTED + +static const float Poly[] = { + /* maxerr: 1.962 ulp. */ + 0x1.59977ap-10f, + 0x1.3ce9e4p-7f, + 0x1.c6bd32p-5f, + 0x1.ebf9bcp-3f, + 0x1.62e422p-1f, +}; +#define C0 v_f32 (Poly[0]) +#define C1 v_f32 (Poly[1]) +#define C2 v_f32 (Poly[2]) +#define C3 v_f32 (Poly[3]) +#define C4 v_f32 (Poly[4]) + +#define Shift v_f32 (0x1.8p23f) + +VPCS_ATTR +static v_f32_t +specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn, v_u32_t cmp1, v_f32_t scale) +{ + /* 2^n may overflow, break it up into s1*s2. */ + v_u32_t b = v_cond_u32 (n <= v_f32 (0.0f)) & v_u32 (0x82000000); + v_f32_t s1 = v_as_f32_u32 (v_u32 (0x7f000000) + b); + v_f32_t s2 = v_as_f32_u32 (e - b); + v_u32_t cmp2 = v_cond_u32 (absn > v_f32 (192.0f)); + v_u32_t r2 = v_as_u32_f32 (s1 * s1); + v_u32_t r1 = v_as_u32_f32 (v_fma_f32 (poly, s2, s2) * s1); + /* Similar to r1 but avoids double rounding in the subnormal range. */ + v_u32_t r0 = v_as_u32_f32 (v_fma_f32 (poly, scale, scale)); + return v_as_f32_u32 ((cmp2 & r2) | (~cmp2 & cmp1 & r1) | (~cmp1 & r0)); +} + +VPCS_ATTR +v_f32_t +V_NAME(exp2f) (v_f32_t x) +{ + v_f32_t n, r, r2, scale, p, q, poly, absn; + v_u32_t cmp, e; + + /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] + x = n + r, with r in [-1/2, 1/2]. */ +#if 0 + v_f32_t z; + z = x + Shift; + n = z - Shift; + r = x - n; + e = v_as_u32_f32 (z) << 23; +#else + n = v_round_f32 (x); + r = x - n; + e = v_as_u32_s32 (v_round_s32 (x)) << 23; +#endif + scale = v_as_f32_u32 (e + v_u32 (0x3f800000)); + absn = v_abs_f32 (n); + cmp = v_cond_u32 (absn > v_f32 (126.0f)); + r2 = r * r; + p = v_fma_f32 (C0, r, C1); + q = v_fma_f32 (C2, r, C3); + q = v_fma_f32 (p, r2, q); + p = C4 * r; + poly = v_fma_f32 (q, r2, p); + if (unlikely (v_any_u32 (cmp))) + return specialcase (poly, n, e, absn, cmp, scale); + return v_fma_f32 (poly, scale, scale); +} +VPCS_ALIAS +#endif diff --git a/math/v_exp2f_1u.c b/math/v_exp2f_1u.c new file mode 100644 index 0000000..1caa14d --- /dev/null +++ b/math/v_exp2f_1u.c @@ -0,0 +1,75 @@ +/* + * Single-precision vector 2^x function. + * + * Copyright (c) 2019, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +#include "mathlib.h" +#include "v_math.h" +#if V_SUPPORTED + +static const float Poly[] = { + /* maxerr: 0.878 ulp. */ + 0x1.416b5ep-13f, 0x1.5f082ep-10f, 0x1.3b2dep-7f, 0x1.c6af7cp-5f, 0x1.ebfbdcp-3f, 0x1.62e43p-1f +}; +#define C0 v_f32 (Poly[0]) +#define C1 v_f32 (Poly[1]) +#define C2 v_f32 (Poly[2]) +#define C3 v_f32 (Poly[3]) +#define C4 v_f32 (Poly[4]) +#define C5 v_f32 (Poly[5]) + +#define Shift v_f32 (0x1.8p23f) +#define InvLn2 v_f32 (0x1.715476p+0f) +#define Ln2hi v_f32 (0x1.62e4p-1f) +#define Ln2lo v_f32 (0x1.7f7d1cp-20f) + +VPCS_ATTR +static v_f32_t +specialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn) +{ + /* 2^n may overflow, break it up into s1*s2. */ + v_u32_t b = v_cond_u32 (n <= v_f32 (0.0f)) & v_u32 (0x83000000); + v_f32_t s1 = v_as_f32_u32 (v_u32 (0x7f000000) + b); + v_f32_t s2 = v_as_f32_u32 (e - b); + v_u32_t cmp = v_cond_u32 (absn > v_f32 (192.0f)); + v_f32_t r1 = s1 * s1; + v_f32_t r0 = poly * s1 * s2; + return v_as_f32_u32 ((cmp & v_as_u32_f32 (r1)) | (~cmp & v_as_u32_f32 (r0))); +} + +VPCS_ATTR +v_f32_t +V_NAME(exp2f_1u) (v_f32_t x) +{ + v_f32_t n, r, scale, poly, absn; + v_u32_t cmp, e; + + /* exp2(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)] + x = n + r, with r in [-1/2, 1/2]. */ +#if 0 + v_f32_t z; + z = x + Shift; + n = z - Shift; + r = x - n; + e = v_as_u32_f32 (z) << 23; +#else + n = v_round_f32 (x); + r = x - n; + e = v_as_u32_s32 (v_round_s32 (x)) << 23; +#endif + scale = v_as_f32_u32 (e + v_u32 (0x3f800000)); + absn = v_abs_f32 (n); + cmp = v_cond_u32 (absn > v_f32 (126.0f)); + poly = v_fma_f32 (C0, r, C1); + poly = v_fma_f32 (poly, r, C2); + poly = v_fma_f32 (poly, r, C3); + poly = v_fma_f32 (poly, r, C4); + poly = v_fma_f32 (poly, r, C5); + poly = v_fma_f32 (poly, r, v_f32 (1.0f)); + if (unlikely (v_any_u32 (cmp))) + return specialcase (poly, n, e, absn); + return scale * poly; +} +#endif diff --git a/math/v_expf.c b/math/v_expf.c index f536701..d403e00 100644 --- a/math/v_expf.c +++ b/math/v_expf.c @@ -53,11 +53,19 @@ V_NAME(expf) (v_f32_t x) /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ +#if 1 z = v_fma_f32 (x, InvLn2, Shift); n = z - Shift; r = v_fma_f32 (n, -Ln2hi, x); r = v_fma_f32 (n, -Ln2lo, r); e = v_as_u32_f32 (z) << 23; +#else + z = x * InvLn2; + n = v_round_f32 (z); + r = v_fma_f32 (n, -Ln2hi, x); + r = v_fma_f32 (n, -Ln2lo, r); + e = v_as_u32_s32 (v_round_s32 (z)) << 23; +#endif scale = v_as_f32_u32 (e + v_u32 (0x3f800000)); absn = v_abs_f32 (n); cmp = v_cond_u32 (absn > v_f32 (126.0f)); diff --git a/math/v_expf_1u.c b/math/v_expf_1u.c index 37d3d1e..023bd24 100644 --- a/math/v_expf_1u.c +++ b/math/v_expf_1u.c @@ -51,11 +51,19 @@ V_NAME(expf_1u) (v_f32_t x) /* exp(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)] x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ +#if 1 z = v_fma_f32 (x, InvLn2, Shift); n = z - Shift; r = v_fma_f32 (n, -Ln2hi, x); r = v_fma_f32 (n, -Ln2lo, r); e = v_as_u32_f32 (z) << 23; +#else + z = x * InvLn2; + n = v_round_f32 (z); + r = v_fma_f32 (n, -Ln2hi, x); + r = v_fma_f32 (n, -Ln2lo, r); + e = v_as_u32_s32 (v_round_s32 (z)) << 23; +#endif scale = v_as_f32_u32 (e + v_u32 (0x3f800000)); absn = v_abs_f32 (n); cmp = v_cond_u32 (absn > v_f32 (126.0f)); diff --git a/math/vn_exp2f.c b/math/vn_exp2f.c new file mode 100644 index 0000000..db9707e --- /dev/null +++ b/math/vn_exp2f.c @@ -0,0 +1,12 @@ +/* + * AdvSIMD vector PCS variant of __v_exp2f. + * + * Copyright (c) 2019, Arm Limited. + * SPDX-License-Identifier: MIT + */ +#include "mathlib.h" +#ifdef __vpcs +#define VPCS 1 +#define VPCS_ALIAS strong_alias (__vn_exp2f, _ZGVnN4v_exp2f) +#include "v_exp2f.c" +#endif diff --git a/math/vn_exp2f_1u.c b/math/vn_exp2f_1u.c new file mode 100644 index 0000000..17bd0ab --- /dev/null +++ b/math/vn_exp2f_1u.c @@ -0,0 +1,11 @@ +/* + * AdvSIMD vector PCS variant of __v_exp2f_1u. + * + * Copyright (c) 2019, Arm Limited. + * SPDX-License-Identifier: MIT + */ +#include "mathlib.h" +#ifdef __vpcs +#define VPCS 1 +#include "v_exp2f_1u.c" +#endif |