diff options
-rw-r--r-- | math/include/mathlib.h | 8 | ||||
-rw-r--r-- | math/s_cos.c | 6 | ||||
-rw-r--r-- | math/s_sin.c | 6 | ||||
-rw-r--r-- | math/test/mathbench.c | 8 | ||||
-rw-r--r-- | math/test/ulp.c | 14 | ||||
-rw-r--r-- | math/tools/v_sin.sollya | 36 | ||||
-rw-r--r-- | math/v_cos.c | 87 | ||||
-rw-r--r-- | math/v_sin.c | 86 | ||||
-rw-r--r-- | math/vn_cos.c | 12 | ||||
-rw-r--r-- | math/vn_sin.c | 12 |
10 files changed, 275 insertions, 0 deletions
diff --git a/math/include/mathlib.h b/math/include/mathlib.h index 1788502..48d0544 100644 --- a/math/include/mathlib.h +++ b/math/include/mathlib.h @@ -30,6 +30,8 @@ float __s_expf (float); float __s_expf_1u (float); float __s_logf (float); float __s_powf (float, float); +double __s_sin (double); +double __s_cos (double); double __s_exp (double); #if __aarch64__ @@ -50,6 +52,8 @@ __f32x4_t __v_expf (__f32x4_t); __f32x4_t __v_expf_1u (__f32x4_t); __f32x4_t __v_logf (__f32x4_t); __f32x4_t __v_powf (__f32x4_t, __f32x4_t); +__f64x2_t __v_sin (__f64x2_t); +__f64x2_t __v_cos (__f64x2_t); __f64x2_t __v_exp (__f64x2_t); #if __GNUC__ >= 9 || __clang_major__ >= 8 @@ -62,6 +66,8 @@ __vpcs __f32x4_t __vn_expf (__f32x4_t); __vpcs __f32x4_t __vn_expf_1u (__f32x4_t); __vpcs __f32x4_t __vn_logf (__f32x4_t); __vpcs __f32x4_t __vn_powf (__f32x4_t, __f32x4_t); +__vpcs __f64x2_t __vn_sin (__f64x2_t); +__vpcs __f64x2_t __vn_cos (__f64x2_t); __vpcs __f64x2_t __vn_exp (__f64x2_t); /* Vector functions following the vector PCS using ABI names. */ @@ -70,6 +76,8 @@ __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t); __vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t); +__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t); +__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t); __vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t); #endif #endif diff --git a/math/s_cos.c b/math/s_cos.c new file mode 100644 index 0000000..53a95b0 --- /dev/null +++ b/math/s_cos.c @@ -0,0 +1,6 @@ +/* + * Copyright (c) 2019, Arm Limited. + * SPDX-License-Identifier: MIT + */ +#define SCALAR 1 +#include "v_cos.c" diff --git a/math/s_sin.c b/math/s_sin.c new file mode 100644 index 0000000..06982c2 --- /dev/null +++ b/math/s_sin.c @@ -0,0 +1,6 @@ +/* + * Copyright (c) 2019, Arm Limited. + * SPDX-License-Identifier: MIT + */ +#define SCALAR 1 +#include "v_sin.c" diff --git a/math/test/mathbench.c b/math/test/mathbench.c index 84e69f5..7831971 100644 --- a/math/test/mathbench.c +++ b/math/test/mathbench.c @@ -207,6 +207,8 @@ static const struct fun #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}}, #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}}, D (dummy, 1.0, 2.0) +D (__s_sin, -3.1, 3.1) +D (__s_cos, -3.1, 3.1) D (exp, -9.9, 9.9) D (exp, 0.5, 1.0) D (__s_exp, -9.9, 9.9) @@ -253,6 +255,8 @@ F (cosf, 1e6, 1e32) F (__s_cosf, -3.1, 3.1) #if __aarch64__ VD (__v_dummy, 1.0, 2.0) +VD (__v_sin, -3.1, 3.1) +VD (__v_cos, -3.1, 3.1) VD (__v_exp, -9.9, 9.9) VF (__v_dummyf, 1.0, 2.0) VF (__v_expf, -9.9, 9.9) @@ -265,6 +269,10 @@ VF (__v_cosf, -3.1, 3.1) VND (__vn_dummy, 1.0, 2.0) VND (__vn_exp, -9.9, 9.9) VND (_ZGVnN2v_exp, -9.9, 9.9) +VND (__vn_sin, -3.1, 3.1) +VND (_ZGVnN2v_sin, -3.1, 3.1) +VND (__vn_cos, -3.1, 3.1) +VND (_ZGVnN2v_cos, -3.1, 3.1) VNF (__vn_dummyf, 1.0, 2.0) VNF (__vn_expf, -9.9, 9.9) VNF (_ZGVnN4v_expf, -9.9, 9.9) diff --git a/math/test/ulp.c b/math/test/ulp.c index 514a245..6a3ed12 100644 --- a/math/test/ulp.c +++ b/math/test/ulp.c @@ -229,6 +229,8 @@ static float v_expf_1u(float x) { return __v_expf_1u(argf(x))[0]; } static float v_expf(float x) { return __v_expf(argf(x))[0]; } static float v_logf(float x) { return __v_logf(argf(x))[0]; } static float v_powf(float x, float y) { return __v_powf(argf(x),argf(y))[0]; } +static double v_sin(double x) { return __v_sin(argd(x))[0]; } +static double v_cos(double x) { return __v_cos(argd(x))[0]; } static double v_exp(double x) { return __v_exp(argd(x))[0]; } #ifdef __vpcs static float vn_sinf(float x) { return __vn_sinf(argf(x))[0]; } @@ -237,12 +239,16 @@ static float vn_expf_1u(float x) { return __vn_expf_1u(argf(x))[0]; } static float vn_expf(float x) { return __vn_expf(argf(x))[0]; } static float vn_logf(float x) { return __vn_logf(argf(x))[0]; } static float vn_powf(float x, float y) { return __vn_powf(argf(x),argf(y))[0]; } +static double vn_sin(double x) { return __vn_sin(argd(x))[0]; } +static double vn_cos(double x) { return __vn_cos(argd(x))[0]; } static double vn_exp(double x) { return __vn_exp(argd(x))[0]; } static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; } static float Z_cosf(float x) { return _ZGVnN4v_cosf(argf(x))[0]; } static float Z_expf(float x) { return _ZGVnN4v_expf(argf(x))[0]; } static float Z_logf(float x) { return _ZGVnN4v_logf(argf(x))[0]; } static float Z_powf(float x, float y) { return _ZGVnN4vv_powf(argf(x),argf(y))[0]; } +static double Z_sin(double x) { return _ZGVnN2v_sin(argd(x))[0]; } +static double Z_cos(double x) { return _ZGVnN2v_cos(argd(x))[0]; } static double Z_exp(double x) { return _ZGVnN2v_exp(argd(x))[0]; } #endif #endif @@ -308,6 +314,8 @@ static const struct fun fun[] = { F (__s_expf, __s_expf, exp, mpfr_exp, 1, 1, f1, 0) F (__s_powf, __s_powf, pow, mpfr_pow, 2, 1, f2, 0) F (__s_logf, __s_logf, log, mpfr_log, 1, 1, f1, 0) + F (__s_sin, __s_sin, sinl, mpfr_sin, 1, 0, d1, 0) + F (__s_cos, __s_cos, cosl, mpfr_cos, 1, 0, d1, 0) F (__s_exp, __s_exp, expl, mpfr_exp, 1, 0, d1, 0) #if __aarch64__ F (__v_sinf, v_sinf, sin, mpfr_sin, 1, 1, f1, 1) @@ -316,6 +324,8 @@ static const struct fun fun[] = { F (__v_expf, v_expf, exp, mpfr_exp, 1, 1, f1, 1) F (__v_logf, v_logf, log, mpfr_log, 1, 1, f1, 1) F (__v_powf, v_powf, pow, mpfr_pow, 2, 1, f2, 1) + F (__v_sin, v_sin, sinl, mpfr_sin, 1, 0, d1, 1) + F (__v_cos, v_cos, cosl, mpfr_cos, 1, 0, d1, 1) F (__v_exp, v_exp, expl, mpfr_exp, 1, 0, d1, 1) #ifdef __vpcs F (__vn_sinf, vn_sinf, sin, mpfr_sin, 1, 1, f1, 1) @@ -324,12 +334,16 @@ static const struct fun fun[] = { F (__vn_expf, vn_expf, exp, mpfr_exp, 1, 1, f1, 1) F (__vn_logf, vn_logf, log, mpfr_log, 1, 1, f1, 1) F (__vn_powf, vn_powf, pow, mpfr_pow, 2, 1, f2, 1) + F (__vn_sin, vn_sin, sinl, mpfr_sin, 1, 0, d1, 1) + F (__vn_cos, vn_cos, cosl, mpfr_cos, 1, 0, d1, 1) F (__vn_exp, vn_exp, expl, mpfr_exp, 1, 0, d1, 1) F (_ZGVnN4v_sinf, Z_sinf, sin, mpfr_sin, 1, 1, f1, 1) F (_ZGVnN4v_cosf, Z_cosf, cos, mpfr_cos, 1, 1, f1, 1) F (_ZGVnN4v_expf, Z_expf, exp, mpfr_exp, 1, 1, f1, 1) F (_ZGVnN4v_logf, Z_logf, log, mpfr_log, 1, 1, f1, 1) F (_ZGVnN4vv_powf, Z_powf, pow, mpfr_pow, 2, 1, f2, 1) + F (_ZGVnN2v_sin, Z_sin, sinl, mpfr_sin, 1, 0, d1, 1) + F (_ZGVnN2v_cos, Z_cos, cosl, mpfr_cos, 1, 0, d1, 1) F (_ZGVnN2v_exp, Z_exp, expl, mpfr_exp, 1, 0, d1, 1) #endif #endif diff --git a/math/tools/v_sin.sollya b/math/tools/v_sin.sollya new file mode 100644 index 0000000..65cc995 --- /dev/null +++ b/math/tools/v_sin.sollya @@ -0,0 +1,36 @@ +// polynomial for approximating sin(x) +// +// Copyright (c) 2019, Arm Limited. +// SPDX-License-Identifier: MIT + +deg = 15; // polynomial degree +a = -pi/2; // interval +b = pi/2; + +// find even polynomial with minimal abs error compared to sin(x)/x + +// account for /x +deg = deg-1; + +// f = sin(x)/x; +f = 1; +c = 1; +for i from 1 to 60 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*x^(2*i)/c; }; + +// return p that minimizes |f(x) - poly(x) - x^d*p(x)| +approx = proc(poly,d) { + return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10); +}; + +// first coeff is fixed, iteratively find optimal double prec coeffs +poly = 1; +for i from 1 to deg/2 do { + p = roundcoefficients(approx(poly,2*i), [|D ...|]); + poly = poly + x^(2*i)*coeff(p,0); +}; + +display = hexadecimal; +print("abs error:", accurateinfnorm(sin(x)-x*poly(x), [a;b], 30)); +print("in [",a,b,"]"); +print("coeffs:"); +for i from 0 to deg do coeff(poly,i); diff --git a/math/v_cos.c b/math/v_cos.c new file mode 100644 index 0000000..20ba6bd --- /dev/null +++ b/math/v_cos.c @@ -0,0 +1,87 @@ +/* + * Double-precision vector cos function. + * + * Copyright (c) 2019, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +#include "mathlib.h" +#include "v_math.h" +#if V_SUPPORTED + +static const double Poly[] = { +/* worst-case error is 3.5 ulp. + abs error: 0x1.be222a58p-53 in [-pi/2, pi/2]. */ +-0x1.9f4a9c8b21dc9p-41, + 0x1.60e88a10163f2p-33, +-0x1.ae6361b7254e7p-26, + 0x1.71de382e8d62bp-19, +-0x1.a01a019aeb4ffp-13, + 0x1.111111110b25ep-7, +-0x1.55555555554c3p-3, +}; + +#define C7 v_f64 (Poly[0]) +#define C6 v_f64 (Poly[1]) +#define C5 v_f64 (Poly[2]) +#define C4 v_f64 (Poly[3]) +#define C3 v_f64 (Poly[4]) +#define C2 v_f64 (Poly[5]) +#define C1 v_f64 (Poly[6]) + +#define InvPi v_f64 (0x1.45f306dc9c883p-2) +#define HalfPi v_f64 (0x1.921fb54442d18p+0) +#define Pi1 v_f64 (0x1.921fb54442d18p+1) +#define Pi2 v_f64 (0x1.1a62633145c06p-53) +#define Pi3 v_f64 (0x1.c1cd129024e09p-106) +#define Shift v_f64 (0x1.8p52) +#define RangeVal v_f64 (0x1p23) +#define AbsMask v_u64 (0x7fffffffffffffff) + +VPCS_ATTR +__attribute__ ((noinline)) static v_f64_t +specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp) +{ + return v_call_f64 (cos, x, y, cmp); +} + +VPCS_ATTR +v_f64_t +V_NAME(cos) (v_f64_t x) +{ + v_f64_t n, r, r2, y; + v_u64_t odd, cmp; + + r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask); + cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal)); + + /* n = rint((|x|+pi/2)/pi) - 0.5. */ + n = v_fma_f64 (InvPi, r + HalfPi, Shift); + odd = v_as_u64_f64 (n) << 63; + n -= Shift; + n -= v_f64 (0.5); + + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ + r = v_fma_f64 (-Pi1, n, r); + r = v_fma_f64 (-Pi2, n, r); + r = v_fma_f64 (-Pi3, n, r); + + /* sin(r) poly approx. */ + r2 = r * r; + y = v_fma_f64 (C7, r2, C6); + y = v_fma_f64 (y, r2, C5); + y = v_fma_f64 (y, r2, C4); + y = v_fma_f64 (y, r2, C3); + y = v_fma_f64 (y, r2, C2); + y = v_fma_f64 (y, r2, C1); + y = v_fma_f64 (y * r2, r, r); + + /* sign. */ + y = v_as_f64_u64 (v_as_u64_f64 (y) ^ odd); + + if (unlikely (v_any_u64 (cmp))) + return specialcase (x, y, cmp); + return y; +} +VPCS_ALIAS +#endif diff --git a/math/v_sin.c b/math/v_sin.c new file mode 100644 index 0000000..2b9ed05 --- /dev/null +++ b/math/v_sin.c @@ -0,0 +1,86 @@ +/* + * Double-precision vector sin function. + * + * Copyright (c) 2019, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +#include "mathlib.h" +#include "v_math.h" +#if V_SUPPORTED + +static const double Poly[] = { +/* worst-case error is 3.5 ulp. + abs error: 0x1.be222a58p-53 in [-pi/2, pi/2]. */ +-0x1.9f4a9c8b21dc9p-41, + 0x1.60e88a10163f2p-33, +-0x1.ae6361b7254e7p-26, + 0x1.71de382e8d62bp-19, +-0x1.a01a019aeb4ffp-13, + 0x1.111111110b25ep-7, +-0x1.55555555554c3p-3, +}; + +#define C7 v_f64 (Poly[0]) +#define C6 v_f64 (Poly[1]) +#define C5 v_f64 (Poly[2]) +#define C4 v_f64 (Poly[3]) +#define C3 v_f64 (Poly[4]) +#define C2 v_f64 (Poly[5]) +#define C1 v_f64 (Poly[6]) + +#define InvPi v_f64 (0x1.45f306dc9c883p-2) +#define Pi1 v_f64 (0x1.921fb54442d18p+1) +#define Pi2 v_f64 (0x1.1a62633145c06p-53) +#define Pi3 v_f64 (0x1.c1cd129024e09p-106) +#define Shift v_f64 (0x1.8p52) +#define RangeVal v_f64 (0x1p23) +#define AbsMask v_u64 (0x7fffffffffffffff) + +VPCS_ATTR +__attribute__ ((noinline)) static v_f64_t +specialcase (v_f64_t x, v_f64_t y, v_u64_t cmp) +{ + return v_call_f64 (sin, x, y, cmp); +} + +VPCS_ATTR +v_f64_t +V_NAME(sin) (v_f64_t x) +{ + v_f64_t n, r, r2, y; + v_u64_t sign, odd, cmp; + + r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask); + sign = v_as_u64_f64 (x) & ~AbsMask; + cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal)); + + /* n = rint(|x|/pi). */ + n = v_fma_f64 (InvPi, r, Shift); + odd = v_as_u64_f64 (n) << 63; + n -= Shift; + + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ + r = v_fma_f64 (-Pi1, n, r); + r = v_fma_f64 (-Pi2, n, r); + r = v_fma_f64 (-Pi3, n, r); + + /* sin(r) poly approx. */ + r2 = r * r; + y = v_fma_f64 (C7, r2, C6); + y = v_fma_f64 (y, r2, C5); + y = v_fma_f64 (y, r2, C4); + y = v_fma_f64 (y, r2, C3); + y = v_fma_f64 (y, r2, C2); + y = v_fma_f64 (y, r2, C1); + y = v_fma_f64 (y * r2, r, r); + + /* sign. */ + y = v_as_f64_u64 (v_as_u64_f64 (y) ^ sign ^ odd); + + if (unlikely (v_any_u64 (cmp))) + return specialcase (x, y, cmp); + return y; +} +VPCS_ALIAS +#endif diff --git a/math/vn_cos.c b/math/vn_cos.c new file mode 100644 index 0000000..b57a549 --- /dev/null +++ b/math/vn_cos.c @@ -0,0 +1,12 @@ +/* + * AdvSIMD vector PCS variant of __v_cos. + * + * Copyright (c) 2019, Arm Limited. + * SPDX-License-Identifier: MIT + */ +#include "mathlib.h" +#ifdef __vpcs +#define VPCS 1 +#define VPCS_ALIAS strong_alias (__vn_cos, _ZGVnN2v_cos) +#include "v_cos.c" +#endif diff --git a/math/vn_sin.c b/math/vn_sin.c new file mode 100644 index 0000000..905c796 --- /dev/null +++ b/math/vn_sin.c @@ -0,0 +1,12 @@ +/* + * AdvSIMD vector PCS variant of __v_sin. + * + * Copyright (c) 2019, Arm Limited. + * SPDX-License-Identifier: MIT + */ +#include "mathlib.h" +#ifdef __vpcs +#define VPCS 1 +#define VPCS_ALIAS strong_alias (__vn_sin, _ZGVnN2v_sin) +#include "v_sin.c" +#endif |